merge adjust_cost and adjust_cost_2 target hooks
[official-gcc.git] / gcc / config / pa / pa.c
/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2016 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
                            const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}* where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
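
/* Illustrative example (names are whatever decode_reg_name accepts for
   this target; on PA the printed names look like %fr12):

     -mfixed-range=%fr12-%fr15

   marks %fr12 through %fr15 fixed and call-used so the compiler never
   allocates them, and if the requested ranges end up covering every
   register in FP_REG_FIRST..FP_REG_LAST, the loop above additionally
   sets MASK_DISABLE_FPREGS.  */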
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = const_double_from_real_value (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
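
/* Illustrative use of the builtins registered in pa_init_builtins
   (HPUX_LONG_DOUBLE_LIBRARY targets only):

     long double x = __builtin_infq ();
     long double y = __builtin_fabsq (x);

   The fabsq/copysignq cases above expand to ordinary calls (resolved to
   _U_Qfabs and _U_Qfcopysign), while infq/huge_valq are expanded inline
   by loading a TFmode infinity from the constant pool.  */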
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
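
/* Worked example: for IVAL = 0x7ffff800 the low 11 bits are zero and
   bits 31 and up are all zero, so X == 0 and ldil can load it.  For
   IVAL = 0x80000000 (zero-extended on a 64-bit host), X == 0x80000000,
   which is neither 0 nor the fully sign-extended pattern, so the value
   would change sign when widened from 32 to 64 bits and ldil cannot be
   used.  */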
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
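
/* Worked example: X = 0x7f0 (0b0111_1111_0000) is the 5-bit value
   0b11111 sign-extended to seven bits and shifted left by four, so
   zdepi can generate it: LSB_MASK = 0x10 and T = (0x7f + 0x10) & ~0xf
   = 0x80, a power of two.  X = 0x210 (0b10_0001_0000) fails: T = 0x30,
   not a power of two, because 0b100001 is not the sign extension of
   any 5-bit value.  */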
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
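
/* Worked examples for pa_and_mask_p: MASK = 0x3f (0...01...1) is
   accepted, since ~MASK = 0xff...c0 and adding its lowest set bit
   (0x40) carries off the top, leaving 0.  MASK = 0x0ff0
   (0...01...10...0) is rejected: ~MASK = 0xff...f00f, and adding its
   lowest set bit gives 0xff...f010, which still has several bits set.
   pa_ior_mask_p applies the same carry trick to MASK itself, so it
   accepts exactly the masks that are a single contiguous run of ones
   (e.g. 0x6 is accepted, 0x5 is not).  */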
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          /* Make sure we have label and not a note.  */
          if (LABEL_P (orig))
            LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (Pmode, base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
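
/* For a data SYMBOL_REF `sym', the code above emits the usual
   two-instruction DLT load, which on SOM targets assembles to roughly

       addil LT'sym,%r19     ; %r1 = %r19 + left part of the DLT offset
       ldw RT'sym(%r1),reg   ; reg = address of sym, fetched from the DLT

   where %r19 is the PIC register (pic_offset_table_rtx) and the
   UNSPEC_DLTIND14R wrapper carries the 14-bit DLT-indirect relocation.  */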
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
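
/* Illustrative mapping from source code to the cases above: for

     static __thread int t;   // typically local-exec in an executable

   the TLS_MODEL_LOCAL_EXEC arm emits only a thread-pointer read
   (gen_tp_load) plus a tle_load of t's offset, whereas the two dynamic
   models route the address computation through a __tls_get_addr call
   via hppa_tls_call.  */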
/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
           || GET_CODE (x) == MULT)
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && ((GET_CODE (x) == ASHIFT
               && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
              || (GET_CODE (x) == MULT
                  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
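
/* For example, both (ashift (reg) (const_int 2)) and
   (mult (reg) (const_int 4)) satisfy this predicate: the ASHIFT form
   is the canonical shift-add found in arithmetic, the MULT form the
   canonical scaled form inside a MEM address.  */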
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1    Round up.
        else
          Y = (<large int> & ~mask)               Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
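
/* Worked example of the transformation above, using the MODE_INT mask
   0x3fff: for memory (X + 0x12345), the low bits 0x2345 are more than
   halfway to the next boundary, so Y = (0x12345 & ~0x3fff) + 0x4000
   = 0x14000, Z = X + Y, and the residual displacement is
   0x12345 - 0x14000 = -0x1cbb, which fits in 14 bits.  */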
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg2,
                                                      GEN_INT (shift_val)),
                                      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_ASHIFT (Pmode,
                                              XEXP (XEXP (XEXP (x, 0), 0), 0),
                                              GEN_INT (shift_val)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));
          val /= (1 << shift_val);

          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_ASHIFT (Pmode, reg1,
                                                    GEN_INT (shift_val)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode,
                            gen_rtx_PLUS (Pmode,
                                          gen_rtx_ASHIFT (Pmode, reg2,
                                                          GEN_INT (shift_val)),
                                          reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg1,
                                                      GEN_INT (shift_val)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (mem_shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode,
                                                         reg2,
                                                         GEN_INT (shift_val)),
                                         reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode, regx2,
                                                         GEN_INT (shift_val)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}
/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
                   addr_space_t as ATTRIBUTE_UNUSED,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
                int opno ATTRIBUTE_UNUSED,
                int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = factor * factor * COSTS_N_INSNS (8);
      else
        *total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A size N times larger than UNITS_PER_WORD needs N times as
         many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor == 0)
        factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
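
/* Costing example: a DImode (8-byte) multiply on a 32-bit target has
   FACTOR = 2, so it is costed as 2 * 2 * COSTS_N_INSNS (8) when the
   FPU can be used for the product and 2 * 2 * COSTS_N_INSNS (20) when
   it cannot.  */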
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}
1593 /* Emit insns to move operands[1] into operands[0].
1595 Return 1 if we have written out everything that needs to be done to
1596 do the move. Otherwise, return 0 and the caller will emit the move
1597 normally.
1599 Note SCRATCH_REG may not be in the proper mode depending on how it
1600 will be used. This routine is responsible for creating a new copy
1601 of SCRATCH_REG in the proper mode. */
1604 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1606 register rtx operand0 = operands[0];
1607 register rtx operand1 = operands[1];
1608 register rtx tem;
1610 /* We can only handle indexed addresses in the destination operand
1611 of floating point stores. Thus, we need to break out indexed
1612 addresses from the destination operand. */
1613 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1615 gcc_assert (can_create_pseudo_p ());
1617 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1618 operand0 = replace_equiv_address (operand0, tem);
1621 /* On targets with non-equivalent space registers, break out unscaled
1622 indexed addresses from the source operand before the final CSE.
1623 We have to do this because the REG_POINTER flag is not correctly
1624 carried through various optimization passes and CSE may substitute
1625 a pseudo without the pointer set for one with the pointer set. As
1626 a result, we loose various opportunities to create insns with
1627 unscaled indexed addresses. */
1628 if (!TARGET_NO_SPACE_REGS
1629 && !cse_not_expected
1630 && GET_CODE (operand1) == MEM
1631 && GET_CODE (XEXP (operand1, 0)) == PLUS
1632 && REG_P (XEXP (XEXP (operand1, 0), 0))
1633 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1634 operand1
1635 = replace_equiv_address (operand1,
1636 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1638 if (scratch_reg
1639 && reload_in_progress && GET_CODE (operand0) == REG
1640 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1641 operand0 = reg_equiv_mem (REGNO (operand0));
1642 else if (scratch_reg
1643 && reload_in_progress && GET_CODE (operand0) == SUBREG
1644 && GET_CODE (SUBREG_REG (operand0)) == REG
1645 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1647 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1648 the code which tracks sets/uses for delete_output_reload. */
1649 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1650 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1651 SUBREG_BYTE (operand0));
1652 operand0 = alter_subreg (&temp, true);
1655 if (scratch_reg
1656 && reload_in_progress && GET_CODE (operand1) == REG
1657 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1658 operand1 = reg_equiv_mem (REGNO (operand1));
1659 else if (scratch_reg
1660 && reload_in_progress && GET_CODE (operand1) == SUBREG
1661 && GET_CODE (SUBREG_REG (operand1)) == REG
1662 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1664 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1665 the code which tracks sets/uses for delete_output_reload. */
1666 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1667 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1668 SUBREG_BYTE (operand1));
1669 operand1 = alter_subreg (&temp, true);
1672 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1673 && ((tem = find_replacement (&XEXP (operand0, 0)))
1674 != XEXP (operand0, 0)))
1675 operand0 = replace_equiv_address (operand0, tem);
1677 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1678 && ((tem = find_replacement (&XEXP (operand1, 0)))
1679 != XEXP (operand1, 0)))
1680 operand1 = replace_equiv_address (operand1, tem);
1682 /* Handle secondary reloads for loads/stores of FP registers from
1683 REG+D addresses where D does not fit in 5 or 14 bits, including
1684 (subreg (mem (addr))) cases, and reloads for other unsupported
1685 memory operands. */
1686 if (scratch_reg
1687 && FP_REG_P (operand0)
1688 && (MEM_P (operand1)
1689 || (GET_CODE (operand1) == SUBREG
1690 && MEM_P (XEXP (operand1, 0)))))
1692 rtx op1 = operand1;
1694 if (GET_CODE (op1) == SUBREG)
1695 op1 = XEXP (op1, 0);
1697 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1699 if (!(TARGET_PA_20
1700 && !TARGET_ELF32
1701 && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1702 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1704 /* SCRATCH_REG will hold an address and maybe the actual data.
1705 We want it in WORD_MODE regardless of what mode it was
1706 originally given to us. */
1707 scratch_reg = force_mode (word_mode, scratch_reg);
1709 /* D might not fit in 14 bits either; for such cases load D
1710 into scratch reg. */
1711 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1713 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1714 emit_move_insn (scratch_reg,
1715 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1716 Pmode,
1717 XEXP (XEXP (op1, 0), 0),
1718 scratch_reg));
1720 else
1721 emit_move_insn (scratch_reg, XEXP (op1, 0));
1722 emit_insn (gen_rtx_SET (operand0,
1723 replace_equiv_address (op1, scratch_reg)));
1724 return 1;
1727 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1728 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1729 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1731 /* Load memory address into SCRATCH_REG. */
1732 scratch_reg = force_mode (word_mode, scratch_reg);
1733 emit_move_insn (scratch_reg, XEXP (op1, 0));
1734 emit_insn (gen_rtx_SET (operand0,
1735 replace_equiv_address (op1, scratch_reg)));
1736 return 1;
1739 else if (scratch_reg
1740 && FP_REG_P (operand1)
1741 && (MEM_P (operand0)
1742 || (GET_CODE (operand0) == SUBREG
1743 && MEM_P (XEXP (operand0, 0)))))
1745 rtx op0 = operand0;
1747 if (GET_CODE (op0) == SUBREG)
1748 op0 = XEXP (op0, 0);
1750 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1752 if (!(TARGET_PA_20
1753 && !TARGET_ELF32
1754 && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1755 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1757 /* SCRATCH_REG will hold an address and maybe the actual data.
1758 We want it in WORD_MODE regardless of what mode it was
1759 originally given to us. */
1760 scratch_reg = force_mode (word_mode, scratch_reg);
1762 /* D might not fit in 14 bits either; for such cases load D
1763 into scratch reg. */
1764 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1766 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1767 emit_move_insn (scratch_reg,
1768 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1769 Pmode,
1770 XEXP (XEXP (op0, 0), 0),
1771 scratch_reg));
1773 else
1774 emit_move_insn (scratch_reg, XEXP (op0, 0));
1775 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1776 operand1));
1777 return 1;
1780 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1781 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1782 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1784 /* Load memory address into SCRATCH_REG. */
1785 scratch_reg = force_mode (word_mode, scratch_reg);
1786 emit_move_insn (scratch_reg, XEXP (op0, 0));
1787 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1788 operand1));
1789 return 1;
1792 /* Handle secondary reloads for loads of FP registers from constant
1793 expressions by forcing the constant into memory. For the most part,
1794 this is only necessary for SImode and DImode.
1796 Use scratch_reg to hold the address of the memory location. */
1797 else if (scratch_reg
1798 && CONSTANT_P (operand1)
1799 && FP_REG_P (operand0))
1801 rtx const_mem, xoperands[2];
1803 if (operand1 == CONST0_RTX (mode))
1805 emit_insn (gen_rtx_SET (operand0, operand1));
1806 return 1;
1809 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1810 it in WORD_MODE regardless of what mode it was originally given
1811 to us. */
1812 scratch_reg = force_mode (word_mode, scratch_reg);
1814 /* Force the constant into memory and put the address of the
1815 memory location into scratch_reg. */
1816 const_mem = force_const_mem (mode, operand1);
1817 xoperands[0] = scratch_reg;
1818 xoperands[1] = XEXP (const_mem, 0);
1819 pa_emit_move_sequence (xoperands, Pmode, 0);
1821 /* Now load the destination register. */
1822 emit_insn (gen_rtx_SET (operand0,
1823 replace_equiv_address (const_mem, scratch_reg)));
1824 return 1;
1826 /* Handle secondary reloads for SAR. These occur when trying to load
1827 the SAR from memory or a constant. */
1828 else if (scratch_reg
1829 && GET_CODE (operand0) == REG
1830 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1831 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1832 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1834 /* The displacement D might not fit in 14 bits; for such cases,
1835 load D into the scratch register. */
1836 if (GET_CODE (operand1) == MEM
1837 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1839 /* We are reloading the address into the scratch register, so we
1840 want to make sure the scratch register is a full register. */
1841 scratch_reg = force_mode (word_mode, scratch_reg);
1843 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1844 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1845 0)),
1846 Pmode,
1847 XEXP (XEXP (operand1, 0),
1848 0),
1849 scratch_reg));
1851 /* Now we are going to load the scratch register from memory,
1852 we want to load it in the same width as the original MEM,
1853 which must be the same as the width of the ultimate destination,
1854 OPERAND0. */
1855 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1857 emit_move_insn (scratch_reg,
1858 replace_equiv_address (operand1, scratch_reg));
1860 else
1862 /* We want to load the scratch register using the same mode as
1863 the ultimate destination. */
1864 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1866 emit_move_insn (scratch_reg, operand1);
1869 /* And emit the insn to set the ultimate destination. We know that
1870 the scratch register has the same mode as the destination at this
1871 point. */
1872 emit_move_insn (operand0, scratch_reg);
1873 return 1;
1876 /* Handle the most common case: storing into a register. */
1877 if (register_operand (operand0, mode))
1879 /* Legitimize TLS symbol references. This happens for references
1880 that aren't a legitimate constant. */
1881 if (PA_SYMBOL_REF_TLS_P (operand1))
1882 operand1 = legitimize_tls_address (operand1);
1884 if (register_operand (operand1, mode)
1885 || (GET_CODE (operand1) == CONST_INT
1886 && pa_cint_ok_for_move (UINTVAL (operand1)))
1887 || (operand1 == CONST0_RTX (mode))
1888 || (GET_CODE (operand1) == HIGH
1889 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1890 /* Only `general_operands' can come here, so MEM is ok. */
1891 || GET_CODE (operand1) == MEM)
1893 /* Various sets are created during RTL generation which don't
1894 have the REG_POINTER flag correctly set. After the CSE pass,
1895 instruction recognition can fail if we don't consistently
1896 set this flag when performing register copies. This should
1897 also improve the opportunities for creating insns that use
1898 unscaled indexing. */
1899 if (REG_P (operand0) && REG_P (operand1))
1901 if (REG_POINTER (operand1)
1902 && !REG_POINTER (operand0)
1903 && !HARD_REGISTER_P (operand0))
1904 copy_reg_pointer (operand0, operand1);
1907 /* When MEMs are broken out, the REG_POINTER flag doesn't
1908 get set. In some cases, we can set the REG_POINTER flag
1909 from the declaration for the MEM. */
1910 if (REG_P (operand0)
1911 && GET_CODE (operand1) == MEM
1912 && !REG_POINTER (operand0))
1914 tree decl = MEM_EXPR (operand1);
1916 /* Set the register pointer flag and register alignment
1917 if the declaration for this memory reference is a
1918 pointer type. */
1919 if (decl)
1921 tree type;
1923 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1924 tree operand 1. */
1925 if (TREE_CODE (decl) == COMPONENT_REF)
1926 decl = TREE_OPERAND (decl, 1);
1928 type = TREE_TYPE (decl);
1929 type = strip_array_types (type);
1931 if (POINTER_TYPE_P (type))
1932 mark_reg_pointer (operand0, BITS_PER_UNIT);
1936 emit_insn (gen_rtx_SET (operand0, operand1));
1937 return 1;
1940 else if (GET_CODE (operand0) == MEM)
1942 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1943 && !(reload_in_progress || reload_completed))
1945 rtx temp = gen_reg_rtx (DFmode);
1947 emit_insn (gen_rtx_SET (temp, operand1));
1948 emit_insn (gen_rtx_SET (operand0, temp));
1949 return 1;
1951 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1953 /* Run this case quickly. */
1954 emit_insn (gen_rtx_SET (operand0, operand1));
1955 return 1;
1957 if (! (reload_in_progress || reload_completed))
1959 operands[0] = validize_mem (operand0);
1960 operands[1] = operand1 = force_reg (mode, operand1);
1964 /* Simplify the source if we need to.
1965 Note we do have to handle function labels here, even though we do
1966 not consider them legitimate constants. Loop optimizations can
1967 call the emit_move_* routines with one as a source. */
1968 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1969 || (GET_CODE (operand1) == HIGH
1970 && symbolic_operand (XEXP (operand1, 0), mode))
1971 || function_label_operand (operand1, VOIDmode)
1972 || tls_referenced_p (operand1))
1974 int ishighonly = 0;
1976 if (GET_CODE (operand1) == HIGH)
1978 ishighonly = 1;
1979 operand1 = XEXP (operand1, 0);
1981 if (symbolic_operand (operand1, mode))
1983 /* Argh. The assembler and linker can't handle arithmetic
1984 involving plabels.
1986 So we force the plabel into memory, load operand0 from
1987 the memory location, then add in the constant part. */
1988 if ((GET_CODE (operand1) == CONST
1989 && GET_CODE (XEXP (operand1, 0)) == PLUS
1990 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1991 VOIDmode))
1992 || function_label_operand (operand1, VOIDmode))
1994 rtx temp, const_part;
1996 /* Figure out what (if any) scratch register to use. */
1997 if (reload_in_progress || reload_completed)
1999 scratch_reg = scratch_reg ? scratch_reg : operand0;
2000 /* SCRATCH_REG will hold an address and maybe the actual
2001 data. We want it in WORD_MODE regardless of what mode it
2002 was originally given to us. */
2003 scratch_reg = force_mode (word_mode, scratch_reg);
2005 else if (flag_pic)
2006 scratch_reg = gen_reg_rtx (Pmode);
2008 if (GET_CODE (operand1) == CONST)
2010 /* Save away the constant part of the expression. */
2011 const_part = XEXP (XEXP (operand1, 0), 1);
2012 gcc_assert (GET_CODE (const_part) == CONST_INT);
2014 /* Force the function label into memory. */
2015 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2017 else
2019 /* No constant part. */
2020 const_part = NULL_RTX;
2022 /* Force the function label into memory. */
2023 temp = force_const_mem (mode, operand1);
2027 /* Get the address of the memory location. PIC-ify it if
2028 necessary. */
2029 temp = XEXP (temp, 0);
2030 if (flag_pic)
2031 temp = legitimize_pic_address (temp, mode, scratch_reg);
2033 /* Put the address of the memory location into our destination
2034 register. */
2035 operands[1] = temp;
2036 pa_emit_move_sequence (operands, mode, scratch_reg);
2038 /* Now load from the memory location into our destination
2039 register. */
2040 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2041 pa_emit_move_sequence (operands, mode, scratch_reg);
2043 /* And add back in the constant part. */
2044 if (const_part != NULL_RTX)
2045 expand_inc (operand0, const_part);
2047 return 1;
2050 if (flag_pic)
2052 rtx_insn *insn;
2053 rtx temp;
2055 if (reload_in_progress || reload_completed)
2057 temp = scratch_reg ? scratch_reg : operand0;
2058 /* TEMP will hold an address and maybe the actual
2059 data. We want it in WORD_MODE regardless of what mode it
2060 was originally given to us. */
2061 temp = force_mode (word_mode, temp);
2063 else
2064 temp = gen_reg_rtx (Pmode);
2066 /* Force (const (plus (symbol) (const_int))) to memory
2067 if the const_int will not fit in 14 bits. Although
2068 this requires a relocation, the instruction sequence
2069 needed to load the value is shorter. */
2070 if (GET_CODE (operand1) == CONST
2071 && GET_CODE (XEXP (operand1, 0)) == PLUS
2072 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2073 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2075 rtx x, m = force_const_mem (mode, operand1);
2077 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2078 x = replace_equiv_address (m, x);
2079 insn = emit_move_insn (operand0, x);
2081 else
2083 operands[1] = legitimize_pic_address (operand1, mode, temp);
2084 if (REG_P (operand0) && REG_P (operands[1]))
2085 copy_reg_pointer (operand0, operands[1]);
2086 insn = emit_move_insn (operand0, operands[1]);
2089 /* Put a REG_EQUAL note on this insn. */
2090 set_unique_reg_note (insn, REG_EQUAL, operand1);
2092 /* On the HPPA, references to data space are supposed to use dp,
2093 register 27, but showing it in the RTL inhibits various cse
2094 and loop optimizations. */
2095 else
2097 rtx temp, set;
2099 if (reload_in_progress || reload_completed)
2101 temp = scratch_reg ? scratch_reg : operand0;
2102 /* TEMP will hold an address and maybe the actual
2103 data. We want it in WORD_MODE regardless of what mode it
2104 was originally given to us. */
2105 temp = force_mode (word_mode, temp);
2107 else
2108 temp = gen_reg_rtx (mode);
2110 /* Loading a SYMBOL_REF into a register makes that register
2111 safe to be used as the base in an indexed address.
2113 Don't mark hard registers though. That loses. */
2114 if (GET_CODE (operand0) == REG
2115 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2116 mark_reg_pointer (operand0, BITS_PER_UNIT);
2117 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2118 mark_reg_pointer (temp, BITS_PER_UNIT);
2120 if (ishighonly)
2121 set = gen_rtx_SET (operand0, temp);
2122 else
2123 set = gen_rtx_SET (operand0,
2124 gen_rtx_LO_SUM (mode, temp, operand1));
2126 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2127 emit_insn (set);
2130 return 1;
2132 else if (tls_referenced_p (operand1))
2134 rtx tmp = operand1;
2135 rtx addend = NULL;
2137 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2139 addend = XEXP (XEXP (tmp, 0), 1);
2140 tmp = XEXP (XEXP (tmp, 0), 0);
2143 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2144 tmp = legitimize_tls_address (tmp);
2145 if (addend)
2147 tmp = gen_rtx_PLUS (mode, tmp, addend);
2148 tmp = force_operand (tmp, operands[0]);
2150 operands[1] = tmp;
2152 else if (GET_CODE (operand1) != CONST_INT
2153 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2155 rtx temp;
2156 rtx_insn *insn;
2157 rtx op1 = operand1;
2158 HOST_WIDE_INT value = 0;
2159 HOST_WIDE_INT insv = 0;
2160 int insert = 0;
2162 if (GET_CODE (operand1) == CONST_INT)
2163 value = INTVAL (operand1);
2165 if (TARGET_64BIT
2166 && GET_CODE (operand1) == CONST_INT
2167 && HOST_BITS_PER_WIDE_INT > 32
2168 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2170 HOST_WIDE_INT nval;
2172 /* Extract the low order 32 bits of the value and sign extend.
2173 If the new value is the same as the original value, we can
2174 use the original value as-is. If the new value is
2175 different, we use it and insert the most-significant 32-bits
2176 of the original value into the final result. */
2177 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2178 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2179 if (value != nval)
2181 #if HOST_BITS_PER_WIDE_INT > 32
2182 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2183 #endif
2184 insert = 1;
2185 value = nval;
2186 operand1 = GEN_INT (nval);
2190 if (reload_in_progress || reload_completed)
2191 temp = scratch_reg ? scratch_reg : operand0;
2192 else
2193 temp = gen_reg_rtx (mode);
2195 /* We don't directly split DImode constants on 32-bit targets
2196 because PLUS uses an 11-bit immediate and the insn sequence
2197 generated is not as efficient as the one using HIGH/LO_SUM. */
2198 if (GET_CODE (operand1) == CONST_INT
2199 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2200 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2201 && !insert)
2203 /* Directly break constant into high and low parts. This
2204 provides better optimization opportunities because various
2205 passes recognize constants split with PLUS but not LO_SUM.
2206 We use a 14-bit signed low part except when the addition
2207 of 0x4000 to the high part might change the sign of the
2208 high part. */
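/* As a worked illustration of the rule above (values chosen for
   exposition, not from the original source): for VALUE = 0x12347000,
   LOW = 0x3000 and HIGH = 0x12344000.  Since LOW >= 0x2000, HIGH is
   bumped to 0x12348000 and LOW becomes -0x1000, so the pair emitted
   is roughly "ldil L'0x12348000,%tmp" followed by
   "ldo -4096(%tmp),%dst".  */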
2209 HOST_WIDE_INT low = value & 0x3fff;
2210 HOST_WIDE_INT high = value & ~ 0x3fff;
2212 if (low >= 0x2000)
2214 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2215 high += 0x2000;
2216 else
2217 high += 0x4000;
2220 low = value - high;
2222 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2223 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2225 else
2227 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2228 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2231 insn = emit_move_insn (operands[0], operands[1]);
2233 /* Now insert the most significant 32 bits of the value
2234 into the register. When we don't have a second register
2235 available, it could take up to nine instructions to load
2236 a 64-bit integer constant. Prior to reload, we force
2237 constants that would take more than three instructions
2238 to load to the constant pool. During and after reload,
2239 we have to handle all possible values. */
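/* A sketch of the paths below (illustrative): if a second register
   is free and INSV is large, the value is built with HIGH/LO_SUM and
   inserted with a single 32-bit-wide insv.  Otherwise the depdi loop
   runs; e.g. for INSV = 5 the sign-extension loop widens the field
   until POS reaches 0, so one gen_insvdi of a 32-bit field at
   position 0 carrying the value 5 suffices, while values whose bits
   change sign several times take multiple depdi steps of up to five
   bits each.  */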
2240 if (insert)
2242 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2243 register and the value to be inserted is outside the
2244 range that can be loaded with three depdi instructions. */
2245 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2247 operand1 = GEN_INT (insv);
2249 emit_insn (gen_rtx_SET (temp,
2250 gen_rtx_HIGH (mode, operand1)));
2251 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2252 if (mode == DImode)
2253 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2254 const0_rtx, temp));
2255 else
2256 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2257 const0_rtx, temp));
2259 else
2261 int len = 5, pos = 27;
2263 /* Insert the bits using the depdi instruction. */
2264 while (pos >= 0)
2266 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2267 HOST_WIDE_INT sign = v5 < 0;
2269 /* Left extend the insertion. */
2270 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2271 while (pos > 0 && (insv & 1) == sign)
2273 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2274 len += 1;
2275 pos -= 1;
2278 if (mode == DImode)
2279 insn = emit_insn (gen_insvdi (operand0,
2280 GEN_INT (len),
2281 GEN_INT (pos),
2282 GEN_INT (v5)));
2283 else
2284 insn = emit_insn (gen_insvsi (operand0,
2285 GEN_INT (len),
2286 GEN_INT (pos),
2287 GEN_INT (v5)));
2289 len = pos > 0 && pos < 5 ? pos : 5;
2290 pos -= len;
2295 set_unique_reg_note (insn, REG_EQUAL, op1);
2297 return 1;
2300 /* Now have insn-emit do whatever it normally does. */
2301 return 0;
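/* A typical caller is a mov expander in pa.md, which does roughly
   the following (a sketch, not a verbatim quote of the expander):

     if (pa_emit_move_sequence (operands, SImode, 0))
       DONE;

   A nonzero return means the complete move has already been emitted
   and the expander stops; zero lets the standard move patterns match
   the operands unchanged.  */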
2304 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2305 it will need a link/runtime reloc). */
2307 int
2308 pa_reloc_needed (tree exp)
2310 int reloc = 0;
2312 switch (TREE_CODE (exp))
2314 case ADDR_EXPR:
2315 return 1;
2317 case POINTER_PLUS_EXPR:
2318 case PLUS_EXPR:
2319 case MINUS_EXPR:
2320 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2321 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2322 break;
2324 CASE_CONVERT:
2325 case NON_LVALUE_EXPR:
2326 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2327 break;
2329 case CONSTRUCTOR:
2331 tree value;
2332 unsigned HOST_WIDE_INT ix;
2334 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2335 if (value)
2336 reloc |= pa_reloc_needed (value);
2338 break;
2340 case ERROR_MARK:
2341 break;
2343 default:
2344 break;
2346 return reloc;
2350 /* Return the best assembler insn template
2351 for moving operands[1] into operands[0] as a fullword. */
2352 const char *
2353 pa_singlemove_string (rtx *operands)
2355 HOST_WIDE_INT intval;
2357 if (GET_CODE (operands[0]) == MEM)
2358 return "stw %r1,%0";
2359 if (GET_CODE (operands[1]) == MEM)
2360 return "ldw %1,%0";
2361 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2363 long i;
2365 gcc_assert (GET_MODE (operands[1]) == SFmode);
2367 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2368 bit pattern. */
2369 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2371 operands[1] = GEN_INT (i);
2372 /* Fall through to CONST_INT case. */
2374 if (GET_CODE (operands[1]) == CONST_INT)
2376 intval = INTVAL (operands[1]);
2378 if (VAL_14_BITS_P (intval))
2379 return "ldi %1,%0";
2380 else if ((intval & 0x7ff) == 0)
2381 return "ldil L'%1,%0";
2382 else if (pa_zdepi_cint_p (intval))
2383 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2384 else
2385 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2387 return "copy %1,%0";
2391 /* Compute position (in OP[1]) and width (in OP[2])
2392 useful for copying IMM to a register using the zdepi
2393 instructions. Store the immediate value to insert in OP[0]. */
2394 static void
2395 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2397 int lsb, len;
2399 /* Find the least significant set bit in IMM. */
2400 for (lsb = 0; lsb < 32; lsb++)
2402 if ((imm & 1) != 0)
2403 break;
2404 imm >>= 1;
2407 /* Choose variants based on *sign* of the 5-bit field. */
2408 if ((imm & 0x10) == 0)
2409 len = (lsb <= 28) ? 4 : 32 - lsb;
2410 else
2412 /* Find the width of the bitstring in IMM. */
2413 for (len = 5; len < 32 - lsb; len++)
2415 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2416 break;
2419 /* Sign extend IMM as a 5-bit value. */
2420 imm = (imm & 0xf) - 0x10;
2423 op[0] = imm;
2424 op[1] = 31 - lsb;
2425 op[2] = len;
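/* A worked example (illustrative): IMM = 0x1f0.  The lowest set bit
   gives LSB = 4; the shifted IMM is 0x1f, whose bit 4 is set, so the
   width loop finds LEN = 5 and IMM sign-extends to -1.  The result
   is OP = {-1, 27, 5}: deposit five 1-bits ending at position
   31 - 4 = 27, which reconstructs 0x1f0.  */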
2428 /* Compute position (in OP[1]) and width (in OP[2])
2429 useful for copying IMM to a register using the depdi,z
2430 instructions. Store the immediate value to insert in OP[0]. */
2432 static void
2433 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2435 int lsb, len, maxlen;
2437 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2439 /* Find the least significant set bit in IMM. */
2440 for (lsb = 0; lsb < maxlen; lsb++)
2442 if ((imm & 1) != 0)
2443 break;
2444 imm >>= 1;
2447 /* Choose variants based on *sign* of the 5-bit field. */
2448 if ((imm & 0x10) == 0)
2449 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2450 else
2452 /* Find the width of the bitstring in IMM. */
2453 for (len = 5; len < maxlen - lsb; len++)
2455 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2456 break;
2459 /* Extend length if host is narrow and IMM is negative. */
2460 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2461 len += 32;
2463 /* Sign extend IMM as a 5-bit value. */
2464 imm = (imm & 0xf) - 0x10;
2467 op[0] = imm;
2468 op[1] = 63 - lsb;
2469 op[2] = len;
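/* The 64-bit analogue of the example above: IMM = 0x1f0 again yields
   OP = {-1, 59, 5}, since positions count from bit 63
   (OP[1] = 63 - LSB).  As the comment in the code notes, on a 32-bit
   host the length is widened by 32 when the run reaches the end of
   the host word, so the deposit still covers the full doubleword.  */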
2472 /* Output assembler code to perform a doubleword move insn
2473 with operands OPERANDS. */
2475 const char *
2476 pa_output_move_double (rtx *operands)
2478 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2479 rtx latehalf[2];
2480 rtx addreg0 = 0, addreg1 = 0;
2481 int highonly = 0;
2483 /* First classify both operands. */
2485 if (REG_P (operands[0]))
2486 optype0 = REGOP;
2487 else if (offsettable_memref_p (operands[0]))
2488 optype0 = OFFSOP;
2489 else if (GET_CODE (operands[0]) == MEM)
2490 optype0 = MEMOP;
2491 else
2492 optype0 = RNDOP;
2494 if (REG_P (operands[1]))
2495 optype1 = REGOP;
2496 else if (CONSTANT_P (operands[1]))
2497 optype1 = CNSTOP;
2498 else if (offsettable_memref_p (operands[1]))
2499 optype1 = OFFSOP;
2500 else if (GET_CODE (operands[1]) == MEM)
2501 optype1 = MEMOP;
2502 else
2503 optype1 = RNDOP;
2505 /* Check for the cases that the operand constraints are not
2506 supposed to allow to happen. */
2507 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2509 /* Handle copies between general and floating registers. */
2511 if (optype0 == REGOP && optype1 == REGOP
2512 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2514 if (FP_REG_P (operands[0]))
2516 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2517 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2518 return "{fldds|fldd} -16(%%sp),%0";
2520 else
2522 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2523 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2524 return "{ldws|ldw} -12(%%sp),%R0";
2528 /* Handle auto decrementing and incrementing loads and stores
2529 specifically, since the structure of the function doesn't work
2530 for them without major modification. Improve this when we teach
2531 this port about the general inc/dec addressing of the PA.
2532 (This was written by tege. Chide him if it doesn't work.) */
2534 if (optype0 == MEMOP)
2536 /* We have to output the address syntax ourselves, since print_operand
2537 doesn't deal with the addresses we want to use. Fix this later. */
2539 rtx addr = XEXP (operands[0], 0);
2540 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2542 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2544 operands[0] = XEXP (addr, 0);
2545 gcc_assert (GET_CODE (operands[1]) == REG
2546 && GET_CODE (operands[0]) == REG);
2548 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2550 /* No overlap between high target register and address
2551 register. (We do this in a non-obvious way to
2552 save a register file writeback) */
2553 if (GET_CODE (addr) == POST_INC)
2554 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2555 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2557 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2559 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2561 operands[0] = XEXP (addr, 0);
2562 gcc_assert (GET_CODE (operands[1]) == REG
2563 && GET_CODE (operands[0]) == REG);
2565 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2566 /* No overlap between high target register and address
2567 register. (We do this in a non-obvious way to save a
2568 register file writeback) */
2569 if (GET_CODE (addr) == PRE_INC)
2570 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2571 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2574 if (optype1 == MEMOP)
2576 /* We have to output the address syntax ourselves, since print_operand
2577 doesn't deal with the addresses we want to use. Fix this later. */
2579 rtx addr = XEXP (operands[1], 0);
2580 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2582 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2584 operands[1] = XEXP (addr, 0);
2585 gcc_assert (GET_CODE (operands[0]) == REG
2586 && GET_CODE (operands[1]) == REG);
2588 if (!reg_overlap_mentioned_p (high_reg, addr))
2590 /* No overlap between high target register and address
2591 register. (We do this in a non-obvious way to
2592 save a register file writeback) */
2593 if (GET_CODE (addr) == POST_INC)
2594 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2595 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2597 else
2599 /* This is an undefined situation. We should load into the
2600 address register *and* update that register. Probably
2601 we don't need to handle this at all. */
2602 if (GET_CODE (addr) == POST_INC)
2603 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2604 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2607 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2609 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2611 operands[1] = XEXP (addr, 0);
2612 gcc_assert (GET_CODE (operands[0]) == REG
2613 && GET_CODE (operands[1]) == REG);
2615 if (!reg_overlap_mentioned_p (high_reg, addr))
2617 /* No overlap between high target register and address
2618 register. (We do this in a non-obvious way to
2619 save a register file writeback) */
2620 if (GET_CODE (addr) == PRE_INC)
2621 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2622 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2624 else
2626 /* This is an undefined situation. We should load into the
2627 address register *and* update that register. Probably
2628 we don't need to handle this at all. */
2629 if (GET_CODE (addr) == PRE_INC)
2630 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2631 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2634 else if (GET_CODE (addr) == PLUS
2635 && GET_CODE (XEXP (addr, 0)) == MULT)
2637 rtx xoperands[4];
2639 /* Load address into left half of destination register. */
2640 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2641 xoperands[1] = XEXP (addr, 1);
2642 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2643 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2644 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2645 xoperands);
2646 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2648 else if (GET_CODE (addr) == PLUS
2649 && REG_P (XEXP (addr, 0))
2650 && REG_P (XEXP (addr, 1)))
2652 rtx xoperands[3];
2654 /* Load address into left half of destination register. */
2655 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2656 xoperands[1] = XEXP (addr, 0);
2657 xoperands[2] = XEXP (addr, 1);
2658 output_asm_insn ("{addl|add,l} %1,%2,%0",
2659 xoperands);
2660 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2664 /* If an operand is an unoffsettable memory ref, find a register
2665 we can increment temporarily to make it refer to the second word. */
2667 if (optype0 == MEMOP)
2668 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2670 if (optype1 == MEMOP)
2671 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2673 /* Ok, we can do one word at a time.
2674 Normally we do the low-numbered word first.
2676 In either case, set up in LATEHALF the operands to use
2677 for the high-numbered word and in some cases alter the
2678 operands in OPERANDS to be suitable for the low-numbered word. */
2680 if (optype0 == REGOP)
2681 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2682 else if (optype0 == OFFSOP)
2683 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2684 else
2685 latehalf[0] = operands[0];
2687 if (optype1 == REGOP)
2688 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2689 else if (optype1 == OFFSOP)
2690 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2691 else if (optype1 == CNSTOP)
2693 if (GET_CODE (operands[1]) == HIGH)
2695 operands[1] = XEXP (operands[1], 0);
2696 highonly = 1;
2698 split_double (operands[1], &operands[1], &latehalf[1]);
2700 else
2701 latehalf[1] = operands[1];
2703 /* If the first move would clobber the source of the second one,
2704 do them in the other order.
2706 This can happen in two cases:
2708 mem -> register where the first half of the destination register
2709 is the same register used in the memory's address. Reload
2710 can create such insns.
2712 mem in this case will be either register indirect or register
2713 indirect plus a valid offset.
2715 register -> register move where REGNO(dst) == REGNO(src + 1)
2716 someone (Tim/Tege?) claimed this can happen for parameter loads.
2718 Handle mem -> register case first. */
2719 if (optype0 == REGOP
2720 && (optype1 == MEMOP || optype1 == OFFSOP)
2721 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2723 /* Do the late half first. */
2724 if (addreg1)
2725 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2726 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2728 /* Then clobber. */
2729 if (addreg1)
2730 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2731 return pa_singlemove_string (operands);
2734 /* Now handle register -> register case. */
2735 if (optype0 == REGOP && optype1 == REGOP
2736 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2738 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2739 return pa_singlemove_string (operands);
2742 /* Normal case: do the two words, low-numbered first. */
2744 output_asm_insn (pa_singlemove_string (operands), operands);
2746 /* Make any unoffsettable addresses point at high-numbered word. */
2747 if (addreg0)
2748 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2749 if (addreg1)
2750 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2752 /* Do high-numbered word. */
2753 if (highonly)
2754 output_asm_insn ("ldil L'%1,%0", latehalf);
2755 else
2756 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2758 /* Undo the adds we just did. */
2759 if (addreg0)
2760 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2761 if (addreg1)
2762 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2764 return "";
2767 const char *
2768 pa_output_fp_move_double (rtx *operands)
2770 if (FP_REG_P (operands[0]))
2772 if (FP_REG_P (operands[1])
2773 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2774 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2775 else
2776 output_asm_insn ("fldd%F1 %1,%0", operands);
2778 else if (FP_REG_P (operands[1]))
2780 output_asm_insn ("fstd%F0 %1,%0", operands);
2782 else
2784 rtx xoperands[2];
2786 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2788 /* This is a pain. You have to be prepared to deal with an
2789 arbitrary address here, including pre/post increment/decrement,
2791 so we avoid this in the MD. */
2792 gcc_assert (GET_CODE (operands[0]) == REG);
2794 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2795 xoperands[0] = operands[0];
2796 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2798 return "";
2801 /* Return a REG that occurs in ADDR with coefficient 1.
2802 ADDR can be effectively incremented by incrementing REG. */
2804 static rtx
2805 find_addr_reg (rtx addr)
2807 while (GET_CODE (addr) == PLUS)
2809 if (GET_CODE (XEXP (addr, 0)) == REG)
2810 addr = XEXP (addr, 0);
2811 else if (GET_CODE (XEXP (addr, 1)) == REG)
2812 addr = XEXP (addr, 1);
2813 else if (CONSTANT_P (XEXP (addr, 0)))
2814 addr = XEXP (addr, 1);
2815 else if (CONSTANT_P (XEXP (addr, 1)))
2816 addr = XEXP (addr, 0);
2817 else
2818 gcc_unreachable ();
2820 gcc_assert (GET_CODE (addr) == REG);
2821 return addr;
2824 /* Emit code to perform a block move.
2826 OPERANDS[0] is the destination pointer as a REG, clobbered.
2827 OPERANDS[1] is the source pointer as a REG, clobbered.
2828 OPERANDS[2] is a register for temporary storage.
2829 OPERANDS[3] is a register for temporary storage.
2830 OPERANDS[4] is the size as a CONST_INT
2831 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2832 OPERANDS[6] is another temporary register. */
2834 const char *
2835 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2837 int align = INTVAL (operands[5]);
2838 unsigned long n_bytes = INTVAL (operands[4]);
2840 /* We can't move more than a word at a time because the PA
2841 has no integer move insns longer than a word. (Could use fp mem ops?) */
2842 if (align > (TARGET_64BIT ? 8 : 4))
2843 align = (TARGET_64BIT ? 8 : 4);
2845 /* Note that we know each loop below will execute at least twice
2846 (else we would have open-coded the copy). */
2847 switch (align)
2849 case 8:
2850 /* Pre-adjust the loop counter. */
2851 operands[4] = GEN_INT (n_bytes - 16);
2852 output_asm_insn ("ldi %4,%2", operands);
2854 /* Copying loop. */
2855 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2856 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2857 output_asm_insn ("std,ma %3,8(%0)", operands);
2858 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2859 output_asm_insn ("std,ma %6,8(%0)", operands);
2861 /* Handle the residual. There could be up to 15 bytes of
2862 residual to copy! */
2863 if (n_bytes % 16 != 0)
2865 operands[4] = GEN_INT (n_bytes % 8);
2866 if (n_bytes % 16 >= 8)
2867 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2868 if (n_bytes % 8 != 0)
2869 output_asm_insn ("ldd 0(%1),%6", operands);
2870 if (n_bytes % 16 >= 8)
2871 output_asm_insn ("std,ma %3,8(%0)", operands);
2872 if (n_bytes % 8 != 0)
2873 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2875 return "";
2877 case 4:
2878 /* Pre-adjust the loop counter. */
2879 operands[4] = GEN_INT (n_bytes - 8);
2880 output_asm_insn ("ldi %4,%2", operands);
2882 /* Copying loop. */
2883 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2884 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2885 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2886 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2887 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2889 /* Handle the residual. There could be up to 7 bytes of
2890 residual to copy! */
2891 if (n_bytes % 8 != 0)
2893 operands[4] = GEN_INT (n_bytes % 4);
2894 if (n_bytes % 8 >= 4)
2895 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2896 if (n_bytes % 4 != 0)
2897 output_asm_insn ("ldw 0(%1),%6", operands);
2898 if (n_bytes % 8 >= 4)
2899 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2900 if (n_bytes % 4 != 0)
2901 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2903 return "";
2905 case 2:
2906 /* Pre-adjust the loop counter. */
2907 operands[4] = GEN_INT (n_bytes - 4);
2908 output_asm_insn ("ldi %4,%2", operands);
2910 /* Copying loop. */
2911 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2912 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2913 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2914 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2915 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2917 /* Handle the residual. */
2918 if (n_bytes % 4 != 0)
2920 if (n_bytes % 4 >= 2)
2921 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2922 if (n_bytes % 2 != 0)
2923 output_asm_insn ("ldb 0(%1),%6", operands);
2924 if (n_bytes % 4 >= 2)
2925 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2926 if (n_bytes % 2 != 0)
2927 output_asm_insn ("stb %6,0(%0)", operands);
2929 return "";
2931 case 1:
2932 /* Pre-adjust the loop counter. */
2933 operands[4] = GEN_INT (n_bytes - 2);
2934 output_asm_insn ("ldi %4,%2", operands);
2936 /* Copying loop. */
2937 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2938 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2939 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2940 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2941 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2943 /* Handle the residual. */
2944 if (n_bytes % 2 != 0)
2946 output_asm_insn ("ldb 0(%1),%3", operands);
2947 output_asm_insn ("stb %3,0(%0)", operands);
2949 return "";
2951 default:
2952 gcc_unreachable ();
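/* A sketch of the align == 4 output for a constant 16-byte copy
   (GAS syntax; n_bytes % 8 == 0, so there is no residual):

       ldi 8,%2
       ldw,ma 4(%1),%3
       ldw,ma 4(%1),%6
       stw,ma %3,4(%0)
       addib,>= -8,%2,.-12
       stw,ma %6,4(%0)

   The counter starts at n_bytes - 8, so the loop body runs exactly
   twice here, moving 8 bytes per iteration.  */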
2956 /* Count the number of insns necessary to handle this block move.
2958 Basic structure is the same as emit_block_move, except that we
2959 count insns rather than emit them. */
2961 static int
2962 compute_movmem_length (rtx_insn *insn)
2964 rtx pat = PATTERN (insn);
2965 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2966 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2967 unsigned int n_insns = 0;
2969 /* We can't move more than a word at a time because the PA
2970 has no integer move insns longer than a word. (Could use fp mem ops?) */
2971 if (align > (TARGET_64BIT ? 8 : 4))
2972 align = (TARGET_64BIT ? 8 : 4);
2974 /* The basic copying loop. */
2975 n_insns = 6;
2977 /* Residuals. */
2978 if (n_bytes % (2 * align) != 0)
2980 if ((n_bytes % (2 * align)) >= align)
2981 n_insns += 2;
2983 if ((n_bytes % align) != 0)
2984 n_insns += 2;
2987 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2988 return n_insns * 4;
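/* A worked example: with align == 4 and n_bytes == 10 the loop
   accounts for 6 insns; the residual (10 % 8 == 2) is smaller than
   the alignment, so only the sub-word tail adds 2 more, giving
   8 insns, i.e. a length of 32 bytes.  */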
2991 /* Emit code to perform a block clear.
2993 OPERANDS[0] is the destination pointer as a REG, clobbered.
2994 OPERANDS[1] is a register for temporary storage.
2995 OPERANDS[2] is the size as a CONST_INT
2996 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2998 const char *
2999 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3001 int align = INTVAL (operands[3]);
3002 unsigned long n_bytes = INTVAL (operands[2]);
3004 /* We can't clear more than a word at a time because the PA
3005 has no integer move insns longer than a word. */
3006 if (align > (TARGET_64BIT ? 8 : 4))
3007 align = (TARGET_64BIT ? 8 : 4);
3009 /* Note that we know each loop below will execute at least twice
3010 (else we would have open-coded the copy). */
3011 switch (align)
3013 case 8:
3014 /* Pre-adjust the loop counter. */
3015 operands[2] = GEN_INT (n_bytes - 16);
3016 output_asm_insn ("ldi %2,%1", operands);
3018 /* Loop. */
3019 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3020 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3021 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3023 /* Handle the residual. There could be up to 15 bytes of
3024 residual to clear! */
3025 if (n_bytes % 16 != 0)
3027 operands[2] = GEN_INT (n_bytes % 8);
3028 if (n_bytes % 16 >= 8)
3029 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3030 if (n_bytes % 8 != 0)
3031 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3033 return "";
3035 case 4:
3036 /* Pre-adjust the loop counter. */
3037 operands[2] = GEN_INT (n_bytes - 8);
3038 output_asm_insn ("ldi %2,%1", operands);
3040 /* Loop. */
3041 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3042 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3043 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3045 /* Handle the residual. There could be up to 7 bytes of
3046 residual to clear! */
3047 if (n_bytes % 8 != 0)
3049 operands[2] = GEN_INT (n_bytes % 4);
3050 if (n_bytes % 8 >= 4)
3051 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3052 if (n_bytes % 4 != 0)
3053 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3055 return "";
3057 case 2:
3058 /* Pre-adjust the loop counter. */
3059 operands[2] = GEN_INT (n_bytes - 4);
3060 output_asm_insn ("ldi %2,%1", operands);
3062 /* Loop. */
3063 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3064 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3065 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3067 /* Handle the residual. */
3068 if (n_bytes % 4 != 0)
3070 if (n_bytes % 4 >= 2)
3071 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3072 if (n_bytes % 2 != 0)
3073 output_asm_insn ("stb %%r0,0(%0)", operands);
3075 return "";
3077 case 1:
3078 /* Pre-adjust the loop counter. */
3079 operands[2] = GEN_INT (n_bytes - 2);
3080 output_asm_insn ("ldi %2,%1", operands);
3082 /* Loop. */
3083 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3084 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3085 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3087 /* Handle the residual. */
3088 if (n_bytes % 2 != 0)
3089 output_asm_insn ("stb %%r0,0(%0)", operands);
3091 return "";
3093 default:
3094 gcc_unreachable ();
3098 /* Count the number of insns necessary to handle this block clear.
3100 Basic structure is the same as emit_block_move, except that we
3101 count insns rather than emit them. */
3103 static int
3104 compute_clrmem_length (rtx_insn *insn)
3106 rtx pat = PATTERN (insn);
3107 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3108 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3109 unsigned int n_insns = 0;
3111 /* We can't clear more than a word at a time because the PA
3112 has no integer move insns longer than a word. */
3113 if (align > (TARGET_64BIT ? 8 : 4))
3114 align = (TARGET_64BIT ? 8 : 4);
3116 /* The basic loop. */
3117 n_insns = 4;
3119 /* Residuals. */
3120 if (n_bytes % (2 * align) != 0)
3122 if ((n_bytes % (2 * align)) >= align)
3123 n_insns++;
3125 if ((n_bytes % align) != 0)
3126 n_insns++;
3129 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3130 return n_insns * 4;
3134 const char *
3135 pa_output_and (rtx *operands)
3137 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3139 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3140 int ls0, ls1, ms0, p, len;
3142 for (ls0 = 0; ls0 < 32; ls0++)
3143 if ((mask & (1 << ls0)) == 0)
3144 break;
3146 for (ls1 = ls0; ls1 < 32; ls1++)
3147 if ((mask & (1 << ls1)) != 0)
3148 break;
3150 for (ms0 = ls1; ms0 < 32; ms0++)
3151 if ((mask & (1 << ms0)) == 0)
3152 break;
3154 gcc_assert (ms0 == 32);
3156 if (ls1 == 32)
3158 len = ls0;
3160 gcc_assert (len);
3162 operands[2] = GEN_INT (len);
3163 return "{extru|extrw,u} %1,31,%2,%0";
3165 else
3167 /* We could use this `depi' for the case above as well, but `depi'
3168 requires one more register file access than an `extru'. */
3170 p = 31 - ls0;
3171 len = ls1 - ls0;
3173 operands[2] = GEN_INT (p);
3174 operands[3] = GEN_INT (len);
3175 return "{depi|depwi} 0,%2,%3,%0";
3178 else
3179 return "and %1,%2,%0";
3182 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3183 storing the result in operands[0]. */
3184 const char *
3185 pa_output_64bit_and (rtx *operands)
3187 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3189 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3190 int ls0, ls1, ms0, p, len;
3192 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3193 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3194 break;
3196 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3197 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3198 break;
3200 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3201 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3202 break;
3204 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3206 if (ls1 == HOST_BITS_PER_WIDE_INT)
3208 len = ls0;
3210 gcc_assert (len);
3212 operands[2] = GEN_INT (len);
3213 return "extrd,u %1,63,%2,%0";
3215 else
3217 /* We could use this `depdi' for the case above as well, but `depdi'
3218 requires one more register file access than an `extrd,u'. */
3220 p = 63 - ls0;
3221 len = ls1 - ls0;
3223 operands[2] = GEN_INT (p);
3224 operands[3] = GEN_INT (len);
3225 return "depdi 0,%2,%3,%0";
3228 else
3229 return "and %1,%2,%0";
3232 const char *
3233 pa_output_ior (rtx *operands)
3235 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3236 int bs0, bs1, p, len;
3238 if (INTVAL (operands[2]) == 0)
3239 return "copy %1,%0";
3241 for (bs0 = 0; bs0 < 32; bs0++)
3242 if ((mask & (1 << bs0)) != 0)
3243 break;
3245 for (bs1 = bs0; bs1 < 32; bs1++)
3246 if ((mask & (1 << bs1)) == 0)
3247 break;
3249 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3251 p = 31 - bs0;
3252 len = bs1 - bs0;
3254 operands[2] = GEN_INT (p);
3255 operands[3] = GEN_INT (len);
3256 return "{depi|depwi} -1,%2,%3,%0";
3259 /* Return a string to perform a bitwise inclusive-or of operands[1] with operands[2]
3260 storing the result in operands[0]. */
3261 const char *
3262 pa_output_64bit_ior (rtx *operands)
3264 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3265 int bs0, bs1, p, len;
3267 if (INTVAL (operands[2]) == 0)
3268 return "copy %1,%0";
3270 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3271 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3272 break;
3274 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3275 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3276 break;
3278 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3279 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3281 p = 63 - bs0;
3282 len = bs1 - bs0;
3284 operands[2] = GEN_INT (p);
3285 operands[3] = GEN_INT (len);
3286 return "depdi -1,%2,%3,%0";
3289 /* Target hook for assembling integer objects. This code handles
3290 aligned SI and DI integers specially since function references
3291 must be preceded by P%. */
3293 static bool
3294 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3296 if (size == UNITS_PER_WORD
3297 && aligned_p
3298 && function_label_operand (x, VOIDmode))
3300 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3302 /* We don't want an OPD when generating fast indirect calls. */
3303 if (!TARGET_FAST_INDIRECT_CALLS)
3304 fputs ("P%", asm_out_file);
3306 output_addr_const (asm_out_file, x);
3307 fputc ('\n', asm_out_file);
3308 return true;
3310 return default_assemble_integer (x, size, aligned_p);
3313 /* Output an ascii string. */
3314 void
3315 pa_output_ascii (FILE *file, const char *p, int size)
3317 int i;
3318 int chars_output;
3319 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3321 /* The HP assembler can only take strings of 256 characters at one
3322 time. This is a limitation on input line length, *not* the
3323 length of the string. Sigh. Even worse, it seems that the
3324 restriction is in number of input characters (see \xnn &
3325 \whatever). So we have to do this very carefully. */
3327 fputs ("\t.STRING \"", file);
3329 chars_output = 0;
3330 for (i = 0; i < size; i += 4)
3332 int co = 0;
3333 int io = 0;
3334 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3336 register unsigned int c = (unsigned char) p[i + io];
3338 if (c == '\"' || c == '\\')
3339 partial_output[co++] = '\\';
3340 if (c >= ' ' && c < 0177)
3341 partial_output[co++] = c;
3342 else
3344 unsigned int hexd;
3345 partial_output[co++] = '\\';
3346 partial_output[co++] = 'x';
3347 hexd = c / 16 - 0 + '0';
3348 if (hexd > '9')
3349 hexd -= '9' - 'a' + 1;
3350 partial_output[co++] = hexd;
3351 hexd = c % 16 - 0 + '0';
3352 if (hexd > '9')
3353 hexd -= '9' - 'a' + 1;
3354 partial_output[co++] = hexd;
3357 if (chars_output + co > 243)
3359 fputs ("\"\n\t.STRING \"", file);
3360 chars_output = 0;
3362 fwrite (partial_output, 1, (size_t) co, file);
3363 chars_output += co;
3364 co = 0;
3366 fputs ("\"\n", file);
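/* A worked example of the hex escape above: for the byte 0xab,
   c / 16 == 10 maps past '9' and is folded down to 'a', and
   c % 16 == 11 likewise becomes 'b', so the emitted string contains
   the four characters "\xab".  */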
3369 /* Try to rewrite floating point comparisons & branches to avoid
3370 useless add,tr insns.
3372 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3373 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3374 first attempt to remove useless add,tr insns. It is zero
3375 for the second pass as reorg sometimes leaves bogus REG_DEAD
3376 notes lying around.
3378 When CHECK_NOTES is zero we can only eliminate add,tr insns
3379 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3380 instructions. */
3381 static void
3382 remove_useless_addtr_insns (int check_notes)
3384 rtx_insn *insn;
3385 static int pass = 0;
3387 /* This is fairly cheap, so always run it when optimizing. */
3388 if (optimize > 0)
3390 int fcmp_count = 0;
3391 int fbranch_count = 0;
3393 /* Walk all the insns in this function looking for fcmp & fbranch
3394 instructions. Keep track of how many of each we find. */
3395 for (insn = get_insns (); insn; insn = next_insn (insn))
3397 rtx tmp;
3399 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3400 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3401 continue;
3403 tmp = PATTERN (insn);
3405 /* It must be a set. */
3406 if (GET_CODE (tmp) != SET)
3407 continue;
3409 /* If the destination is CCFP, then we've found an fcmp insn. */
3410 tmp = SET_DEST (tmp);
3411 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3413 fcmp_count++;
3414 continue;
3417 tmp = PATTERN (insn);
3418 /* If this is an fbranch instruction, bump the fbranch counter. */
3419 if (GET_CODE (tmp) == SET
3420 && SET_DEST (tmp) == pc_rtx
3421 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3422 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3423 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3424 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3426 fbranch_count++;
3427 continue;
3432 /* Find all floating point compare + branch insns. If possible,
3433 reverse the comparison & the branch to avoid add,tr insns. */
3434 for (insn = get_insns (); insn; insn = next_insn (insn))
3436 rtx tmp;
3437 rtx_insn *next;
3439 /* Ignore anything that isn't an INSN. */
3440 if (! NONJUMP_INSN_P (insn))
3441 continue;
3443 tmp = PATTERN (insn);
3445 /* It must be a set. */
3446 if (GET_CODE (tmp) != SET)
3447 continue;
3449 /* The destination must be CCFP, which is register zero. */
3450 tmp = SET_DEST (tmp);
3451 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3452 continue;
3454 /* INSN should be a set of CCFP.
3456 See if the result of this insn is used in a reversed FP
3457 conditional branch. If so, reverse our condition and
3458 the branch. Doing so avoids useless add,tr insns. */
3459 next = next_insn (insn);
3460 while (next)
3462 /* Jumps, calls and labels stop our search. */
3463 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3464 break;
3466 /* As does another fcmp insn. */
3467 if (NONJUMP_INSN_P (next)
3468 && GET_CODE (PATTERN (next)) == SET
3469 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3470 && REGNO (SET_DEST (PATTERN (next))) == 0)
3471 break;
3473 next = next_insn (next);
3476 /* Is NEXT a branch? */
3477 if (next && JUMP_P (next))
3479 rtx pattern = PATTERN (next);
3481 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3482 and CCFP dies, then reverse our conditional and the branch
3483 to avoid the add,tr. */
3484 if (GET_CODE (pattern) == SET
3485 && SET_DEST (pattern) == pc_rtx
3486 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3487 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3488 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3489 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3490 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3491 && (fcmp_count == fbranch_count
3492 || (check_notes
3493 && find_regno_note (next, REG_DEAD, 0))))
3495 /* Reverse the branch. */
3496 tmp = XEXP (SET_SRC (pattern), 1);
3497 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3498 XEXP (SET_SRC (pattern), 2) = tmp;
3499 INSN_CODE (next) = -1;
3501 /* Reverse our condition. */
3502 tmp = PATTERN (insn);
3503 PUT_CODE (XEXP (tmp, 1),
3504 (reverse_condition_maybe_unordered
3505 (GET_CODE (XEXP (tmp, 1)))));
3511 pass = !pass;
3515 /* You may have trouble believing this, but this is the 32-bit HP-PA
3516 stack layout. Wow.
3518 Offset Contents
3520 Variable arguments (optional; any number may be allocated)
3522 SP-(4*(N+9)) arg word N
3524 SP-56 arg word 5
3525 SP-52 arg word 4
3527 Fixed arguments (must be allocated; may remain unused)
3529 SP-48 arg word 3
3530 SP-44 arg word 2
3531 SP-40 arg word 1
3532 SP-36 arg word 0
3534 Frame Marker
3536 SP-32 External Data Pointer (DP)
3537 SP-28 External sr4
3538 SP-24 External/stub RP (RP')
3539 SP-20 Current RP
3540 SP-16 Static Link
3541 SP-12 Clean up
3542 SP-8 Calling Stub RP (RP'')
3543 SP-4 Previous SP
3545 Top of Frame
3547 SP-0 Stack Pointer (points to next available address)
3551 /* This function saves registers as follows. Registers marked with ' are
3552 this function's registers (as opposed to the previous function's).
3553 If a frame_pointer isn't needed, r4 is saved as a general register;
3554 the space for the frame pointer is still allocated, though, to keep
3555 things simple.
3558 Top of Frame
3560 SP (FP') Previous FP
3561 SP + 4 Alignment filler (sigh)
3562 SP + 8 Space for locals reserved here.
3566 SP + n All call-saved registers used.
3570 SP + o All call-saved fp registers used.
3574 SP + p (SP') points to next available address.
3578 /* Global variables set by pa_expand_prologue(). */
3579 /* Size of frame. Need to know this to emit return insns from
3580 leaf procedures. */
3581 static HOST_WIDE_INT actual_fsize, local_fsize;
3582 static int save_fregs;
3584 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3585 Handle case where DISP > 8k by using the add_high_const patterns.
3587 Note that in the DISP > 8k case, we will leave the high part of the
3588 address in %r1. There is code in pa_expand_{prologue,epilogue} that knows this. */
3590 static void
3591 store_reg (int reg, HOST_WIDE_INT disp, int base)
3593 rtx dest, src, basereg;
3594 rtx_insn *insn;
3596 src = gen_rtx_REG (word_mode, reg);
3597 basereg = gen_rtx_REG (Pmode, base);
3598 if (VAL_14_BITS_P (disp))
3600 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3601 insn = emit_move_insn (dest, src);
3603 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3605 rtx delta = GEN_INT (disp);
3606 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3608 emit_move_insn (tmpreg, delta);
3609 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3610 if (DO_FRAME_NOTES)
3612 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3613 gen_rtx_SET (tmpreg,
3614 gen_rtx_PLUS (Pmode, basereg, delta)));
3615 RTX_FRAME_RELATED_P (insn) = 1;
3617 dest = gen_rtx_MEM (word_mode, tmpreg);
3618 insn = emit_move_insn (dest, src);
3620 else
3622 rtx delta = GEN_INT (disp);
3623 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3624 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3626 emit_move_insn (tmpreg, high);
3627 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3628 insn = emit_move_insn (dest, src);
3629 if (DO_FRAME_NOTES)
3630 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3631 gen_rtx_SET (gen_rtx_MEM (word_mode,
3632 gen_rtx_PLUS (word_mode,
3633 basereg,
3634 delta)),
3635 src));
3638 if (DO_FRAME_NOTES)
3639 RTX_FRAME_RELATED_P (insn) = 1;
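/* A sketch of the three cases above (register numbers illustrative):
   a 14-bit DISP stores directly, e.g. "stw %r4,-40(%r30)"; a DISP
   that fits in 32 bits goes through %r1, roughly
   "addil L'DISP,%r30" then "stw %r4,R'DISP(%r1)"; and on 64-bit
   targets a DISP outside 32 bits is first materialized in %r1 and
   added to the base register before the store.  */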
3642 /* Emit RTL to store REG at the memory location specified by BASE and then
3643 add MOD to BASE. MOD must be <= 8k. */
3645 static void
3646 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3648 rtx basereg, srcreg, delta;
3649 rtx_insn *insn;
3651 gcc_assert (VAL_14_BITS_P (mod));
3653 basereg = gen_rtx_REG (Pmode, base);
3654 srcreg = gen_rtx_REG (word_mode, reg);
3655 delta = GEN_INT (mod);
3657 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3658 if (DO_FRAME_NOTES)
3660 RTX_FRAME_RELATED_P (insn) = 1;
3662 /* RTX_FRAME_RELATED_P must be set on each frame related set
3663 in a parallel with more than one element. */
3664 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3665 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3669 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3670 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3671 whether to add a frame note or not.
3673 In the DISP > 8k case, we leave the high part of the address in %r1.
3674 There is code in pa_expand_{prologue,epilogue} that knows about this. */
3676 static void
3677 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3679 rtx_insn *insn;
3681 if (VAL_14_BITS_P (disp))
3683 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3684 plus_constant (Pmode,
3685 gen_rtx_REG (Pmode, base), disp));
3687 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3689 rtx basereg = gen_rtx_REG (Pmode, base);
3690 rtx delta = GEN_INT (disp);
3691 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3693 emit_move_insn (tmpreg, delta);
3694 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3695 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3696 if (DO_FRAME_NOTES)
3697 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3698 gen_rtx_SET (tmpreg,
3699 gen_rtx_PLUS (Pmode, basereg, delta)));
3701 else
3703 rtx basereg = gen_rtx_REG (Pmode, base);
3704 rtx delta = GEN_INT (disp);
3705 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3707 emit_move_insn (tmpreg,
3708 gen_rtx_PLUS (Pmode, basereg,
3709 gen_rtx_HIGH (Pmode, delta)));
3710 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3711 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3714 if (DO_FRAME_NOTES && note)
3715 RTX_FRAME_RELATED_P (insn) = 1;
3718 HOST_WIDE_INT
3719 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3721 int freg_saved = 0;
3722 int i, j;
3724 /* The code in pa_expand_prologue and pa_expand_epilogue must
3725 be consistent with the rounding and size calculation done here.
3726 Change them at the same time. */
3728 /* We do our own stack alignment. First, round the size of the
3729 stack locals up to a word boundary. */
3730 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3732 /* Space for previous frame pointer + filler. If any frame is
3733 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3734 waste some space here for the sake of HP compatibility. The
3735 first slot is only used when the frame pointer is needed. */
3736 if (size || frame_pointer_needed)
3737 size += STARTING_FRAME_OFFSET;
3739 /* If the current function calls __builtin_eh_return, then we need
3740 to allocate stack space for registers that will hold data for
3741 the exception handler. */
3742 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3744 unsigned int i;
3746 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3747 continue;
3748 size += i * UNITS_PER_WORD;
3751 /* Account for space used by the callee general register saves. */
3752 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3753 if (df_regs_ever_live_p (i))
3754 size += UNITS_PER_WORD;
3756 /* Account for space used by the callee floating point register saves. */
3757 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3758 if (df_regs_ever_live_p (i)
3759 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3761 freg_saved = 1;
3763 /* We always save both halves of the FP register, so always
3764 increment the frame size by 8 bytes. */
3765 size += 8;
3768 /* If any of the floating registers are saved, account for the
3769 alignment needed for the floating point register save block. */
3770 if (freg_saved)
3772 size = (size + 7) & ~7;
3773 if (fregs_live)
3774 *fregs_live = 1;
3777 /* The various ABIs include space for the outgoing parameters in the
3778 size of the current function's stack frame. We don't need to align
3779 for the outgoing arguments as their alignment is set by the final
3780 rounding for the frame as a whole. */
3781 size += crtl->outgoing_args_size;
3783 /* Allocate space for the fixed frame marker. This space must be
3784 allocated for any function that makes calls or allocates
3785 stack space. */
3786 if (!crtl->is_leaf || size)
3787 size += TARGET_64BIT ? 48 : 32;
3789 /* Finally, round to the preferred stack boundary. */
3790 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3791 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
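/* [Standalone sketch -- not from the original file.]  The rounding idiom
   used repeatedly above, (size + N - 1) & ~(N - 1), rounds SIZE up to
   the next multiple of N, provided N is a power of two.  */
static long
sketch_round_up (long size, long n)
{
  return (size + n - 1) & ~(n - 1);	/* N must be a power of two */
}
/* sketch_round_up (13, 8) == 16; sketch_round_up (16, 8) == 16.  */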
3794 /* Generate the assembly code for function entry. FILE is a stdio
3795    stream to output the code to.  SIZE is a HOST_WIDE_INT: how many
3796    units of temporary storage to allocate.
3798 Refer to the array `regs_ever_live' to determine which registers to
3799 save; `regs_ever_live[I]' is nonzero if register number I is ever
3800 used in the function. This function is responsible for knowing
3801 which registers should not be saved even if used. */
3803 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3804 of memory. If any fpu reg is used in the function, we allocate
3805 such a block here, at the bottom of the frame, just in case it's needed.
3807 If this function is a leaf procedure, then we may choose not
3808 to do a "save" insn. The decision about whether or not
3809 to do this is made in regclass.c. */
3811 static void
3812 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3814 /* The function's label and associated .PROC must never be
3815 separated and must be output *after* any profiling declarations
3816 to avoid changing spaces/subspaces within a procedure. */
3817 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3818 fputs ("\t.PROC\n", file);
3820 /* pa_expand_prologue does the dirty work now. We just need
3821 to output the assembler directives which denote the start
3822 of a function. */
3823 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3824 if (crtl->is_leaf)
3825 fputs (",NO_CALLS", file);
3826 else
3827 fputs (",CALLS", file);
3828 if (rp_saved)
3829 fputs (",SAVE_RP", file);
3831 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3832 at the beginning of the frame and that it is used as the frame
3833 pointer for the frame. We do this because our current frame
3834 layout doesn't conform to that specified in the HP runtime
3835 documentation and we need a way to indicate to programs such as
3836 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3837 isn't used by HP compilers but is supported by the assembler.
3838 However, SAVE_SP is supposed to indicate that the previous stack
3839 pointer has been saved in the frame marker. */
3840 if (frame_pointer_needed)
3841 fputs (",SAVE_SP", file);
3843 /* Pass on information about the number of callee register saves
3844 performed in the prologue.
3846 The compiler is supposed to pass the highest register number
3847 saved, the assembler then has to adjust that number before
3848 entering it into the unwind descriptor (to account for any
3849 caller saved registers with lower register numbers than the
3850 first callee saved register). */
3851 if (gr_saved)
3852 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3854 if (fr_saved)
3855 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3857 fputs ("\n\t.ENTRY\n", file);
3859 remove_useless_addtr_insns (0);
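/* [Hypothetical example -- not emitted verbatim by this code.]  For a
   non-leaf function "foo" with a 128-byte frame that saves %r2 and
   needs a frame pointer (so gr_saved counts only %r3), the directives
   produced above would look roughly like:

	foo
		.PROC
		.CALLINFO FRAME=128,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=3
		.ENTRY
*/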
3862 void
3863 pa_expand_prologue (void)
3865 int merge_sp_adjust_with_store = 0;
3866 HOST_WIDE_INT size = get_frame_size ();
3867 HOST_WIDE_INT offset;
3868 int i;
3869 rtx tmpreg;
3870 rtx_insn *insn;
3872 gr_saved = 0;
3873 fr_saved = 0;
3874 save_fregs = 0;
3876 /* Compute total size for frame pointer, filler, locals and rounding to
3877 the next word boundary. Similar code appears in pa_compute_frame_size
3878 and must be changed in tandem with this code. */
3879 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3880 if (local_fsize || frame_pointer_needed)
3881 local_fsize += STARTING_FRAME_OFFSET;
3883 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3884 if (flag_stack_usage_info)
3885 current_function_static_stack_size = actual_fsize;
3887 /* Compute a few things we will use often. */
3888 tmpreg = gen_rtx_REG (word_mode, 1);
3890 /* Save RP first. The calling conventions manual states RP will
3891 always be stored into the caller's frame at sp - 20 or sp - 16
3892 depending on which ABI is in use. */
3893 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3895 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3896 rp_saved = true;
3898 else
3899 rp_saved = false;
3901 /* Allocate the local frame and set up the frame pointer if needed. */
3902 if (actual_fsize != 0)
3904 if (frame_pointer_needed)
3906 /* Copy the old frame pointer temporarily into %r1. Set up the
3907 new stack pointer, then store away the saved old frame pointer
3908 into the stack at sp and at the same time update the stack
3909            pointer by actual_fsize bytes.  There are two versions: the
3910            first handles small (<8k) frames, the second large (>=8k)
3911            frames.  */
3912 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3913 if (DO_FRAME_NOTES)
3914 RTX_FRAME_RELATED_P (insn) = 1;
3916 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3917 if (DO_FRAME_NOTES)
3918 RTX_FRAME_RELATED_P (insn) = 1;
3920 if (VAL_14_BITS_P (actual_fsize))
3921 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3922 else
3924 /* It is incorrect to store the saved frame pointer at *sp,
3925 then increment sp (writes beyond the current stack boundary).
3927 So instead use stwm to store at *sp and post-increment the
3928 stack pointer as an atomic operation. Then increment sp to
3929 finish allocating the new frame. */
3930 HOST_WIDE_INT adjust1 = 8192 - 64;
3931 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3933 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3934 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3935 adjust2, 1);
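/* [Illustrative numbers -- not from the original source.]  For
   actual_fsize == 10000: adjust1 == 8192 - 64 == 8128 is allocated by
   the atomic store-and-modify (stwm), and adjust2 == 10000 - 8128 ==
   1872 finishes raising the stack pointer.  */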
3938 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3939 we need to store the previous stack pointer (frame pointer)
3940 into the frame marker on targets that use the HP unwind
3941 library. This allows the HP unwind library to be used to
3942 unwind GCC frames. However, we are not fully compatible
3943 with the HP library because our frame layout differs from
3944 that specified in the HP runtime specification.
3946 We don't want a frame note on this instruction as the frame
3947 marker moves during dynamic stack allocation.
3949 This instruction also serves as a blockage to prevent
3950 register spills from being scheduled before the stack
3951 pointer is raised. This is necessary as we store
3952 registers using the frame pointer as a base register,
3953 and the frame pointer is set before sp is raised. */
3954 if (TARGET_HPUX_UNWIND_LIBRARY)
3956 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3957 GEN_INT (TARGET_64BIT ? -8 : -4));
3959 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3960 hard_frame_pointer_rtx);
3962 else
3963 emit_insn (gen_blockage ());
3965 /* no frame pointer needed. */
3966 else
3968 /* In some cases we can perform the first callee register save
3969 and allocating the stack frame at the same time. If so, just
3970 make a note of it and defer allocating the frame until saving
3971 the callee registers. */
3972 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3973 merge_sp_adjust_with_store = 1;
3974      /* Cannot optimize.  Adjust the stack frame by actual_fsize
3975 bytes. */
3976 else
3977 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3978 actual_fsize, 1);
3982 /* Normal register save.
3984 Do not save the frame pointer in the frame_pointer_needed case. It
3985 was done earlier. */
3986 if (frame_pointer_needed)
3988 offset = local_fsize;
3990 /* Saving the EH return data registers in the frame is the simplest
3991 way to get the frame unwind information emitted. We put them
3992 just before the general registers. */
3993 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3995 unsigned int i, regno;
3997 for (i = 0; ; ++i)
3999 regno = EH_RETURN_DATA_REGNO (i);
4000 if (regno == INVALID_REGNUM)
4001 break;
4003 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4004 offset += UNITS_PER_WORD;
4008 for (i = 18; i >= 4; i--)
4009 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4011 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4012 offset += UNITS_PER_WORD;
4013 gr_saved++;
4015 /* Account for %r3 which is saved in a special place. */
4016 gr_saved++;
4018 /* No frame pointer needed. */
4019 else
4021 offset = local_fsize - actual_fsize;
4023 /* Saving the EH return data registers in the frame is the simplest
4024 way to get the frame unwind information emitted. */
4025 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4027 unsigned int i, regno;
4029 for (i = 0; ; ++i)
4031 regno = EH_RETURN_DATA_REGNO (i);
4032 if (regno == INVALID_REGNUM)
4033 break;
4035 /* If merge_sp_adjust_with_store is nonzero, then we can
4036 optimize the first save. */
4037 if (merge_sp_adjust_with_store)
4039 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4040 merge_sp_adjust_with_store = 0;
4042 else
4043 store_reg (regno, offset, STACK_POINTER_REGNUM);
4044 offset += UNITS_PER_WORD;
4048 for (i = 18; i >= 3; i--)
4049 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4051 /* If merge_sp_adjust_with_store is nonzero, then we can
4052 optimize the first GR save. */
4053 if (merge_sp_adjust_with_store)
4055 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4056 merge_sp_adjust_with_store = 0;
4058 else
4059 store_reg (i, offset, STACK_POINTER_REGNUM);
4060 offset += UNITS_PER_WORD;
4061 gr_saved++;
4064 /* If we wanted to merge the SP adjustment with a GR save, but we never
4065 did any GR saves, then just emit the adjustment here. */
4066 if (merge_sp_adjust_with_store)
4067 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4068 actual_fsize, 1);
4071 /* The hppa calling conventions say that %r19, the pic offset
4072 register, is saved at sp - 32 (in this function's frame)
4073 when generating PIC code. FIXME: What is the correct thing
4074 to do for functions which make no calls and allocate no
4075 frame? Do we need to allocate a frame, or can we just omit
4076 the save? For now we'll just omit the save.
4078 We don't want a note on this insn as the frame marker can
4079 move if there is a dynamic stack allocation. */
4080 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4082 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4084 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4088 /* Align pointer properly (doubleword boundary). */
4089 offset = (offset + 7) & ~7;
4091 /* Floating point register store. */
4092 if (save_fregs)
4094 rtx base;
4096 /* First get the frame or stack pointer to the start of the FP register
4097 save area. */
4098 if (frame_pointer_needed)
4100 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4101 base = hard_frame_pointer_rtx;
4103 else
4105 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4106 base = stack_pointer_rtx;
4109 /* Now actually save the FP registers. */
4110 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4112 if (df_regs_ever_live_p (i)
4113 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4115 rtx addr, reg;
4116 rtx_insn *insn;
4117 addr = gen_rtx_MEM (DFmode,
4118 gen_rtx_POST_INC (word_mode, tmpreg));
4119 reg = gen_rtx_REG (DFmode, i);
4120 insn = emit_move_insn (addr, reg);
4121 if (DO_FRAME_NOTES)
4123 RTX_FRAME_RELATED_P (insn) = 1;
4124 if (TARGET_64BIT)
4126 rtx mem = gen_rtx_MEM (DFmode,
4127 plus_constant (Pmode, base,
4128 offset));
4129 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4130 gen_rtx_SET (mem, reg));
4132 else
4134 rtx meml = gen_rtx_MEM (SFmode,
4135 plus_constant (Pmode, base,
4136 offset));
4137 rtx memr = gen_rtx_MEM (SFmode,
4138 plus_constant (Pmode, base,
4139 offset + 4));
4140 rtx regl = gen_rtx_REG (SFmode, i);
4141 rtx regr = gen_rtx_REG (SFmode, i + 1);
4142 rtx setl = gen_rtx_SET (meml, regl);
4143 rtx setr = gen_rtx_SET (memr, regr);
4144 rtvec vec;
4146 RTX_FRAME_RELATED_P (setl) = 1;
4147 RTX_FRAME_RELATED_P (setr) = 1;
4148 vec = gen_rtvec (2, setl, setr);
4149 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4150 gen_rtx_SEQUENCE (VOIDmode, vec));
4153 offset += GET_MODE_SIZE (DFmode);
4154 fr_saved++;
4160 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4161 Handle case where DISP > 8k by using the add_high_const patterns. */
4163 static void
4164 load_reg (int reg, HOST_WIDE_INT disp, int base)
4166 rtx dest = gen_rtx_REG (word_mode, reg);
4167 rtx basereg = gen_rtx_REG (Pmode, base);
4168 rtx src;
4170 if (VAL_14_BITS_P (disp))
4171 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4172 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4174 rtx delta = GEN_INT (disp);
4175 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4177 emit_move_insn (tmpreg, delta);
4178 if (TARGET_DISABLE_INDEXING)
4180 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4181 src = gen_rtx_MEM (word_mode, tmpreg);
4183 else
4184 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4186 else
4188 rtx delta = GEN_INT (disp);
4189 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4190 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4192 emit_move_insn (tmpreg, high);
4193 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4196 emit_move_insn (dest, src);
4199 /* Update the total code bytes output to the text section. */
4201 static void
4202 update_total_code_bytes (unsigned int nbytes)
4204 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4205 && !IN_NAMED_SECTION_P (cfun->decl))
4207 unsigned int old_total = total_code_bytes;
4209 total_code_bytes += nbytes;
4211 /* Be prepared to handle overflows. */
4212 if (old_total > total_code_bytes)
4213 total_code_bytes = UINT_MAX;
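/* [Sketch -- not part of the original file; UINT_MAX comes from
   <limits.h>, which GCC's system.h provides.]  The overflow handling
   above is a saturating add: unsigned wraparound makes the new total
   compare less than the old one, and the count then clamps.  */
static unsigned int
sketch_sat_add (unsigned int total, unsigned int nbytes)
{
  unsigned int t = total + nbytes;
  return t < total ? UINT_MAX : t;	/* clamp on wraparound */
}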
4217 /* This function generates the assembly code for function exit.
4218 Args are as for output_function_prologue ().
4220 The function epilogue should not depend on the current stack
4221 pointer! It should use the frame pointer only. This is mandatory
4222 because of alloca; we also take advantage of it to omit stack
4223 adjustments before returning. */
4225 static void
4226 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4228 rtx_insn *insn = get_last_insn ();
4229 bool extra_nop;
4231 /* pa_expand_epilogue does the dirty work now. We just need
4232 to output the assembler directives which denote the end
4233 of a function.
4235 To make debuggers happy, emit a nop if the epilogue was completely
4236 eliminated due to a volatile call as the last insn in the
4237 current function. That way the return address (in %r2) will
4238 always point to a valid instruction in the current function. */
4240 /* Get the last real insn. */
4241 if (NOTE_P (insn))
4242 insn = prev_real_insn (insn);
4244 /* If it is a sequence, then look inside. */
4245 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4246 insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0);
4248 /* If insn is a CALL_INSN, then it must be a call to a volatile
4249 function (otherwise there would be epilogue insns). */
4250 if (insn && CALL_P (insn))
4252 fputs ("\tnop\n", file);
4253 extra_nop = true;
4255 else
4256 extra_nop = false;
4258 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4260 if (TARGET_SOM && TARGET_GAS)
4262 /* We are done with this subspace except possibly for some additional
4263 debug information. Forget that we are in this subspace to ensure
4264 that the next function is output in its own subspace. */
4265 in_section = NULL;
4266 cfun->machine->in_nsubspa = 2;
4269 /* Thunks do their own insn accounting. */
4270 if (cfun->is_thunk)
4271 return;
4273 if (INSN_ADDRESSES_SET_P ())
4275 last_address = extra_nop ? 4 : 0;
4276 insn = get_last_nonnote_insn ();
4277 if (insn)
4279 last_address += INSN_ADDRESSES (INSN_UID (insn));
4280 if (INSN_P (insn))
4281 last_address += insn_default_length (insn);
4283 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4284 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4286 else
4287 last_address = UINT_MAX;
4289 /* Finally, update the total number of code bytes output so far. */
4290 update_total_code_bytes (last_address);
4293 void
4294 pa_expand_epilogue (void)
4296 rtx tmpreg;
4297 HOST_WIDE_INT offset;
4298 HOST_WIDE_INT ret_off = 0;
4299 int i;
4300 int merge_sp_adjust_with_load = 0;
4302 /* We will use this often. */
4303 tmpreg = gen_rtx_REG (word_mode, 1);
4305 /* Try to restore RP early to avoid load/use interlocks when
4306 RP gets used in the return (bv) instruction. This appears to still
4307 be necessary even when we schedule the prologue and epilogue. */
4308 if (rp_saved)
4310 ret_off = TARGET_64BIT ? -16 : -20;
4311 if (frame_pointer_needed)
4313 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4314 ret_off = 0;
4316 else
4318 /* No frame pointer, and stack is smaller than 8k. */
4319 if (VAL_14_BITS_P (ret_off - actual_fsize))
4321 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4322 ret_off = 0;
4327 /* General register restores. */
4328 if (frame_pointer_needed)
4330 offset = local_fsize;
4332 /* If the current function calls __builtin_eh_return, then we need
4333 to restore the saved EH data registers. */
4334 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4336 unsigned int i, regno;
4338 for (i = 0; ; ++i)
4340 regno = EH_RETURN_DATA_REGNO (i);
4341 if (regno == INVALID_REGNUM)
4342 break;
4344 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4345 offset += UNITS_PER_WORD;
4349 for (i = 18; i >= 4; i--)
4350 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4352 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4353 offset += UNITS_PER_WORD;
4356 else
4358 offset = local_fsize - actual_fsize;
4360 /* If the current function calls __builtin_eh_return, then we need
4361 to restore the saved EH data registers. */
4362 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4364 unsigned int i, regno;
4366 for (i = 0; ; ++i)
4368 regno = EH_RETURN_DATA_REGNO (i);
4369 if (regno == INVALID_REGNUM)
4370 break;
4372 /* Only for the first load.
4373 merge_sp_adjust_with_load holds the register load
4374 with which we will merge the sp adjustment. */
4375 if (merge_sp_adjust_with_load == 0
4376 && local_fsize == 0
4377 && VAL_14_BITS_P (-actual_fsize))
4378 merge_sp_adjust_with_load = regno;
4379 else
4380 load_reg (regno, offset, STACK_POINTER_REGNUM);
4381 offset += UNITS_PER_WORD;
4385 for (i = 18; i >= 3; i--)
4387 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4389 /* Only for the first load.
4390 merge_sp_adjust_with_load holds the register load
4391 with which we will merge the sp adjustment. */
4392 if (merge_sp_adjust_with_load == 0
4393 && local_fsize == 0
4394 && VAL_14_BITS_P (-actual_fsize))
4395 merge_sp_adjust_with_load = i;
4396 else
4397 load_reg (i, offset, STACK_POINTER_REGNUM);
4398 offset += UNITS_PER_WORD;
4403 /* Align pointer properly (doubleword boundary). */
4404 offset = (offset + 7) & ~7;
4406 /* FP register restores. */
4407 if (save_fregs)
4409 /* Adjust the register to index off of. */
4410 if (frame_pointer_needed)
4411 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4412 else
4413 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4415 /* Actually do the restores now. */
4416 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4417 if (df_regs_ever_live_p (i)
4418 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4420 rtx src = gen_rtx_MEM (DFmode,
4421 gen_rtx_POST_INC (word_mode, tmpreg));
4422 rtx dest = gen_rtx_REG (DFmode, i);
4423 emit_move_insn (dest, src);
4427 /* Emit a blockage insn here to keep these insns from being moved to
4428 an earlier spot in the epilogue, or into the main instruction stream.
4430 This is necessary as we must not cut the stack back before all the
4431 restores are finished. */
4432 emit_insn (gen_blockage ());
4434 /* Reset stack pointer (and possibly frame pointer). The stack
4435 pointer is initially set to fp + 64 to avoid a race condition. */
4436 if (frame_pointer_needed)
4438 rtx delta = GEN_INT (-64);
4440 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4441 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4442 stack_pointer_rtx, delta));
4444 /* If we were deferring a callee register restore, do it now. */
4445 else if (merge_sp_adjust_with_load)
4447 rtx delta = GEN_INT (-actual_fsize);
4448 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4450 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4452 else if (actual_fsize != 0)
4453 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4454 - actual_fsize, 0);
4456 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4457 frame greater than 8k), do so now. */
4458 if (ret_off != 0)
4459 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4461 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4463 rtx sa = EH_RETURN_STACKADJ_RTX;
4465 emit_insn (gen_blockage ());
4466 emit_insn (TARGET_64BIT
4467 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4468 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4472 bool
4473 pa_can_use_return_insn (void)
4475 if (!reload_completed)
4476 return false;
4478 if (frame_pointer_needed)
4479 return false;
4481 if (df_regs_ever_live_p (2))
4482 return false;
4484 if (crtl->profile)
4485 return false;
4487 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4490 rtx
4491 hppa_pic_save_rtx (void)
4493 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4496 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4497 #define NO_DEFERRED_PROFILE_COUNTERS 0
4498 #endif
4501 /* Vector of funcdef numbers. */
4502 static vec<int> funcdef_nos;
4504 /* Output deferred profile counters. */
4505 static void
4506 output_deferred_profile_counters (void)
4508 unsigned int i;
4509 int align, n;
4511 if (funcdef_nos.is_empty ())
4512 return;
4514 switch_to_section (data_section);
4515 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4516 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4518 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4520 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4521 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4524 funcdef_nos.release ();
4527 void
4528 hppa_profile_hook (int label_no)
4530 /* We use SImode for the address of the function in both 32 and
4531 64-bit code to avoid having to provide DImode versions of the
4532 lcla2 and load_offset_label_address insn patterns. */
4533 rtx reg = gen_reg_rtx (SImode);
4534 rtx_code_label *label_rtx = gen_label_rtx ();
4535 rtx mcount = gen_rtx_MEM (Pmode, gen_rtx_SYMBOL_REF (Pmode, "_mcount"));
4536 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4537 rtx arg_bytes, begin_label_rtx;
4538 rtx_insn *call_insn;
4539 char begin_label_name[16];
4540 bool use_mcount_pcrel_call;
4542 /* If we can reach _mcount with a pc-relative call, we can optimize
4543 loading the address of the current function. This requires linker
4544 long branch stub support. */
4545 if (!TARGET_PORTABLE_RUNTIME
4546 && !TARGET_LONG_CALLS
4547 && (TARGET_SOM || flag_function_sections))
4548 use_mcount_pcrel_call = TRUE;
4549 else
4550 use_mcount_pcrel_call = FALSE;
4552 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4553 label_no);
4554 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4556 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4558 if (!use_mcount_pcrel_call)
4560 /* The address of the function is loaded into %r25 with an instruction-
4561 relative sequence that avoids the use of relocations. The sequence
4562 is split so that the load_offset_label_address instruction can
4563 occupy the delay slot of the call to _mcount. */
4564 if (TARGET_PA_20)
4565 emit_insn (gen_lcla2 (reg, label_rtx));
4566 else
4567 emit_insn (gen_lcla1 (reg, label_rtx));
4569 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4570 reg,
4571 begin_label_rtx,
4572 label_rtx));
4575 if (!NO_DEFERRED_PROFILE_COUNTERS)
4577 rtx count_label_rtx, addr, r24;
4578 char count_label_name[16];
4580 funcdef_nos.safe_push (label_no);
4581 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4582 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4583 ggc_strdup (count_label_name));
4585 addr = force_reg (Pmode, count_label_rtx);
4586 r24 = gen_rtx_REG (Pmode, 24);
4587 emit_move_insn (r24, addr);
4589 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4590 if (use_mcount_pcrel_call)
4591 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4592 begin_label_rtx));
4593 else
4594 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4596 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4598 else
4600 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4601 if (use_mcount_pcrel_call)
4602 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4603 begin_label_rtx));
4604 else
4605 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4608 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4609 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4611 /* Indicate the _mcount call cannot throw, nor will it execute a
4612 non-local goto. */
4613 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4615 /* Allocate space for fixed arguments. */
4616 if (reg_parm_stack_space > crtl->outgoing_args_size)
4617 crtl->outgoing_args_size = reg_parm_stack_space;
4620 /* Fetch the return address for the frame COUNT steps up from
4621 the current frame, after the prologue. FRAMEADDR is the
4622 frame pointer of the COUNT frame.
4624 We want to ignore any export stub remnants here. To handle this,
4625 we examine the code at the return address, and if it is an export
4626 stub, we return a memory rtx for the stub return address stored
4627 at frame-24.
4629 The value returned is used in two different ways:
4631 1. To find a function's caller.
4633 2. To change the return address for a function.
4635 This function handles most instances of case 1; however, it will
4636 fail if there are two levels of stubs to execute on the return
4637 path. The only way I believe that can happen is if the return value
4638 needs a parameter relocation, which never happens for C code.
4640 This function handles most instances of case 2; however, it will
4641 fail if we did not originally have stub code on the return path
4642 but will need stub code on the new return path. This can happen if
4643 the caller & callee are both in the main program, but the new
4644 return location is in a shared library. */
4646 rtx
4647 pa_return_addr_rtx (int count, rtx frameaddr)
4649 rtx label;
4650 rtx rp;
4651 rtx saved_rp;
4652 rtx ins;
4654 /* The instruction stream at the return address of a PA1.X export stub is:
4656 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4657 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4658 0x00011820 | stub+16: mtsp r1,sr0
4659 0xe0400002 | stub+20: be,n 0(sr0,rp)
4661 0xe0400002 must be specified as -532676606 so that it won't be
4662 rejected as an invalid immediate operand on 64-bit hosts.
4664 The instruction stream at the return address of a PA2.0 export stub is:
4666 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4667 0xe840d002 | stub+12: bve,n (rp)
4670 HOST_WIDE_INT insns[4];
4671 int i, len;
4673 if (count != 0)
4674 return NULL_RTX;
4676 rp = get_hard_reg_initial_val (Pmode, 2);
4678 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4679 return rp;
4681 /* If there is no export stub then just use the value saved from
4682 the return pointer register. */
4684 saved_rp = gen_reg_rtx (Pmode);
4685 emit_move_insn (saved_rp, rp);
4687 /* Get pointer to the instruction stream. We have to mask out the
4688 privilege level from the two low order bits of the return address
4689 pointer here so that ins will point to the start of the first
4690 instruction that would have been executed if we returned. */
4691 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4692 label = gen_label_rtx ();
4694 if (TARGET_PA_20)
4696 insns[0] = 0x4bc23fd1;
4697 insns[1] = -398405630;
4698 len = 2;
4700 else
4702 insns[0] = 0x4bc23fd1;
4703 insns[1] = 0x004010a1;
4704 insns[2] = 0x00011820;
4705 insns[3] = -532676606;
4706 len = 4;
4709 /* Check the instruction stream at the normal return address for the
4710    export stub.  If it is an export stub, then our return address is
4711 really in -24[frameaddr]. */
4713 for (i = 0; i < len; i++)
4715 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4716 rtx op1 = GEN_INT (insns[i]);
4717 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4720 /* Here we know that our return address points to an export
4721 stub. We don't want to return the address of the export stub,
4722 but rather the return address of the export stub. That return
4723 address is stored at -24[frameaddr]. */
4725 emit_move_insn (saved_rp,
4726 gen_rtx_MEM (Pmode,
4727 memory_address (Pmode,
4728 plus_constant (Pmode, frameaddr,
4729 -24))));
4731 emit_label (label);
4733 return saved_rp;
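/* [Standalone check -- not part of the original file; assumes the usual
   32-bit two's-complement int.]  The negative constants in the insns[]
   tables above are just the stub words reinterpreted as signed 32-bit
   values, which is what a sign-extending HOST_WIDE_INT comparison on a
   64-bit host requires.  */
static void
sketch_stub_word_encoding (void)
{
  gcc_assert ((int) 0xe0400002u == -532676606);	/* be,n 0(sr0,rp) */
  gcc_assert ((int) 0xe840d002u == -398405630);	/* bve,n (rp) */
}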
4736 void
4737 pa_emit_bcond_fp (rtx operands[])
4739 enum rtx_code code = GET_CODE (operands[0]);
4740 rtx operand0 = operands[1];
4741 rtx operand1 = operands[2];
4742 rtx label = operands[3];
4744 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4745 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4747 emit_jump_insn (gen_rtx_SET (pc_rtx,
4748 gen_rtx_IF_THEN_ELSE (VOIDmode,
4749 gen_rtx_fmt_ee (NE,
4750 VOIDmode,
4751 gen_rtx_REG (CCFPmode, 0),
4752 const0_rtx),
4753 gen_rtx_LABEL_REF (VOIDmode, label),
4754 pc_rtx)));
4758 /* Adjust the cost of a scheduling dependency. Return the new cost of
4759 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4761 static int
4762 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4763 unsigned int)
4765 enum attr_type attr_type;
4767 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4768 true dependencies as they are described with bypasses now. */
4769 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4770 return cost;
4772 if (! recog_memoized (insn))
4773 return 0;
4775 attr_type = get_attr_type (insn);
4777 switch (dep_type)
4779 case REG_DEP_ANTI:
4780 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4781 cycles later. */
4783 if (attr_type == TYPE_FPLOAD)
4785 rtx pat = PATTERN (insn);
4786 rtx dep_pat = PATTERN (dep_insn);
4787 if (GET_CODE (pat) == PARALLEL)
4789 /* This happens for the fldXs,mb patterns. */
4790 pat = XVECEXP (pat, 0, 0);
4792 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4793 /* If this happens, we have to extend this to schedule
4794 optimally. Return 0 for now. */
4795 return 0;
4797 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4799 if (! recog_memoized (dep_insn))
4800 return 0;
4801 switch (get_attr_type (dep_insn))
4803 case TYPE_FPALU:
4804 case TYPE_FPMULSGL:
4805 case TYPE_FPMULDBL:
4806 case TYPE_FPDIVSGL:
4807 case TYPE_FPDIVDBL:
4808 case TYPE_FPSQRTSGL:
4809 case TYPE_FPSQRTDBL:
4810 /* A fpload can't be issued until one cycle before a
4811 preceding arithmetic operation has finished if
4812 the target of the fpload is any of the sources
4813 (or destination) of the arithmetic operation. */
4814 return insn_default_latency (dep_insn) - 1;
4816 default:
4817 return 0;
4821 else if (attr_type == TYPE_FPALU)
4823 rtx pat = PATTERN (insn);
4824 rtx dep_pat = PATTERN (dep_insn);
4825 if (GET_CODE (pat) == PARALLEL)
4827 /* This happens for the fldXs,mb patterns. */
4828 pat = XVECEXP (pat, 0, 0);
4830 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4831 /* If this happens, we have to extend this to schedule
4832 optimally. Return 0 for now. */
4833 return 0;
4835 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4837 if (! recog_memoized (dep_insn))
4838 return 0;
4839 switch (get_attr_type (dep_insn))
4841 case TYPE_FPDIVSGL:
4842 case TYPE_FPDIVDBL:
4843 case TYPE_FPSQRTSGL:
4844 case TYPE_FPSQRTDBL:
4845 /* An ALU flop can't be issued until two cycles before a
4846 preceding divide or sqrt operation has finished if
4847 the target of the ALU flop is any of the sources
4848 (or destination) of the divide or sqrt operation. */
4849 return insn_default_latency (dep_insn) - 2;
4851 default:
4852 return 0;
4857 /* For other anti dependencies, the cost is 0. */
4858 return 0;
4860 case REG_DEP_OUTPUT:
4861 /* Output dependency; DEP_INSN writes a register that INSN writes some
4862 cycles later. */
4863 if (attr_type == TYPE_FPLOAD)
4865 rtx pat = PATTERN (insn);
4866 rtx dep_pat = PATTERN (dep_insn);
4867 if (GET_CODE (pat) == PARALLEL)
4869 /* This happens for the fldXs,mb patterns. */
4870 pat = XVECEXP (pat, 0, 0);
4872 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4873 /* If this happens, we have to extend this to schedule
4874 optimally. Return 0 for now. */
4875 return 0;
4877 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4879 if (! recog_memoized (dep_insn))
4880 return 0;
4881 switch (get_attr_type (dep_insn))
4883 case TYPE_FPALU:
4884 case TYPE_FPMULSGL:
4885 case TYPE_FPMULDBL:
4886 case TYPE_FPDIVSGL:
4887 case TYPE_FPDIVDBL:
4888 case TYPE_FPSQRTSGL:
4889 case TYPE_FPSQRTDBL:
4890 /* A fpload can't be issued until one cycle before a
4891 preceding arithmetic operation has finished if
4892 the target of the fpload is the destination of the
4893 arithmetic operation.
4895 Exception: For PA7100LC, PA7200 and PA7300, the cost
4896 is 3 cycles, unless they bundle together. We also
4897 pay the penalty if the second insn is a fpload. */
4898 return insn_default_latency (dep_insn) - 1;
4900 default:
4901 return 0;
4905 else if (attr_type == TYPE_FPALU)
4907 rtx pat = PATTERN (insn);
4908 rtx dep_pat = PATTERN (dep_insn);
4909 if (GET_CODE (pat) == PARALLEL)
4911 /* This happens for the fldXs,mb patterns. */
4912 pat = XVECEXP (pat, 0, 0);
4914 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4915 /* If this happens, we have to extend this to schedule
4916 optimally. Return 0 for now. */
4917 return 0;
4919 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4921 if (! recog_memoized (dep_insn))
4922 return 0;
4923 switch (get_attr_type (dep_insn))
4925 case TYPE_FPDIVSGL:
4926 case TYPE_FPDIVDBL:
4927 case TYPE_FPSQRTSGL:
4928 case TYPE_FPSQRTDBL:
4929 /* An ALU flop can't be issued until two cycles before a
4930 preceding divide or sqrt operation has finished if
4931 the target of the ALU flop is also the target of
4932 the divide or sqrt operation. */
4933 return insn_default_latency (dep_insn) - 2;
4935 default:
4936 return 0;
4941 /* For other output dependencies, the cost is 0. */
4942 return 0;
4944 default:
4945 gcc_unreachable ();
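/* [Simplified model -- not the actual hook above.]  Both adjustment
   cases subtract the consumer's early-issue window from the producer's
   default latency: an fpload may issue one cycle before a conflicting
   FP arithmetic operation retires, and an FP ALU op two cycles before
   a conflicting divide/sqrt.  */
static int
sketch_adjusted_cost (int dep_latency, int early_issue_cycles)
{
  /* early_issue_cycles is 1 for the fpload cases above and 2 for the
     fpalu cases.  */
  return dep_latency - early_issue_cycles;
}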
4949 /* Adjust scheduling priorities.  We use this to try to keep addil
4950 and the next use of %r1 close together. */
4951 static int
4952 pa_adjust_priority (rtx_insn *insn, int priority)
4954 rtx set = single_set (insn);
4955 rtx src, dest;
4956 if (set)
4958 src = SET_SRC (set);
4959 dest = SET_DEST (set);
4960 if (GET_CODE (src) == LO_SUM
4961 && symbolic_operand (XEXP (src, 1), VOIDmode)
4962 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4963 priority >>= 3;
4965 else if (GET_CODE (src) == MEM
4966 && GET_CODE (XEXP (src, 0)) == LO_SUM
4967 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4968 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4969 priority >>= 1;
4971 else if (GET_CODE (dest) == MEM
4972 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4973 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4974 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4975 priority >>= 3;
4977 return priority;
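/* [Illustrative numbers -- not from the original source.]  The shifts
   above demote an insn rather than exclude it: a priority of 40 becomes
   40 >> 3 == 5 for the LO_SUM set and symbolic-store cases, and
   40 >> 1 == 20 for the symbolic load, nudging the scheduler to keep
   each LO_SUM user next to the addil that set up %r1.  */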
4980 /* The 700 can only issue a single insn at a time.
4981 The 7XXX processors can issue two insns at a time.
4982 The 8000 can issue 4 insns at a time. */
4983 static int
4984 pa_issue_rate (void)
4986 switch (pa_cpu)
4988 case PROCESSOR_700: return 1;
4989 case PROCESSOR_7100: return 2;
4990 case PROCESSOR_7100LC: return 2;
4991 case PROCESSOR_7200: return 2;
4992 case PROCESSOR_7300: return 2;
4993 case PROCESSOR_8000: return 4;
4995 default:
4996 gcc_unreachable ();
5002 /* Return the length of INSN, including any adjustment needed, given
5003    that its length has already been computed as LENGTH.  Return LENGTH
5004    if no adjustment is necessary.
5006 Also compute the length of an inline block move here as it is too
5007 complicated to express as a length attribute in pa.md. */
5008 int
5009 pa_adjust_insn_length (rtx_insn *insn, int length)
5011 rtx pat = PATTERN (insn);
5013 /* If length is negative or undefined, provide initial length. */
5014 if ((unsigned int) length >= INT_MAX)
5016 if (GET_CODE (pat) == SEQUENCE)
5017 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5019 switch (get_attr_type (insn))
5021 case TYPE_MILLI:
5022 length = pa_attr_length_millicode_call (insn);
5023 break;
5024 case TYPE_CALL:
5025 length = pa_attr_length_call (insn, 0);
5026 break;
5027 case TYPE_SIBCALL:
5028 length = pa_attr_length_call (insn, 1);
5029 break;
5030 case TYPE_DYNCALL:
5031 length = pa_attr_length_indirect_call (insn);
5032 break;
5033 case TYPE_SH_FUNC_ADRS:
5034 length = pa_attr_length_millicode_call (insn) + 20;
5035 break;
5036 default:
5037 gcc_unreachable ();
5041 /* Block move pattern. */
5042 if (NONJUMP_INSN_P (insn)
5043 && GET_CODE (pat) == PARALLEL
5044 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5045 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5046 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5047 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5048 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5049 length += compute_movmem_length (insn) - 4;
5050 /* Block clear pattern. */
5051 else if (NONJUMP_INSN_P (insn)
5052 && GET_CODE (pat) == PARALLEL
5053 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5054 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5055 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5056 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5057 length += compute_clrmem_length (insn) - 4;
5058 /* Conditional branch with an unfilled delay slot. */
5059 else if (JUMP_P (insn) && ! simplejump_p (insn))
5061 /* Adjust a short backwards conditional with an unfilled delay slot. */
5062 if (GET_CODE (pat) == SET
5063 && length == 4
5064 && JUMP_LABEL (insn) != NULL_RTX
5065 && ! forward_branch_p (insn))
5066 length += 4;
5067 else if (GET_CODE (pat) == PARALLEL
5068 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5069 && length == 4)
5070 length += 4;
5071 /* Adjust dbra insn with short backwards conditional branch with
5072       unfilled delay slot -- only for the case where the counter is in a
5073       general register.  */
5074 else if (GET_CODE (pat) == PARALLEL
5075 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5076 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5077 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5078 && length == 4
5079 && ! forward_branch_p (insn))
5080 length += 4;
5082 return length;
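/* [Standalone sketch -- not part of the original file; INT_MAX comes
   from <limits.h> via system.h.]  The "(unsigned int) length >= INT_MAX"
   test above catches both an undefined length (INT_MAX) and any negative
   length, because casting a negative int to unsigned int yields a value
   of at least INT_MAX + 1u.  */
static int
sketch_needs_initial_length (int length)
{
  return (unsigned int) length >= INT_MAX;	/* 1 for -1 and INT_MAX */
}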
5085 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5087 static bool
5088 pa_print_operand_punct_valid_p (unsigned char code)
5090 if (code == '@'
5091 || code == '#'
5092 || code == '*'
5093 || code == '^')
5094 return true;
5096 return false;
5099 /* Print operand X (an rtx) in assembler syntax to file FILE.
5100 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5101 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5103 void
5104 pa_print_operand (FILE *file, rtx x, int code)
5106 switch (code)
5108 case '#':
5109 /* Output a 'nop' if there's nothing for the delay slot. */
5110 if (dbr_sequence_length () == 0)
5111 fputs ("\n\tnop", file);
5112 return;
5113 case '*':
5114     /* Output a nullification completer if there's nothing for the
5115        delay slot or nullification is requested.  */
5116 if (dbr_sequence_length () == 0 ||
5117 (final_sequence &&
5118 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5119 fputs (",n", file);
5120 return;
5121 case 'R':
5122 /* Print out the second register name of a register pair.
5123 I.e., R (6) => 7. */
5124 fputs (reg_names[REGNO (x) + 1], file);
5125 return;
5126 case 'r':
5127 /* A register or zero. */
5128 if (x == const0_rtx
5129 || (x == CONST0_RTX (DFmode))
5130 || (x == CONST0_RTX (SFmode)))
5132 fputs ("%r0", file);
5133 return;
5135 else
5136 break;
5137 case 'f':
5138 /* A register or zero (floating point). */
5139 if (x == const0_rtx
5140 || (x == CONST0_RTX (DFmode))
5141 || (x == CONST0_RTX (SFmode)))
5143 fputs ("%fr0", file);
5144 return;
5146 else
5147 break;
5148 case 'A':
5150 rtx xoperands[2];
5152 xoperands[0] = XEXP (XEXP (x, 0), 0);
5153 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5154 pa_output_global_address (file, xoperands[1], 0);
5155 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5156 return;
5159 case 'C': /* Plain (C)ondition */
5160 case 'X':
5161 switch (GET_CODE (x))
5163 case EQ:
5164 fputs ("=", file); break;
5165 case NE:
5166 fputs ("<>", file); break;
5167 case GT:
5168 fputs (">", file); break;
5169 case GE:
5170 fputs (">=", file); break;
5171 case GEU:
5172 fputs (">>=", file); break;
5173 case GTU:
5174 fputs (">>", file); break;
5175 case LT:
5176 fputs ("<", file); break;
5177 case LE:
5178 fputs ("<=", file); break;
5179 case LEU:
5180 fputs ("<<=", file); break;
5181 case LTU:
5182 fputs ("<<", file); break;
5183 default:
5184 gcc_unreachable ();
5186 return;
5187 case 'N': /* Condition, (N)egated */
5188 switch (GET_CODE (x))
5190 case EQ:
5191 fputs ("<>", file); break;
5192 case NE:
5193 fputs ("=", file); break;
5194 case GT:
5195 fputs ("<=", file); break;
5196 case GE:
5197 fputs ("<", file); break;
5198 case GEU:
5199 fputs ("<<", file); break;
5200 case GTU:
5201 fputs ("<<=", file); break;
5202 case LT:
5203 fputs (">=", file); break;
5204 case LE:
5205 fputs (">", file); break;
5206 case LEU:
5207 fputs (">>", file); break;
5208 case LTU:
5209 fputs (">>=", file); break;
5210 default:
5211 gcc_unreachable ();
5213 return;
5214 /* For floating point comparisons. Note that the output
5215 predicates are the complement of the desired mode. The
5216 conditions for GT, GE, LT, LE and LTGT cause an invalid
5217 operation exception if the result is unordered and this
5218 exception is enabled in the floating-point status register. */
5219 case 'Y':
5220 switch (GET_CODE (x))
5222 case EQ:
5223 fputs ("!=", file); break;
5224 case NE:
5225 fputs ("=", file); break;
5226 case GT:
5227 fputs ("!>", file); break;
5228 case GE:
5229 fputs ("!>=", file); break;
5230 case LT:
5231 fputs ("!<", file); break;
5232 case LE:
5233 fputs ("!<=", file); break;
5234 case LTGT:
5235 fputs ("!<>", file); break;
5236 case UNLE:
5237 fputs ("!?<=", file); break;
5238 case UNLT:
5239 fputs ("!?<", file); break;
5240 case UNGE:
5241 fputs ("!?>=", file); break;
5242 case UNGT:
5243 fputs ("!?>", file); break;
5244 case UNEQ:
5245 fputs ("!?=", file); break;
5246 case UNORDERED:
5247 fputs ("!?", file); break;
5248 case ORDERED:
5249 fputs ("?", file); break;
5250 default:
5251 gcc_unreachable ();
5253 return;
5254 case 'S': /* Condition, operands are (S)wapped. */
5255 switch (GET_CODE (x))
5257 case EQ:
5258 fputs ("=", file); break;
5259 case NE:
5260 fputs ("<>", file); break;
5261 case GT:
5262 fputs ("<", file); break;
5263 case GE:
5264 fputs ("<=", file); break;
5265 case GEU:
5266 fputs ("<<=", file); break;
5267 case GTU:
5268 fputs ("<<", file); break;
5269 case LT:
5270 fputs (">", file); break;
5271 case LE:
5272 fputs (">=", file); break;
5273 case LEU:
5274 fputs (">>=", file); break;
5275 case LTU:
5276 fputs (">>", file); break;
5277 default:
5278 gcc_unreachable ();
5280 return;
5281 case 'B': /* Condition, (B)oth swapped and negate. */
5282 switch (GET_CODE (x))
5284 case EQ:
5285 fputs ("<>", file); break;
5286 case NE:
5287 fputs ("=", file); break;
5288 case GT:
5289 fputs (">=", file); break;
5290 case GE:
5291 fputs (">", file); break;
5292 case GEU:
5293 fputs (">>", file); break;
5294 case GTU:
5295 fputs (">>=", file); break;
5296 case LT:
5297 fputs ("<=", file); break;
5298 case LE:
5299 fputs ("<", file); break;
5300 case LEU:
5301 fputs ("<<", file); break;
5302 case LTU:
5303 fputs ("<<=", file); break;
5304 default:
5305 gcc_unreachable ();
5307 return;
5308 case 'k':
5309 gcc_assert (GET_CODE (x) == CONST_INT);
5310 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5311 return;
5312 case 'Q':
5313 gcc_assert (GET_CODE (x) == CONST_INT);
5314 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5315 return;
5316 case 'L':
5317 gcc_assert (GET_CODE (x) == CONST_INT);
5318 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5319 return;
5320 case 'o':
5321 gcc_assert (GET_CODE (x) == CONST_INT
5322 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5323 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5324 return;
5325 case 'O':
5326 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5327 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5328 return;
5329 case 'p':
5330 gcc_assert (GET_CODE (x) == CONST_INT);
5331 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5332 return;
5333 case 'P':
5334 gcc_assert (GET_CODE (x) == CONST_INT);
5335 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5336 return;
5337 case 'I':
5338 if (GET_CODE (x) == CONST_INT)
5339 fputs ("i", file);
5340 return;
5341 case 'M':
5342 case 'F':
5343 switch (GET_CODE (XEXP (x, 0)))
5345 case PRE_DEC:
5346 case PRE_INC:
5347 if (ASSEMBLER_DIALECT == 0)
5348 fputs ("s,mb", file);
5349 else
5350 fputs (",mb", file);
5351 break;
5352 case POST_DEC:
5353 case POST_INC:
5354 if (ASSEMBLER_DIALECT == 0)
5355 fputs ("s,ma", file);
5356 else
5357 fputs (",ma", file);
5358 break;
5359 case PLUS:
5360 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5361 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5363 if (ASSEMBLER_DIALECT == 0)
5364 fputs ("x", file);
5366 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5367 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5369 if (ASSEMBLER_DIALECT == 0)
5370 fputs ("x,s", file);
5371 else
5372 fputs (",s", file);
5374 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5375 fputs ("s", file);
5376 break;
5377 default:
5378 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5379 fputs ("s", file);
5380 break;
5382 return;
5383 case 'G':
5384 pa_output_global_address (file, x, 0);
5385 return;
5386 case 'H':
5387 pa_output_global_address (file, x, 1);
5388 return;
5389 case 0: /* Don't do anything special */
5390 break;
5391 case 'Z':
5393 unsigned op[3];
5394 compute_zdepwi_operands (INTVAL (x), op);
5395 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5396 return;
5398 case 'z':
5400 unsigned op[3];
5401 compute_zdepdi_operands (INTVAL (x), op);
5402 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5403 return;
5405 case 'c':
5406 /* We can get here from a .vtable_inherit due to our
5407 CONSTANT_ADDRESS_P rejecting perfectly good constant
5408 addresses. */
5409 break;
5410 default:
5411 gcc_unreachable ();
5413 if (GET_CODE (x) == REG)
5415 fputs (reg_names [REGNO (x)], file);
5416 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5418 fputs ("R", file);
5419 return;
5421 if (FP_REG_P (x)
5422 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5423 && (REGNO (x) & 1) == 0)
5424 fputs ("L", file);
5426 else if (GET_CODE (x) == MEM)
5428 int size = GET_MODE_SIZE (GET_MODE (x));
5429 rtx base = NULL_RTX;
5430 switch (GET_CODE (XEXP (x, 0)))
5432 case PRE_DEC:
5433 case POST_DEC:
5434 base = XEXP (XEXP (x, 0), 0);
5435 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5436 break;
5437 case PRE_INC:
5438 case POST_INC:
5439 base = XEXP (XEXP (x, 0), 0);
5440 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5441 break;
5442 case PLUS:
5443 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5444 fprintf (file, "%s(%s)",
5445 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5446 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5447 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5448 fprintf (file, "%s(%s)",
5449 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5450 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5451 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5452 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5454 /* Because the REG_POINTER flag can get lost during reload,
5455 pa_legitimate_address_p canonicalizes the order of the
5456 index and base registers in the combined move patterns. */
5457 rtx base = XEXP (XEXP (x, 0), 1);
5458 rtx index = XEXP (XEXP (x, 0), 0);
5460 fprintf (file, "%s(%s)",
5461 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5463 else
5464 output_address (GET_MODE (x), XEXP (x, 0));
5465 break;
5466 default:
5467 output_address (GET_MODE (x), XEXP (x, 0));
5468 break;
5471 else
5472 output_addr_const (file, x);
5475 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5477 void
5478 pa_output_global_address (FILE *file, rtx x, int round_constant)
5481 /* Imagine (high (const (plus ...))). */
5482 if (GET_CODE (x) == HIGH)
5483 x = XEXP (x, 0);
5485 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5486 output_addr_const (file, x);
5487 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5489 output_addr_const (file, x);
5490 fputs ("-$global$", file);
5492 else if (GET_CODE (x) == CONST)
5494 const char *sep = "";
5495 int offset = 0; /* assembler wants -$global$ at end */
5496 rtx base = NULL_RTX;
5498 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5500 case LABEL_REF:
5501 case SYMBOL_REF:
5502 base = XEXP (XEXP (x, 0), 0);
5503 output_addr_const (file, base);
5504 break;
5505 case CONST_INT:
5506 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5507 break;
5508 default:
5509 gcc_unreachable ();
5512 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5514 case LABEL_REF:
5515 case SYMBOL_REF:
5516 base = XEXP (XEXP (x, 0), 1);
5517 output_addr_const (file, base);
5518 break;
5519 case CONST_INT:
5520 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5521 break;
5522 default:
5523 gcc_unreachable ();
5526 /* How bogus. The compiler is apparently responsible for
5527 rounding the constant if it uses an LR field selector.
5529 The linker and/or assembler seem a better place since
5530 they have to do this kind of thing already.
5532 If we fail to do this, HP's optimizing linker may eliminate
5533 an addil, but not update the ldw/stw/ldo instruction that
5534 uses the result of the addil. */
5535 if (round_constant)
5536 offset = ((offset + 0x1000) & ~0x1fff);
5538 switch (GET_CODE (XEXP (x, 0)))
5540 case PLUS:
5541 if (offset < 0)
5543 offset = -offset;
5544 sep = "-";
5546 else
5547 sep = "+";
5548 break;
5550 case MINUS:
5551 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5552 sep = "-";
5553 break;
5555 default:
5556 gcc_unreachable ();
5559 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5560 fputs ("-$global$", file);
5561 if (offset)
5562 fprintf (file, "%s%d", sep, offset);
5564 else
5565 output_addr_const (file, x);
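/* [Sketch -- not from the original file.]  The LR-selector rounding
   above biases the offset by 0x1000 and masks with ~0x1fff, i.e. it
   rounds to the nearest multiple of 0x2000 (8k).  */
static int
sketch_round_lr_offset (int offset)
{
  return (offset + 0x1000) & ~0x1fff;
}
/* sketch_round_lr_offset (0x0fff) == 0x0000;
   sketch_round_lr_offset (0x1000) == 0x2000.  */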
5568 /* Output boilerplate text to appear at the beginning of the file.
5569 There are several possible versions. */
5570 #define aputs(x) fputs(x, asm_out_file)
5571 static inline void
5572 pa_file_start_level (void)
5574 if (TARGET_64BIT)
5575 aputs ("\t.LEVEL 2.0w\n");
5576 else if (TARGET_PA_20)
5577 aputs ("\t.LEVEL 2.0\n");
5578 else if (TARGET_PA_11)
5579 aputs ("\t.LEVEL 1.1\n");
5580 else
5581 aputs ("\t.LEVEL 1.0\n");
5584 static inline void
5585 pa_file_start_space (int sortspace)
5587 aputs ("\t.SPACE $PRIVATE$");
5588 if (sortspace)
5589 aputs (",SORT=16");
5590 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5591 if (flag_tm)
5592 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5593 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5594 "\n\t.SPACE $TEXT$");
5595 if (sortspace)
5596 aputs (",SORT=8");
5597 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5598 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5601 static inline void
5602 pa_file_start_file (int want_version)
5604 if (write_symbols != NO_DEBUG)
5606 output_file_directive (asm_out_file, main_input_filename);
5607 if (want_version)
5608 aputs ("\t.version\t\"01.01\"\n");
5612 static inline void
5613 pa_file_start_mcount (const char *aswhat)
5615 if (profile_flag)
5616 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5619 static void
5620 pa_elf_file_start (void)
5622 pa_file_start_level ();
5623 pa_file_start_mcount ("ENTRY");
5624 pa_file_start_file (0);
5627 static void
5628 pa_som_file_start (void)
5630 pa_file_start_level ();
5631 pa_file_start_space (0);
5632 aputs ("\t.IMPORT $global$,DATA\n"
5633 "\t.IMPORT $$dyncall,MILLICODE\n");
5634 pa_file_start_mcount ("CODE");
5635 pa_file_start_file (0);
5638 static void
5639 pa_linux_file_start (void)
5641 pa_file_start_file (1);
5642 pa_file_start_level ();
5643 pa_file_start_mcount ("CODE");
5646 static void
5647 pa_hpux64_gas_file_start (void)
5649 pa_file_start_level ();
5650 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5651 if (profile_flag)
5652 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5653 #endif
5654 pa_file_start_file (1);
5657 static void
5658 pa_hpux64_hpas_file_start (void)
5660 pa_file_start_level ();
5661 pa_file_start_space (1);
5662 pa_file_start_mcount ("CODE");
5663 pa_file_start_file (0);
5665 #undef aputs
5667 /* Search the deferred plabel list for SYMBOL and return its internal
5668 label. If an entry for SYMBOL is not found, a new entry is created. */
5670 rtx
5671 pa_get_deferred_plabel (rtx symbol)
5673 const char *fname = XSTR (symbol, 0);
5674 size_t i;
5676 /* See if we have already put this function on the list of deferred
5677      plabels.  This list is generally small, so a linear search is not
5678      too ugly.  If it proves too slow, replace it with something faster.  */
5679 for (i = 0; i < n_deferred_plabels; i++)
5680 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5681 break;
5683 /* If the deferred plabel list is empty, or this entry was not found
5684 on the list, create a new entry on the list. */
5685 if (deferred_plabels == NULL || i == n_deferred_plabels)
5687 tree id;
5689 if (deferred_plabels == 0)
5690 deferred_plabels = ggc_alloc<deferred_plabel> ();
5691 else
5692 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5693 deferred_plabels,
5694 n_deferred_plabels + 1);
5696 i = n_deferred_plabels++;
5697 deferred_plabels[i].internal_label = gen_label_rtx ();
5698 deferred_plabels[i].symbol = symbol;
5700 /* Gross. We have just implicitly taken the address of this
5701 function. Mark it in the same manner as assemble_name. */
5702 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5703 if (id)
5704 mark_referenced (id);
5707 return deferred_plabels[i].internal_label;
5710 static void
5711 output_deferred_plabels (void)
5713 size_t i;
5715 /* If we have some deferred plabels, then we need to switch into the
5716 data or readonly data section, and align it to a 4 byte boundary
5717 before outputting the deferred plabels. */
5718 if (n_deferred_plabels)
5720 switch_to_section (flag_pic ? data_section : readonly_data_section);
5721 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5724 /* Now output the deferred plabels. */
5725 for (i = 0; i < n_deferred_plabels; i++)
5727 targetm.asm_out.internal_label (asm_out_file, "L",
5728 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5729 assemble_integer (deferred_plabels[i].symbol,
5730 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5734 /* Initialize optabs to point to emulation routines. */
5736 static void
5737 pa_init_libfuncs (void)
5739 if (HPUX_LONG_DOUBLE_LIBRARY)
5741 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5742 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5743 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5744 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5745 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5746 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5747 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5748 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5749 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5751 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5752 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5753 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5754 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5755 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5756 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5757 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5759 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5760 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5761 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5762 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5764 set_conv_libfunc (sfix_optab, SImode, TFmode,
5765 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5766 : "_U_Qfcnvfxt_quad_to_sgl");
5767 set_conv_libfunc (sfix_optab, DImode, TFmode,
5768 "_U_Qfcnvfxt_quad_to_dbl");
5769 set_conv_libfunc (ufix_optab, SImode, TFmode,
5770 "_U_Qfcnvfxt_quad_to_usgl");
5771 set_conv_libfunc (ufix_optab, DImode, TFmode,
5772 "_U_Qfcnvfxt_quad_to_udbl");
5774 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5775 "_U_Qfcnvxf_sgl_to_quad");
5776 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5777 "_U_Qfcnvxf_dbl_to_quad");
5778 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5779 "_U_Qfcnvxf_usgl_to_quad");
5780 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5781 "_U_Qfcnvxf_udbl_to_quad");
5784 if (TARGET_SYNC_LIBCALL)
5785 init_sync_libfuncs (8);
5788 /* HP's millicode routines mean something special to the assembler.
5789 Keep track of which ones we have used. */
5791 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5792 static void import_milli (enum millicodes);
5793 static char imported[(int) end1000];
5794 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5795 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5796 #define MILLI_START 10
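/* For example, importing mulI overwrites the "...." field at offset
   MILLI_START in a copy of import_string and emits:

	.IMPORT $$mulI,MILLICODE

   Each routine is imported at most once per output file, as tracked by
   the imported[] array. */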
5798 static void
5799 import_milli (enum millicodes code)
5801 char str[sizeof (import_string)];
5803 if (!imported[(int) code])
5805 imported[(int) code] = 1;
5806 strcpy (str, import_string);
5807 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5808 output_asm_insn (str, 0);
5812 /* The register constraints have put the operands and return value in
5813 the proper registers. */
5815 const char *
5816 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5818 import_milli (mulI);
5819 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5822 /* Emit the rtl for doing a division by a constant. */
5824 /* Do magic division millicodes exist for this value? */
5825 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
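/* That is, magic millicodes exist for the divisors 3, 5, 6, 7, 9, 10,
   12, 14 and 15. The zero entries (0, 1, 2, 4, 8, 11 and 13) include
   the powers of two, which are presumably handled by shifts rather
   than millicode calls. */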
5827 /* We'll use an array to keep track of the magic millicodes and
5828 whether or not we've used them already. [n][0] is signed, [n][1] is
5829 unsigned. */
5831 static int div_milli[16][2];
5833 int
5834 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5836 if (GET_CODE (operands[2]) == CONST_INT
5837 && INTVAL (operands[2]) > 0
5838 && INTVAL (operands[2]) < 16
5839 && pa_magic_milli[INTVAL (operands[2])])
5841 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5843 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5844 emit
5845 (gen_rtx_PARALLEL
5846 (VOIDmode,
5847 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5848 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5849 SImode,
5850 gen_rtx_REG (SImode, 26),
5851 operands[2])),
5852 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5853 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5854 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5855 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5856 gen_rtx_CLOBBER (VOIDmode, ret))));
5857 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5858 return 1;
5860 return 0;
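/* The PARALLEL above encodes the millicode calling convention: the
   dividend is copied into %r26, the quotient is returned in %r29, and
   the call clobbers %r25, %r26, the scratch operands, and the millicode
   return pointer (%r31, or %r2 in the 64-bit runtime). */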
5863 const char *
5864 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5866 int divisor;
5868 /* If the divisor is a constant, try to use one of the special
5869 opcodes. */
5870 if (GET_CODE (operands[0]) == CONST_INT)
5872 static char buf[100];
5873 divisor = INTVAL (operands[0]);
5874 if (!div_milli[divisor][unsignedp])
5876 div_milli[divisor][unsignedp] = 1;
5877 if (unsignedp)
5878 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5879 else
5880 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5882 if (unsignedp)
5884 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5885 INTVAL (operands[0]));
5886 return pa_output_millicode_call (insn,
5887 gen_rtx_SYMBOL_REF (SImode, buf));
5889 else
5891 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5892 INTVAL (operands[0]));
5893 return pa_output_millicode_call (insn,
5894 gen_rtx_SYMBOL_REF (SImode, buf));
5897 /* Divisor isn't a special constant. */
5898 else
5900 if (unsignedp)
5902 import_milli (divU);
5903 return pa_output_millicode_call (insn,
5904 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5906 else
5908 import_milli (divI);
5909 return pa_output_millicode_call (insn,
5910 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5915 /* Output a $$rem millicode to do mod. */
5917 const char *
5918 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5920 if (unsignedp)
5922 import_milli (remU);
5923 return pa_output_millicode_call (insn,
5924 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5926 else
5928 import_milli (remI);
5929 return pa_output_millicode_call (insn,
5930 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5934 void
5935 pa_output_arg_descriptor (rtx_insn *call_insn)
5937 const char *arg_regs[4];
5938 machine_mode arg_mode;
5939 rtx link;
5940 int i, output_flag = 0;
5941 int regno;
5943 /* We neither need nor want argument location descriptors for the
5944 64-bit runtime environment or the ELF32 environment. */
5945 if (TARGET_64BIT || TARGET_ELF32)
5946 return;
5948 for (i = 0; i < 4; i++)
5949 arg_regs[i] = 0;
5951 /* Specify explicitly that no argument relocations should take place
5952 if using the portable runtime calling conventions. */
5953 if (TARGET_PORTABLE_RUNTIME)
5955 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5956 asm_out_file);
5957 return;
5960 gcc_assert (CALL_P (call_insn));
5961 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5962 link; link = XEXP (link, 1))
5964 rtx use = XEXP (link, 0);
5966 if (! (GET_CODE (use) == USE
5967 && GET_CODE (XEXP (use, 0)) == REG
5968 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5969 continue;
5971 arg_mode = GET_MODE (XEXP (use, 0));
5972 regno = REGNO (XEXP (use, 0));
5973 if (regno >= 23 && regno <= 26)
5975 arg_regs[26 - regno] = "GR";
5976 if (arg_mode == DImode)
5977 arg_regs[25 - regno] = "GR";
5979 else if (regno >= 32 && regno <= 39)
5981 if (arg_mode == SFmode)
5982 arg_regs[(regno - 32) / 2] = "FR";
5983 else
5985 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5986 arg_regs[(regno - 34) / 2] = "FR";
5987 arg_regs[(regno - 34) / 2 + 1] = "FU";
5988 #else
5989 arg_regs[(regno - 34) / 2] = "FU";
5990 arg_regs[(regno - 34) / 2 + 1] = "FR";
5991 #endif
5995 fputs ("\t.CALL ", asm_out_file);
5996 for (i = 0; i < 4; i++)
5998 if (arg_regs[i])
6000 if (output_flag++)
6001 fputc (',', asm_out_file);
6002 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6005 fputc ('\n', asm_out_file);
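/* As a hypothetical example, a call passing one word argument in %r26
   and a double-precision argument in the second FP argument-register
   pair might emit:

	.CALL ARGW0=GR,ARGW1=FR,ARGW2=FU

   with FR and FU swapped when HP_FP_ARG_DESCRIPTOR_REVERSED is
   defined. */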
6008 /* Inform reload about cases where moving X with a mode MODE to or from
6009 a register in RCLASS requires an extra scratch or immediate register.
6010 Return the class needed for the immediate register. */
6012 static reg_class_t
6013 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6014 machine_mode mode, secondary_reload_info *sri)
6016 int regno;
6017 enum reg_class rclass = (enum reg_class) rclass_i;
6019 /* Handle the easy stuff first. */
6020 if (rclass == R1_REGS)
6021 return NO_REGS;
6023 if (REG_P (x))
6025 regno = REGNO (x);
6026 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6027 return NO_REGS;
6029 else
6030 regno = -1;
6032 /* If we have something like (mem (mem (...)), we can safely assume the
6033 inner MEM will end up in a general register after reloading, so there's
6034 no need for a secondary reload. */
6035 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6036 return NO_REGS;
6038 /* Trying to load a constant into a FP register during PIC code
6039 generation requires %r1 as a scratch register. For float modes,
6040 the only legitimate constant is CONST0_RTX. However, there are
6041 a few patterns that accept constant double operands. */
6042 if (flag_pic
6043 && FP_REG_CLASS_P (rclass)
6044 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6046 switch (mode)
6048 case SImode:
6049 sri->icode = CODE_FOR_reload_insi_r1;
6050 break;
6052 case DImode:
6053 sri->icode = CODE_FOR_reload_indi_r1;
6054 break;
6056 case SFmode:
6057 sri->icode = CODE_FOR_reload_insf_r1;
6058 break;
6060 case DFmode:
6061 sri->icode = CODE_FOR_reload_indf_r1;
6062 break;
6064 default:
6065 gcc_unreachable ();
6067 return NO_REGS;
6070 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6071 register when we're generating PIC code or when the operand isn't
6072 readonly. */
6073 if (pa_symbolic_expression_p (x))
6075 if (GET_CODE (x) == HIGH)
6076 x = XEXP (x, 0);
6078 if (flag_pic || !read_only_operand (x, VOIDmode))
6080 switch (mode)
6082 case SImode:
6083 sri->icode = CODE_FOR_reload_insi_r1;
6084 break;
6086 case DImode:
6087 sri->icode = CODE_FOR_reload_indi_r1;
6088 break;
6090 default:
6091 gcc_unreachable ();
6093 return NO_REGS;
6097 /* Profiling showed the PA port spends about 1.3% of its compilation
6098 time in true_regnum from calls inside pa_secondary_reload_class. */
6099 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6100 regno = true_regnum (x);
6102 /* Handle reloads for floating point loads and stores. */
6103 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6104 && FP_REG_CLASS_P (rclass))
6106 if (MEM_P (x))
6108 x = XEXP (x, 0);
6110 /* We don't need a secondary reload for indexed memory addresses.
6112 When INT14_OK_STRICT is true, it might appear that we could
6113 directly allow register indirect memory addresses. However,
6114 this doesn't work because we don't support SUBREGs in
6115 floating-point register copies and reload doesn't tell us
6116 when it's going to use a SUBREG. */
6117 if (IS_INDEX_ADDR_P (x))
6118 return NO_REGS;
6121 /* Request a secondary reload with a general scratch register
6122 for everything else. ??? Could symbolic operands be handled
6123 directly when generating non-pic PA 2.0 code? */
6124 sri->icode = (in_p
6125 ? direct_optab_handler (reload_in_optab, mode)
6126 : direct_optab_handler (reload_out_optab, mode));
6127 return NO_REGS;
6130 /* A SAR<->FP register copy requires an intermediate general register
6131 and secondary memory. We need a secondary reload with a general
6132 scratch register for spills. */
6133 if (rclass == SHIFT_REGS)
6135 /* Handle spill. */
6136 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6138 sri->icode = (in_p
6139 ? direct_optab_handler (reload_in_optab, mode)
6140 : direct_optab_handler (reload_out_optab, mode));
6141 return NO_REGS;
6144 /* Handle FP copy. */
6145 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6146 return GENERAL_REGS;
6149 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6150 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6151 && FP_REG_CLASS_P (rclass))
6152 return GENERAL_REGS;
6154 return NO_REGS;
6157 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6158 is only marked as live on entry by df-scan when it is a fixed
6159 register. It isn't a fixed register in the 64-bit runtime,
6160 so we need to mark it here. */
6162 static void
6163 pa_extra_live_on_entry (bitmap regs)
6165 if (TARGET_64BIT)
6166 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6169 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6170 to prevent it from being deleted. */
6172 rtx
6173 pa_eh_return_handler_rtx (void)
6175 rtx tmp;
6177 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6178 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6179 tmp = gen_rtx_MEM (word_mode, tmp);
6180 tmp->volatil = 1;
6181 return tmp;
6184 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6185 by invisible reference. As a GCC extension, we also pass anything
6186 with a zero or variable size by reference.
6188 The 64-bit runtime does not describe passing any types by invisible
6189 reference. The internals of GCC can't currently handle passing
6190 empty structures, and zero or variable length arrays when they are
6191 not passed entirely on the stack or by reference. Thus, as a GCC
6192 extension, we pass these types by reference. The HP compiler doesn't
6193 support these types, so hopefully there shouldn't be any compatibility
6194 issues. This may have to be revisited when HP releases a C99 compiler
6195 or updates the ABI. */
6197 static bool
6198 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6199 machine_mode mode, const_tree type,
6200 bool named ATTRIBUTE_UNUSED)
6202 HOST_WIDE_INT size;
6204 if (type)
6205 size = int_size_in_bytes (type);
6206 else
6207 size = GET_MODE_SIZE (mode);
6209 if (TARGET_64BIT)
6210 return size <= 0;
6211 else
6212 return size <= 0 || size > 8;
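/* For example, a 16-byte struct is passed by invisible reference in the
   32-bit runtime (size > 8) but by value in the 64-bit runtime, while a
   zero-sized struct is passed by reference in both runtimes. */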
6215 enum direction
6216 pa_function_arg_padding (machine_mode mode, const_tree type)
6218 if (mode == BLKmode
6219 || (TARGET_64BIT
6220 && type
6221 && (AGGREGATE_TYPE_P (type)
6222 || TREE_CODE (type) == COMPLEX_TYPE
6223 || TREE_CODE (type) == VECTOR_TYPE)))
6225 /* Return none if justification is not required. */
6226 if (type
6227 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6228 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6229 return none;
6231 /* The directions set here are ignored when a BLKmode argument larger
6232 than a word is placed in a register. Different code is used for
6233 the stack and registers. This makes it difficult to have a
6234 consistent data representation for both the stack and registers.
6235 For both runtimes, the justification and padding for arguments on
6236 the stack and in registers should be identical. */
6237 if (TARGET_64BIT)
6238 /* The 64-bit runtime specifies left justification for aggregates. */
6239 return upward;
6240 else
6241 /* The 32-bit runtime architecture specifies right justification.
6242 When the argument is passed on the stack, the argument is padded
6243 with garbage on the left. The HP compiler pads with zeros. */
6244 return downward;
6247 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6248 return downward;
6249 else
6250 return none;
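/* For example, a 3-byte struct argument is right justified in its slot
   in the 32-bit runtime (padding on the left) and left justified in the
   64-bit runtime (padding on the right), while a scalar narrower than
   PARM_BOUNDARY is padded downward in both runtimes. */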
6254 /* Do what is necessary for `va_start'. We look at the current function
6255 to determine if stdargs or varargs is used and fill in an initial
6256 va_list. A pointer to this constructor is returned. */
6258 static rtx
6259 hppa_builtin_saveregs (void)
6261 rtx offset, dest;
6262 tree fntype = TREE_TYPE (current_function_decl);
6263 int argadj = ((!stdarg_p (fntype))
6264 ? UNITS_PER_WORD : 0);
6266 if (argadj)
6267 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6268 else
6269 offset = crtl->args.arg_offset_rtx;
6271 if (TARGET_64BIT)
6273 int i, off;
6275 /* Adjust for varargs/stdarg differences. */
6276 if (argadj)
6277 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6278 else
6279 offset = crtl->args.arg_offset_rtx;
6281 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6282 from the incoming arg pointer and growing to larger addresses. */
6283 for (i = 26, off = -64; i >= 19; i--, off += 8)
6284 emit_move_insn (gen_rtx_MEM (word_mode,
6285 plus_constant (Pmode,
6286 arg_pointer_rtx, off)),
6287 gen_rtx_REG (word_mode, i));
6289 /* The incoming args pointer points just beyond the flushback area;
6290 normally this is not a serious concern. However, when we are doing
6291 varargs/stdargs we want to make the arg pointer point to the start
6292 of the incoming argument area. */
6293 emit_move_insn (virtual_incoming_args_rtx,
6294 plus_constant (Pmode, arg_pointer_rtx, -64));
6296 /* Now return a pointer to the first anonymous argument. */
6297 return copy_to_reg (expand_binop (Pmode, add_optab,
6298 virtual_incoming_args_rtx,
6299 offset, 0, 0, OPTAB_LIB_WIDEN));
6302 /* Store general registers on the stack. */
6303 dest = gen_rtx_MEM (BLKmode,
6304 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6305 -16));
6306 set_mem_alias_set (dest, get_varargs_alias_set ());
6307 set_mem_align (dest, BITS_PER_WORD);
6308 move_block_from_reg (23, dest, 4);
6310 /* move_block_from_reg will emit code to store the argument registers
6311 individually as scalar stores.
6313 However, other insns may later load from the same addresses for
6314 a structure load (passing a struct to a varargs routine).
6316 The alias code assumes that such aliasing can never happen, so we
6317 have to keep memory referencing insns from moving up beyond the
6318 last argument register store. So we emit a blockage insn here. */
6319 emit_insn (gen_blockage ());
6321 return copy_to_reg (expand_binop (Pmode, add_optab,
6322 crtl->args.internal_arg_pointer,
6323 offset, 0, 0, OPTAB_LIB_WIDEN));
6326 static void
6327 hppa_va_start (tree valist, rtx nextarg)
6329 nextarg = expand_builtin_saveregs ();
6330 std_expand_builtin_va_start (valist, nextarg);
6333 static tree
6334 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6335 gimple_seq *post_p)
6337 if (TARGET_64BIT)
6339 /* Args grow upward. We can use the generic routines. */
6340 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6342 else /* !TARGET_64BIT */
6344 tree ptr = build_pointer_type (type);
6345 tree valist_type;
6346 tree t, u;
6347 unsigned int size, ofs;
6348 bool indirect;
6350 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6351 if (indirect)
6353 type = ptr;
6354 ptr = build_pointer_type (type);
6356 size = int_size_in_bytes (type);
6357 valist_type = TREE_TYPE (valist);
6359 /* Args grow down. Not handled by generic routines. */
6361 u = fold_convert (sizetype, size_in_bytes (type));
6362 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6363 t = fold_build_pointer_plus (valist, u);
6365 /* Align to 4 or 8 byte boundary depending on argument size. */
6367 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6368 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6369 t = fold_convert (valist_type, t);
6371 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6373 ofs = (8 - size) % 4;
6374 if (ofs != 0)
6375 t = fold_build_pointer_plus_hwi (t, ofs);
6377 t = fold_convert (ptr, t);
6378 t = build_va_arg_indirect_ref (t);
6380 if (indirect)
6381 t = build_va_arg_indirect_ref (t);
6383 return t;
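/* As a worked example for the 32-bit runtime, fetching a 1-byte
   argument computes t = valist - 1 rounded down to a 4-byte boundary
   (size <= 4), then ofs = (8 - 1) % 4 = 3, so the result points at the
   last byte of the slot, matching the right justification of small
   arguments described in pa_function_arg_padding. */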
6387 /* True if MODE is valid for the target. By "valid", we mean able to
6388 be manipulated in non-trivial ways. In particular, this means all
6389 the arithmetic is supported.
6391 Currently, TImode is not valid as the HP 64-bit runtime documentation
6392 doesn't document the alignment and calling conventions for this type.
6393 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6394 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6396 static bool
6397 pa_scalar_mode_supported_p (machine_mode mode)
6399 int precision = GET_MODE_PRECISION (mode);
6401 switch (GET_MODE_CLASS (mode))
6403 case MODE_PARTIAL_INT:
6404 case MODE_INT:
6405 if (precision == CHAR_TYPE_SIZE)
6406 return true;
6407 if (precision == SHORT_TYPE_SIZE)
6408 return true;
6409 if (precision == INT_TYPE_SIZE)
6410 return true;
6411 if (precision == LONG_TYPE_SIZE)
6412 return true;
6413 if (precision == LONG_LONG_TYPE_SIZE)
6414 return true;
6415 return false;
6417 case MODE_FLOAT:
6418 if (precision == FLOAT_TYPE_SIZE)
6419 return true;
6420 if (precision == DOUBLE_TYPE_SIZE)
6421 return true;
6422 if (precision == LONG_DOUBLE_TYPE_SIZE)
6423 return true;
6424 return false;
6426 case MODE_DECIMAL_FLOAT:
6427 return false;
6429 default:
6430 gcc_unreachable ();
6434 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6435 it branches into the delay slot. Otherwise, return FALSE. */
6437 static bool
6438 branch_to_delay_slot_p (rtx_insn *insn)
6440 rtx_insn *jump_insn;
6442 if (dbr_sequence_length ())
6443 return FALSE;
6445 jump_insn = next_active_insn (JUMP_LABEL (insn));
6446 while (insn)
6448 insn = next_active_insn (insn);
6449 if (jump_insn == insn)
6450 return TRUE;
6452 /* We can't rely on the length of asms. So, we return FALSE when
6453 the branch is followed by an asm. */
6454 if (!insn
6455 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6456 || asm_noperands (PATTERN (insn)) >= 0
6457 || get_attr_length (insn) > 0)
6458 break;
6461 return FALSE;
6464 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6466 This occurs when INSN has an unfilled delay slot and is followed
6467 by an asm. Disaster can occur if the asm is empty and the jump
6468 branches into the delay slot. So, we add a nop in the delay slot
6469 when this occurs. */
6471 static bool
6472 branch_needs_nop_p (rtx_insn *insn)
6474 rtx_insn *jump_insn;
6476 if (dbr_sequence_length ())
6477 return FALSE;
6479 jump_insn = next_active_insn (JUMP_LABEL (insn));
6480 while (insn)
6482 insn = next_active_insn (insn);
6483 if (!insn || jump_insn == insn)
6484 return TRUE;
6486 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6487 || asm_noperands (PATTERN (insn)) >= 0)
6488 && get_attr_length (insn) > 0)
6489 break;
6492 return FALSE;
6495 /* Return TRUE if INSN, a forward jump insn, can use nullification
6496 to skip the following instruction. This avoids an extra cycle due
6497 to a mis-predicted branch when we fall through. */
6499 static bool
6500 use_skip_p (rtx_insn *insn)
6502 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL (insn));
6504 while (insn)
6506 insn = next_active_insn (insn);
6508 /* We can't rely on the length of asms, so we can't skip asms. */
6509 if (!insn
6510 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6511 || asm_noperands (PATTERN (insn)) >= 0)
6512 break;
6513 if (get_attr_length (insn) == 4
6514 && jump_insn == next_active_insn (insn))
6515 return TRUE;
6516 if (get_attr_length (insn) > 0)
6517 break;
6520 return FALSE;
6523 /* This routine handles all the normal conditional branch sequences we
6524 might need to generate. It handles compare immediate vs compare
6525 register, nullification of delay slots, varying length branches,
6526 negated branches, and all combinations of the above. It returns the
6527 output appropriate to emit the branch corresponding to all given
6528 parameters. */
6530 const char *
6531 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6533 static char buf[100];
6534 bool useskip;
6535 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6536 int length = get_attr_length (insn);
6537 int xdelay;
6539 /* A conditional branch to the following instruction (e.g. the delay slot)
6540 is asking for a disaster. This can happen when not optimizing and
6541 when jump optimization fails.
6543 While it is usually safe to emit nothing, this can fail if the
6544 preceding instruction is a nullified branch with an empty delay
6545 slot and the same branch target as this branch. We could check
6546 for this but jump optimization should eliminate nop jumps. It
6547 is always safe to emit a nop. */
6548 if (branch_to_delay_slot_p (insn))
6549 return "nop";
6551 /* The doubleword form of the cmpib instruction doesn't have the LEU
6552 and GTU conditions while the cmpb instruction does. Since we accept
6553 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6554 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6555 operands[2] = gen_rtx_REG (DImode, 0);
6556 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6557 operands[1] = gen_rtx_REG (DImode, 0);
6559 /* If this is a long branch with its delay slot unfilled, set `nullify'
6560 as it can nullify the delay slot and save a nop. */
6561 if (length == 8 && dbr_sequence_length () == 0)
6562 nullify = 1;
6564 /* If this is a short forward conditional branch which did not get
6565 its delay slot filled, the delay slot can still be nullified. */
6566 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6567 nullify = forward_branch_p (insn);
6569 /* A forward branch over a single nullified insn can be done with a
6570 comclr instruction. This avoids a single cycle penalty due to
6571 mis-predicted branch if we fall through (branch not taken). */
6572 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6574 switch (length)
6576 /* All short conditional branches except backwards with an unfilled
6577 delay slot. */
6578 case 4:
6579 if (useskip)
6580 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6581 else
6582 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6583 if (GET_MODE (operands[1]) == DImode)
6584 strcat (buf, "*");
6585 if (negated)
6586 strcat (buf, "%B3");
6587 else
6588 strcat (buf, "%S3");
6589 if (useskip)
6590 strcat (buf, " %2,%r1,%%r0");
6591 else if (nullify)
6593 if (branch_needs_nop_p (insn))
6594 strcat (buf, ",n %2,%r1,%0%#");
6595 else
6596 strcat (buf, ",n %2,%r1,%0");
6598 else
6599 strcat (buf, " %2,%r1,%0");
6600 break;
6602 /* All long conditionals. Note a short backward branch with an
6603 unfilled delay slot is treated just like a long backward branch
6604 with an unfilled delay slot. */
6605 case 8:
6606 /* Handle weird backwards branch with a filled delay slot
6607 which is nullified. */
6608 if (dbr_sequence_length () != 0
6609 && ! forward_branch_p (insn)
6610 && nullify)
6612 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6613 if (GET_MODE (operands[1]) == DImode)
6614 strcat (buf, "*");
6615 if (negated)
6616 strcat (buf, "%S3");
6617 else
6618 strcat (buf, "%B3");
6619 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6621 /* Handle short backwards branch with an unfilled delay slot.
6622 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6623 taken and untaken branches. */
6624 else if (dbr_sequence_length () == 0
6625 && ! forward_branch_p (insn)
6626 && INSN_ADDRESSES_SET_P ()
6627 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6628 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6630 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6631 if (GET_MODE (operands[1]) == DImode)
6632 strcat (buf, "*");
6633 if (negated)
6634 strcat (buf, "%B3 %2,%r1,%0%#");
6635 else
6636 strcat (buf, "%S3 %2,%r1,%0%#");
6638 else
6640 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6641 if (GET_MODE (operands[1]) == DImode)
6642 strcat (buf, "*");
6643 if (negated)
6644 strcat (buf, "%S3");
6645 else
6646 strcat (buf, "%B3");
6647 if (nullify)
6648 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6649 else
6650 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6652 break;
6654 default:
6655 /* The reversed conditional branch must branch over one additional
6656 instruction if the delay slot is filled and needs to be extracted
6657 by pa_output_lbranch. If the delay slot is empty or this is a
6658 nullified forward branch, the instruction after the reversed
6659 condition branch must be nullified. */
6660 if (dbr_sequence_length () == 0
6661 || (nullify && forward_branch_p (insn)))
6663 nullify = 1;
6664 xdelay = 0;
6665 operands[4] = GEN_INT (length);
6667 else
6669 xdelay = 1;
6670 operands[4] = GEN_INT (length + 4);
6673 /* Create a reversed conditional branch which branches around
6674 the following insns. */
6675 if (GET_MODE (operands[1]) != DImode)
6677 if (nullify)
6679 if (negated)
6680 strcpy (buf,
6681 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6682 else
6683 strcpy (buf,
6684 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6686 else
6688 if (negated)
6689 strcpy (buf,
6690 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6691 else
6692 strcpy (buf,
6693 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6696 else
6698 if (nullify)
6700 if (negated)
6701 strcpy (buf,
6702 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6703 else
6704 strcpy (buf,
6705 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6707 else
6709 if (negated)
6710 strcpy (buf,
6711 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6712 else
6713 strcpy (buf,
6714 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6718 output_asm_insn (buf, operands);
6719 return pa_output_lbranch (operands[0], insn, xdelay);
6721 return buf;
6724 /* Output a PIC pc-relative instruction sequence to load the address of
6725 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6726 or a code label. OPERANDS[1] specifies the register to use to load
6727 the program counter. OPERANDS[3] may be used for label generation
6728 The sequence is always three instructions in length. The program
6729 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6730 Register %r1 is clobbered. */
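/* The compensating constants in the two GAS sequences below differ by
   exactly eight bytes ($PIC_pcrel$0+12/+16 for the mfia form versus
   +4/+8 for the bl form), reflecting the eight-byte difference in the
   recorded program counter noted above. */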
6732 static void
6733 pa_output_pic_pcrel_sequence (rtx *operands)
6735 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6736 if (TARGET_PA_20)
6738 /* We can use mfia to determine the current program counter. */
6739 if (TARGET_SOM || !TARGET_GAS)
6741 operands[3] = gen_label_rtx ();
6742 targetm.asm_out.internal_label (asm_out_file, "L",
6743 CODE_LABEL_NUMBER (operands[3]));
6744 output_asm_insn ("mfia %1", operands);
6745 output_asm_insn ("addil L'%0-%l3,%1", operands);
6746 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6748 else
6750 output_asm_insn ("mfia %1", operands);
6751 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6752 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6755 else
6757 /* We need to use a branch to determine the current program counter. */
6758 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6759 if (TARGET_SOM || !TARGET_GAS)
6761 operands[3] = gen_label_rtx ();
6762 output_asm_insn ("addil L'%0-%l3,%1", operands);
6763 targetm.asm_out.internal_label (asm_out_file, "L",
6764 CODE_LABEL_NUMBER (operands[3]));
6765 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6767 else
6769 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6770 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6775 /* This routine handles output of long unconditional branches that
6776 exceed the maximum range of a simple branch instruction. Since
6777 we don't have a register available for the branch, we save register
6778 %r1 in the frame marker, load the branch destination DEST into %r1,
6779 execute the branch, and restore %r1 in the delay slot of the branch.
6781 Since long branches may have an insn in the delay slot and the
6782 delay slot is used to restore %r1, we in general need to extract
6783 this insn and execute it before the branch. However, to facilitate
6784 use of this function by conditional branches, we also provide an
6785 option to not extract the delay insn so that it will be emitted
6786 after the long branch. So, if there is an insn in the delay slot,
6787 it is extracted if XDELAY is nonzero.
6789 The lengths of the various long-branch sequences are 20, 16 and 24
6790 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
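/* For instance, the 16-byte non-PIC sequence, assuming the return
   pointer slot is free, is roughly:

	stw %r1,-20(%r30)	; save %r1 in the frame marker
	ldil L'dest,%r1
	be R'dest(%sr4,%r1)	; very long branch
	ldw -20(%r30),%r1	; restore %r1 in the delay slot

   The portable runtime and PIC forms add one and two instructions,
   giving the 20- and 24-byte figures. */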
6792 const char *
6793 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6795 rtx xoperands[4];
6797 xoperands[0] = dest;
6799 /* First, free up the delay slot. */
6800 if (xdelay && dbr_sequence_length () != 0)
6802 /* We can't handle a jump in the delay slot. */
6803 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6805 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6806 optimize, 0, NULL);
6808 /* Now delete the delay insn. */
6809 SET_INSN_DELETED (NEXT_INSN (insn));
6812 /* Output an insn to save %r1. The runtime documentation doesn't
6813 specify whether the "Clean Up" slot in the caller's frame can
6814 be clobbered by the callee. It isn't copied by HP's builtin
6815 alloca, so this suggests that it can be clobbered if necessary.
6816 The "Static Link" location is copied by HP builtin alloca, so
6817 we avoid using it. Using the cleanup slot might be a problem
6818 if we have to interoperate with languages that pass cleanup
6819 information. However, it should be possible to handle these
6820 situations with GCC's asm feature.
6822 The "Current RP" slot is reserved for the called procedure, so
6823 we try to use it when we don't have a frame of our own. It's
6824 rather unlikely that we won't have a frame when we need to emit
6825 a very long branch.
6827 Really the way to go long term is a register scavenger; goto
6828 the target of the jump and find a register which we can use
6829 as a scratch to hold the value in %r1. Then, we wouldn't have
6830 to free up the delay slot or clobber a slot that may be needed
6831 for other purposes. */
6832 if (TARGET_64BIT)
6834 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6835 /* Use the return pointer slot in the frame marker. */
6836 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6837 else
6838 /* Use the slot at -40 in the frame marker since HP builtin
6839 alloca doesn't copy it. */
6840 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6842 else
6844 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6845 /* Use the return pointer slot in the frame marker. */
6846 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6847 else
6848 /* Use the "Clean Up" slot in the frame marker. In GCC,
6849 the only other use of this location is for copying a
6850 floating point double argument from a floating-point
6851 register to two general registers. The copy is done
6852 as an "atomic" operation when outputting a call, so it
6853 won't interfere with our using the location here. */
6854 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6857 if (TARGET_PORTABLE_RUNTIME)
6859 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6860 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6861 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6863 else if (flag_pic)
6865 xoperands[1] = gen_rtx_REG (Pmode, 1);
6866 xoperands[2] = xoperands[1];
6867 pa_output_pic_pcrel_sequence (xoperands);
6868 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6870 else
6871 /* Now output a very long branch to the original target. */
6872 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6874 /* Now restore the value of %r1 in the delay slot. */
6875 if (TARGET_64BIT)
6877 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6878 return "ldd -16(%%r30),%%r1";
6879 else
6880 return "ldd -40(%%r30),%%r1";
6882 else
6884 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6885 return "ldw -20(%%r30),%%r1";
6886 else
6887 return "ldw -12(%%r30),%%r1";
6891 /* This routine handles all the branch-on-bit conditional branch sequences we
6892 might need to generate. It handles nullification of delay slots,
6893 varying length branches, negated branches and all combinations of the
6894 above. It returns the appropriate output template to emit the branch. */
6896 const char *
6897 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6899 static char buf[100];
6900 bool useskip;
6901 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6902 int length = get_attr_length (insn);
6903 int xdelay;
6905 /* A conditional branch to the following instruction (e.g. the delay slot) is
6906 asking for a disaster. I do not think this can happen as this pattern
6907 is only used when optimizing; jump optimization should eliminate the
6908 jump. But be prepared just in case. */
6910 if (branch_to_delay_slot_p (insn))
6911 return "nop";
6913 /* If this is a long branch with its delay slot unfilled, set `nullify'
6914 as it can nullify the delay slot and save a nop. */
6915 if (length == 8 && dbr_sequence_length () == 0)
6916 nullify = 1;
6918 /* If this is a short forward conditional branch which did not get
6919 its delay slot filled, the delay slot can still be nullified. */
6920 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6921 nullify = forward_branch_p (insn);
6923 /* A forward branch over a single nullified insn can be done with an
6924 extrs instruction. This avoids a single cycle penalty due to
6925 mis-predicted branch if we fall through (branch not taken). */
6926 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6928 switch (length)
6931 /* All short conditional branches except backwards with an unfilled
6932 delay slot. */
6933 case 4:
6934 if (useskip)
6935 strcpy (buf, "{extrs,|extrw,s,}");
6936 else
6937 strcpy (buf, "bb,");
6938 if (useskip && GET_MODE (operands[0]) == DImode)
6939 strcpy (buf, "extrd,s,*");
6940 else if (GET_MODE (operands[0]) == DImode)
6941 strcpy (buf, "bb,*");
6942 if ((which == 0 && negated)
6943 || (which == 1 && ! negated))
6944 strcat (buf, ">=");
6945 else
6946 strcat (buf, "<");
6947 if (useskip)
6948 strcat (buf, " %0,%1,1,%%r0");
6949 else if (nullify && negated)
6951 if (branch_needs_nop_p (insn))
6952 strcat (buf, ",n %0,%1,%3%#");
6953 else
6954 strcat (buf, ",n %0,%1,%3");
6956 else if (nullify && ! negated)
6958 if (branch_needs_nop_p (insn))
6959 strcat (buf, ",n %0,%1,%2%#");
6960 else
6961 strcat (buf, ",n %0,%1,%2");
6963 else if (! nullify && negated)
6964 strcat (buf, " %0,%1,%3");
6965 else if (! nullify && ! negated)
6966 strcat (buf, " %0,%1,%2");
6967 break;
6969 /* All long conditionals. Note a short backward branch with an
6970 unfilled delay slot is treated just like a long backward branch
6971 with an unfilled delay slot. */
6972 case 8:
6973 /* Handle weird backwards branch with a filled delay slot
6974 which is nullified. */
6975 if (dbr_sequence_length () != 0
6976 && ! forward_branch_p (insn)
6977 && nullify)
6979 strcpy (buf, "bb,");
6980 if (GET_MODE (operands[0]) == DImode)
6981 strcat (buf, "*");
6982 if ((which == 0 && negated)
6983 || (which == 1 && ! negated))
6984 strcat (buf, "<");
6985 else
6986 strcat (buf, ">=");
6987 if (negated)
6988 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6989 else
6990 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6992 /* Handle short backwards branch with an unfilled delay slot.
6993 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6994 taken and untaken branches. */
6995 else if (dbr_sequence_length () == 0
6996 && ! forward_branch_p (insn)
6997 && INSN_ADDRESSES_SET_P ()
6998 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6999 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7001 strcpy (buf, "bb,");
7002 if (GET_MODE (operands[0]) == DImode)
7003 strcat (buf, "*");
7004 if ((which == 0 && negated)
7005 || (which == 1 && ! negated))
7006 strcat (buf, ">=");
7007 else
7008 strcat (buf, "<");
7009 if (negated)
7010 strcat (buf, " %0,%1,%3%#");
7011 else
7012 strcat (buf, " %0,%1,%2%#");
7014 else
7016 if (GET_MODE (operands[0]) == DImode)
7017 strcpy (buf, "extrd,s,*");
7018 else
7019 strcpy (buf, "{extrs,|extrw,s,}");
7020 if ((which == 0 && negated)
7021 || (which == 1 && ! negated))
7022 strcat (buf, "<");
7023 else
7024 strcat (buf, ">=");
7025 if (nullify && negated)
7026 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7027 else if (nullify && ! negated)
7028 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7029 else if (negated)
7030 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7031 else
7032 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7034 break;
7036 default:
7037 /* The reversed conditional branch must branch over one additional
7038 instruction if the delay slot is filled and needs to be extracted
7039 by pa_output_lbranch. If the delay slot is empty or this is a
7040 nullified forward branch, the instruction after the reversed
7041 condition branch must be nullified. */
7042 if (dbr_sequence_length () == 0
7043 || (nullify && forward_branch_p (insn)))
7045 nullify = 1;
7046 xdelay = 0;
7047 operands[4] = GEN_INT (length);
7049 else
7051 xdelay = 1;
7052 operands[4] = GEN_INT (length + 4);
7055 if (GET_MODE (operands[0]) == DImode)
7056 strcpy (buf, "bb,*");
7057 else
7058 strcpy (buf, "bb,");
7059 if ((which == 0 && negated)
7060 || (which == 1 && !negated))
7061 strcat (buf, "<");
7062 else
7063 strcat (buf, ">=");
7064 if (nullify)
7065 strcat (buf, ",n %0,%1,.+%4");
7066 else
7067 strcat (buf, " %0,%1,.+%4");
7068 output_asm_insn (buf, operands);
7069 return pa_output_lbranch (negated ? operands[3] : operands[2],
7070 insn, xdelay);
7072 return buf;
7075 /* This routine handles all the branch-on-variable-bit conditional branch
7076 sequences we might need to generate. It handles nullification of delay
7077 slots, varying length branches, negated branches and all combinations
7078 of the above. It returns the appropriate output template to emit the
7079 branch. */
7081 const char *
7082 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7083 int which)
7085 static char buf[100];
7086 bool useskip;
7087 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7088 int length = get_attr_length (insn);
7089 int xdelay;
7091 /* A conditional branch to the following instruction (e.g. the delay slot) is
7092 asking for a disaster. I do not think this can happen as this pattern
7093 is only used when optimizing; jump optimization should eliminate the
7094 jump. But be prepared just in case. */
7096 if (branch_to_delay_slot_p (insn))
7097 return "nop";
7099 /* If this is a long branch with its delay slot unfilled, set `nullify'
7100 as it can nullify the delay slot and save a nop. */
7101 if (length == 8 && dbr_sequence_length () == 0)
7102 nullify = 1;
7104 /* If this is a short forward conditional branch which did not get
7105 its delay slot filled, the delay slot can still be nullified. */
7106 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7107 nullify = forward_branch_p (insn);
7109 /* A forward branch over a single nullified insn can be done with an
7110 extrs instruction. This avoids a single cycle penalty due to
7111 mis-predicted branch if we fall through (branch not taken). */
7112 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7114 switch (length)
7117 /* All short conditional branches except backwards with an unfilled
7118 delay slot. */
7119 case 4:
7120 if (useskip)
7121 strcpy (buf, "{vextrs,|extrw,s,}");
7122 else
7123 strcpy (buf, "{bvb,|bb,}");
7124 if (useskip && GET_MODE (operands[0]) == DImode)
7125 strcpy (buf, "extrd,s,*");
7126 else if (GET_MODE (operands[0]) == DImode)
7127 strcpy (buf, "bb,*");
7128 if ((which == 0 && negated)
7129 || (which == 1 && ! negated))
7130 strcat (buf, ">=");
7131 else
7132 strcat (buf, "<");
7133 if (useskip)
7134 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7135 else if (nullify && negated)
7137 if (branch_needs_nop_p (insn))
7138 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7139 else
7140 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7142 else if (nullify && ! negated)
7144 if (branch_needs_nop_p (insn))
7145 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7146 else
7147 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7149 else if (! nullify && negated)
7150 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7151 else if (! nullify && ! negated)
7152 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7153 break;
7155 /* All long conditionals. Note a short backward branch with an
7156 unfilled delay slot is treated just like a long backward branch
7157 with an unfilled delay slot. */
7158 case 8:
7159 /* Handle weird backwards branch with a filled delay slot
7160 which is nullified. */
7161 if (dbr_sequence_length () != 0
7162 && ! forward_branch_p (insn)
7163 && nullify)
7165 strcpy (buf, "{bvb,|bb,}");
7166 if (GET_MODE (operands[0]) == DImode)
7167 strcat (buf, "*");
7168 if ((which == 0 && negated)
7169 || (which == 1 && ! negated))
7170 strcat (buf, "<");
7171 else
7172 strcat (buf, ">=");
7173 if (negated)
7174 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7175 else
7176 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7178 /* Handle short backwards branch with an unfilled delay slot.
7179 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7180 taken and untaken branches. */
7181 else if (dbr_sequence_length () == 0
7182 && ! forward_branch_p (insn)
7183 && INSN_ADDRESSES_SET_P ()
7184 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7185 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7187 strcpy (buf, "{bvb,|bb,}");
7188 if (GET_MODE (operands[0]) == DImode)
7189 strcat (buf, "*");
7190 if ((which == 0 && negated)
7191 || (which == 1 && ! negated))
7192 strcat (buf, ">=");
7193 else
7194 strcat (buf, "<");
7195 if (negated)
7196 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7197 else
7198 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7200 else
7202 strcpy (buf, "{vextrs,|extrw,s,}");
7203 if (GET_MODE (operands[0]) == DImode)
7204 strcpy (buf, "extrd,s,*");
7205 if ((which == 0 && negated)
7206 || (which == 1 && ! negated))
7207 strcat (buf, "<");
7208 else
7209 strcat (buf, ">=");
7210 if (nullify && negated)
7211 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7212 else if (nullify && ! negated)
7213 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7214 else if (negated)
7215 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7216 else
7217 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7219 break;
7221 default:
7222 /* The reversed conditional branch must branch over one additional
7223 instruction if the delay slot is filled and needs to be extracted
7224 by pa_output_lbranch. If the delay slot is empty or this is a
7225 nullified forward branch, the instruction after the reversed
7226 condition branch must be nullified. */
7227 if (dbr_sequence_length () == 0
7228 || (nullify && forward_branch_p (insn)))
7230 nullify = 1;
7231 xdelay = 0;
7232 operands[4] = GEN_INT (length);
7234 else
7236 xdelay = 1;
7237 operands[4] = GEN_INT (length + 4);
7240 if (GET_MODE (operands[0]) == DImode)
7241 strcpy (buf, "bb,*");
7242 else
7243 strcpy (buf, "{bvb,|bb,}");
7244 if ((which == 0 && negated)
7245 || (which == 1 && !negated))
7246 strcat (buf, "<");
7247 else
7248 strcat (buf, ">=");
7249 if (nullify)
7250 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7251 else
7252 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7253 output_asm_insn (buf, operands);
7254 return pa_output_lbranch (negated ? operands[3] : operands[2],
7255 insn, xdelay);
7257 return buf;
7260 /* Return the output template for emitting a dbra type insn.
7262 Note it may perform some output operations on its own before
7263 returning the final output string. */
7264 const char *
7265 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7267 int length = get_attr_length (insn);
7269 /* A conditional branch to the following instruction (e.g. the delay slot) is
7270 asking for a disaster. Be prepared! */
7272 if (branch_to_delay_slot_p (insn))
7274 if (which_alternative == 0)
7275 return "ldo %1(%0),%0";
7276 else if (which_alternative == 1)
7278 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7279 output_asm_insn ("ldw -16(%%r30),%4", operands);
7280 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7281 return "{fldws|fldw} -16(%%r30),%0";
7283 else
7285 output_asm_insn ("ldw %0,%4", operands);
7286 return "ldo %1(%4),%4\n\tstw %4,%0";
7290 if (which_alternative == 0)
7292 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7293 int xdelay;
7295 /* If this is a long branch with its delay slot unfilled, set `nullify'
7296 as it can nullify the delay slot and save a nop. */
7297 if (length == 8 && dbr_sequence_length () == 0)
7298 nullify = 1;
7300 /* If this is a short forward conditional branch which did not get
7301 its delay slot filled, the delay slot can still be nullified. */
7302 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7303 nullify = forward_branch_p (insn);
7305 switch (length)
7307 case 4:
7308 if (nullify)
7310 if (branch_needs_nop_p (insn))
7311 return "addib,%C2,n %1,%0,%3%#";
7312 else
7313 return "addib,%C2,n %1,%0,%3";
7315 else
7316 return "addib,%C2 %1,%0,%3";
7318 case 8:
7319 /* Handle weird backwards branch with a filled delay slot
7320 which is nullified. */
7321 if (dbr_sequence_length () != 0
7322 && ! forward_branch_p (insn)
7323 && nullify)
7324 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7325 /* Handle short backwards branch with an unfilled delay slot.
7326 Using an addb;nop rather than addi;bl saves 1 cycle for both
7327 taken and untaken branches. */
7328 else if (dbr_sequence_length () == 0
7329 && ! forward_branch_p (insn)
7330 && INSN_ADDRESSES_SET_P ()
7331 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7332 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7333 return "addib,%C2 %1,%0,%3%#";
7335 /* Handle normal cases. */
7336 if (nullify)
7337 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7338 else
7339 return "addi,%N2 %1,%0,%0\n\tb %3";
7341 default:
7342 /* The reversed conditional branch must branch over one additional
7343 instruction if the delay slot is filled and needs to be extracted
7344 by pa_output_lbranch. If the delay slot is empty or this is a
7345 nullified forward branch, the instruction after the reversed
7346 condition branch must be nullified. */
7347 if (dbr_sequence_length () == 0
7348 || (nullify && forward_branch_p (insn)))
7350 nullify = 1;
7351 xdelay = 0;
7352 operands[4] = GEN_INT (length);
7354 else
7356 xdelay = 1;
7357 operands[4] = GEN_INT (length + 4);
7360 if (nullify)
7361 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7362 else
7363 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7365 return pa_output_lbranch (operands[3], insn, xdelay);
7369 /* Deal with gross reload from FP register case. */
7370 else if (which_alternative == 1)
7372 /* Move loop counter from FP register to MEM then into a GR,
7373 increment the GR, store the GR into MEM, and finally reload
7374 the FP register from MEM from within the branch's delay slot. */
7375 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7376 operands);
7377 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7378 if (length == 24)
7379 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7380 else if (length == 28)
7381 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7382 else
7384 operands[5] = GEN_INT (length - 16);
7385 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7386 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7387 return pa_output_lbranch (operands[3], insn, 0);
7390 /* Deal with gross reload from memory case. */
7391 else
7393 /* Reload loop counter from memory, the store back to memory
7394 happens in the branch's delay slot. */
7395 output_asm_insn ("ldw %0,%4", operands);
7396 if (length == 12)
7397 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7398 else if (length == 16)
7399 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7400 else
7402 operands[5] = GEN_INT (length - 4);
7403 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7404 return pa_output_lbranch (operands[3], insn, 0);
7409 /* Return the output template for emitting a movb type insn.
7411 Note it may perform some output operations on its own before
7412 returning the final output string. */
7413 const char *
7414 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7415 int reverse_comparison)
7417 int length = get_attr_length (insn);
7419 /* A conditional branch to the following instruction (e.g. the delay slot) is
7420 asking for a disaster. Be prepared! */
7422 if (branch_to_delay_slot_p (insn))
7424 if (which_alternative == 0)
7425 return "copy %1,%0";
7426 else if (which_alternative == 1)
7428 output_asm_insn ("stw %1,-16(%%r30)", operands);
7429 return "{fldws|fldw} -16(%%r30),%0";
7431 else if (which_alternative == 2)
7432 return "stw %1,%0";
7433 else
7434 return "mtsar %r1";
7437 /* Support the second variant. */
7438 if (reverse_comparison)
7439 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7441 if (which_alternative == 0)
7443 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7444 int xdelay;
7446 /* If this is a long branch with its delay slot unfilled, set `nullify'
7447 as it can nullify the delay slot and save a nop. */
7448 if (length == 8 && dbr_sequence_length () == 0)
7449 nullify = 1;
7451 /* If this is a short forward conditional branch which did not get
7452 its delay slot filled, the delay slot can still be nullified. */
7453 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7454 nullify = forward_branch_p (insn);
7456 switch (length)
7458 case 4:
7459 if (nullify)
7461 if (branch_needs_nop_p (insn))
7462 return "movb,%C2,n %1,%0,%3%#";
7463 else
7464 return "movb,%C2,n %1,%0,%3";
7466 else
7467 return "movb,%C2 %1,%0,%3";
7469 case 8:
7470 /* Handle weird backwards branch with a filled delay slot
7471 which is nullified. */
7472 if (dbr_sequence_length () != 0
7473 && ! forward_branch_p (insn)
7474 && nullify)
7475 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7477 /* Handle short backwards branch with an unfilled delay slot.
7478 Using a movb;nop rather than or;bl saves 1 cycle for both
7479 taken and untaken branches. */
7480 else if (dbr_sequence_length () == 0
7481 && ! forward_branch_p (insn)
7482 && INSN_ADDRESSES_SET_P ()
7483 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7484 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7485 return "movb,%C2 %1,%0,%3%#";
7486 /* Handle normal cases. */
7487 if (nullify)
7488 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7489 else
7490 return "or,%N2 %1,%%r0,%0\n\tb %3";
7492 default:
7493 /* The reversed conditional branch must branch over one additional
7494 instruction if the delay slot is filled and needs to be extracted
7495 by pa_output_lbranch. If the delay slot is empty or this is a
7496 nullified forward branch, the instruction after the reversed
7497 condition branch must be nullified. */
7498 if (dbr_sequence_length () == 0
7499 || (nullify && forward_branch_p (insn)))
7501 nullify = 1;
7502 xdelay = 0;
7503 operands[4] = GEN_INT (length);
7505 else
7507 xdelay = 1;
7508 operands[4] = GEN_INT (length + 4);
7511 if (nullify)
7512 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7513 else
7514 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7516 return pa_output_lbranch (operands[3], insn, xdelay);
7519 /* Deal with gross reload for FP destination register case. */
7520 else if (which_alternative == 1)
7522 /* Move source register to MEM, perform the branch test, then
7523 finally load the FP register from MEM from within the branch's
7524 delay slot. */
7525 output_asm_insn ("stw %1,-16(%%r30)", operands);
7526 if (length == 12)
7527 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7528 else if (length == 16)
7529 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7530 else
7532 operands[4] = GEN_INT (length - 4);
7533 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7534 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7535 return pa_output_lbranch (operands[3], insn, 0);
7538 /* Deal with gross reload from memory case. */
7539 else if (which_alternative == 2)
7541 /* Reload loop counter from memory, the store back to memory
7542 happens in the branch's delay slot. */
7543 if (length == 8)
7544 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7545 else if (length == 12)
7546 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7547 else
7549 operands[4] = GEN_INT (length);
7550 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7551 operands);
7552 return pa_output_lbranch (operands[3], insn, 0);
7555 /* Handle SAR as a destination. */
7556 else
7558 if (length == 8)
7559 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7560 else if (length == 12)
7561 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7562 else
7564 operands[4] = GEN_INT (length);
7565 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7566 operands);
7567 return pa_output_lbranch (operands[3], insn, 0);
7572 /* Copy any FP arguments in INSN into integer registers. */
7573 static void
7574 copy_fp_args (rtx_insn *insn)
7576 rtx link;
7577 rtx xoperands[2];
7579 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7581 int arg_mode, regno;
7582 rtx use = XEXP (link, 0);
7584 if (! (GET_CODE (use) == USE
7585 && GET_CODE (XEXP (use, 0)) == REG
7586 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7587 continue;
7589 arg_mode = GET_MODE (XEXP (use, 0));
7590 regno = REGNO (XEXP (use, 0));
7592 /* Is it a floating point register? */
7593 if (regno >= 32 && regno <= 39)
7595 /* Copy the FP register into an integer register via memory. */
7596 if (arg_mode == SFmode)
7598 xoperands[0] = XEXP (use, 0);
7599 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7600 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7601 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7603 else
7605 xoperands[0] = XEXP (use, 0);
7606 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7607 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7608 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7609 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7615 /* Compute length of the FP argument copy sequence for INSN. */
7616 static int
7617 length_fp_args (rtx_insn *insn)
7619 int length = 0;
7620 rtx link;
7622 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7624 int arg_mode, regno;
7625 rtx use = XEXP (link, 0);
7627 if (! (GET_CODE (use) == USE
7628 && GET_CODE (XEXP (use, 0)) == REG
7629 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7630 continue;
7632 arg_mode = GET_MODE (XEXP (use, 0));
7633 regno = REGNO (XEXP (use, 0));
7635 /* Is it a floating point register? */
7636 if (regno >= 32 && regno <= 39)
7638 if (arg_mode == SFmode)
7639 length += 8;
7640 else
7641 length += 12;
7645 return length;
7648 /* Return the attribute length for the millicode call instruction INSN.
7649 The length must match the code generated by pa_output_millicode_call.
7650 We include the delay slot in the returned length as it is better to
7651 overestimate the length than to underestimate it. */
7654 pa_attr_length_millicode_call (rtx_insn *insn)
7656 unsigned long distance = -1;
7657 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7659 if (INSN_ADDRESSES_SET_P ())
7661 distance = (total + insn_current_reference_address (insn));
7662 if (distance < total)
7663 distance = -1;
7666 if (TARGET_64BIT)
7668 if (!TARGET_LONG_CALLS && distance < 7600000)
7669 return 8;
7671 return 20;
7673 else if (TARGET_PORTABLE_RUNTIME)
7674 return 24;
7675 else
7677 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7678 return 8;
7680 if (!flag_pic)
7681 return 12;
7683 return 24;
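/* For illustration, the lengths above include the delay slot and
   correspond to the sequences emitted by pa_output_millicode_call
   below.  E.g., on 32-bit targets the short in-reach form

	bl $$mulI,%r31
	nop

   is 8 bytes, while the portable runtime form (ldil/ldo to build the
   address, bl/addi to set up %r31, bv, plus the delay slot) accounts
   for the 24 bytes returned above.  */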
7687 /* INSN is a function call.
7689 CALL_DEST is the routine we are calling. */
7691 const char *
7692 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7694 int attr_length = get_attr_length (insn);
7695 int seq_length = dbr_sequence_length ();
7696 rtx xoperands[4];
7698 xoperands[0] = call_dest;
7700 /* Handle the common case where we are sure that the branch will
7701 reach the beginning of the $CODE$ subspace. The within reach
7702 form of the $$sh_func_adrs call has a length of 28. Because it
7703 has an attribute type of sh_func_adrs, it never has a nonzero
7704 sequence length (i.e., the delay slot is never filled). */
7705 if (!TARGET_LONG_CALLS
7706 && (attr_length == 8
7707 || (attr_length == 28
7708 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7710 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7711 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7713 else
7715 if (TARGET_64BIT)
7717 /* It might seem that one insn could be saved by accessing
7718 the millicode function using the linkage table. However,
7719 this doesn't work in shared libraries and other dynamically
7720 loaded objects. Using a pc-relative sequence also avoids
7721 problems related to the implicit use of the gp register. */
7722 xoperands[1] = gen_rtx_REG (Pmode, 1);
7723 xoperands[2] = xoperands[1];
7724 pa_output_pic_pcrel_sequence (xoperands);
7725 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7727 else if (TARGET_PORTABLE_RUNTIME)
7729 /* Pure portable runtime doesn't allow be/ble; we also don't
7730 have PIC support in the assembler/linker, so this sequence
7731 is needed. */
7733 /* Get the address of our target into %r1. */
7734 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7735 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7737 /* Get our return address into %r31. */
7738 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7739 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7741 /* Jump to our target address in %r1. */
7742 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7744 else if (!flag_pic)
7746 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7747 if (TARGET_PA_20)
7748 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7749 else
7750 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7752 else
7754 xoperands[1] = gen_rtx_REG (Pmode, 31);
7755 xoperands[2] = gen_rtx_REG (Pmode, 1);
7756 pa_output_pic_pcrel_sequence (xoperands);
7758 /* Adjust return address. */
7759 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7761 /* Jump to our target address in %r1. */
7762 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7766 if (seq_length == 0)
7767 output_asm_insn ("nop", xoperands);
7769 return "";
7772 /* Return the attribute length of the call instruction INSN. The SIBCALL
7773 flag indicates whether INSN is a regular call or a sibling call. The
7774 length returned must be no shorter than the code actually generated by
7775 pa_output_call. Since branch shortening is done before delay branch
7776 sequencing, there is no way to determine whether or not the delay
7777 slot will be filled during branch shortening. Even when the delay
7778 slot is filled, we may have to add a nop if the delay slot contains
7779 a branch that can't reach its target. Thus, we always have to include
7780 the delay slot in the length estimate. This used to be done in
7781 pa_adjust_insn_length but we do it here now as some sequences always
7782 fill the delay slot and we can save four bytes in the estimate for
7783 these sequences. */
7786 pa_attr_length_call (rtx_insn *insn, int sibcall)
7788 int local_call;
7789 rtx call, call_dest;
7790 tree call_decl;
7791 int length = 0;
7792 rtx pat = PATTERN (insn);
7793 unsigned long distance = -1;
7795 gcc_assert (CALL_P (insn));
7797 if (INSN_ADDRESSES_SET_P ())
7799 unsigned long total;
7801 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7802 distance = (total + insn_current_reference_address (insn));
7803 if (distance < total)
7804 distance = -1;
7807 gcc_assert (GET_CODE (pat) == PARALLEL);
7809 /* Get the call rtx. */
7810 call = XVECEXP (pat, 0, 0);
7811 if (GET_CODE (call) == SET)
7812 call = SET_SRC (call);
7814 gcc_assert (GET_CODE (call) == CALL);
7816 /* Determine if this is a local call. */
7817 call_dest = XEXP (XEXP (call, 0), 0);
7818 call_decl = SYMBOL_REF_DECL (call_dest);
7819 local_call = call_decl && targetm.binds_local_p (call_decl);
7821 /* pc-relative branch. */
7822 if (!TARGET_LONG_CALLS
7823 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7824 || distance < MAX_PCREL17F_OFFSET))
7825 length += 8;
7827 /* 64-bit plabel sequence. */
7828 else if (TARGET_64BIT && !local_call)
7829 length += sibcall ? 28 : 24;
7831 /* non-pic long absolute branch sequence. */
7832 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7833 length += 12;
7835 /* long pc-relative branch sequence. */
7836 else if (TARGET_LONG_PIC_SDIFF_CALL
7837 || (TARGET_GAS && !TARGET_SOM && local_call))
7839 length += 20;
7841 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7842 length += 8;
7845 /* 32-bit plabel sequence. */
7846 else
7848 length += 32;
7850 if (TARGET_SOM)
7851 length += length_fp_args (insn);
7853 if (flag_pic)
7854 length += 4;
7856 if (!TARGET_PA_20)
7858 if (!sibcall)
7859 length += 8;
7861 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7862 length += 8;
7866 return length;
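/* Worked example (hypothetical flags): a non-local, out-of-reach,
   non-sibling call on 32-bit SOM with -fpic and without PA 2.0 takes
   the 32-bit plabel path above: 32 bytes for the base sequence, plus
   length_fp_args for any FP argument copies, plus 4 for PIC, plus 8
   for the return-pointer fixup and 8 for the space-register save,
   e.g. 32 + 0 + 4 + 8 + 8 = 52 when there are no FP arguments.  */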
7869 /* INSN is a function call.
7871 CALL_DEST is the routine we are calling. */
7873 const char *
7874 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7876 int seq_length = dbr_sequence_length ();
7877 tree call_decl = SYMBOL_REF_DECL (call_dest);
7878 int local_call = call_decl && targetm.binds_local_p (call_decl);
7879 rtx xoperands[4];
7881 xoperands[0] = call_dest;
7883 /* Handle the common case where we're sure that the branch will reach
7884 the beginning of the "$CODE$" subspace. This is the beginning of
7885 the current function if we are in a named section. */
7886 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7888 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7889 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7891 else
7893 if (TARGET_64BIT && !local_call)
7895 /* ??? As far as I can tell, the HP linker doesn't support the
7896 long pc-relative sequence described in the 64-bit runtime
7897 architecture. So, we use a slightly longer indirect call. */
7898 xoperands[0] = pa_get_deferred_plabel (call_dest);
7899 xoperands[1] = gen_label_rtx ();
7901 /* If this isn't a sibcall, we put the load of %r27 into the
7902 delay slot. We can't do this in a sibcall as we don't
7903 have a second call-clobbered scratch register available.
7904 We don't need to do anything when generating fast indirect
7905 calls. */
7906 if (seq_length != 0 && !sibcall)
7908 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7909 optimize, 0, NULL);
7911 /* Now delete the delay insn. */
7912 SET_INSN_DELETED (NEXT_INSN (insn));
7913 seq_length = 0;
7916 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7917 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7918 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7920 if (sibcall)
7922 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7923 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7924 output_asm_insn ("bve (%%r1)", xoperands);
7926 else
7928 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7929 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7930 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7931 seq_length = 1;
7934 else
7936 int indirect_call = 0;
7938 /* Emit a long call. There are several different sequences
7939 of increasing length and complexity. In most cases,
7940 they don't allow an instruction in the delay slot. */
7941 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7942 && !TARGET_LONG_PIC_SDIFF_CALL
7943 && !(TARGET_GAS && !TARGET_SOM && local_call)
7944 && !TARGET_64BIT)
7945 indirect_call = 1;
7947 if (seq_length != 0
7948 && !sibcall
7949 && (!TARGET_PA_20
7950 || indirect_call
7951 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7953 /* A non-jump insn in the delay slot. By definition we can
7954 emit this insn before the call (and in fact before argument
7955 relocation). */
7956 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7957 NULL);
7959 /* Now delete the delay insn. */
7960 SET_INSN_DELETED (NEXT_INSN (insn));
7961 seq_length = 0;
7964 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7966 /* This is the best sequence for making long calls in
7967 non-pic code. Unfortunately, GNU ld doesn't provide
7968 the stub needed for external calls, and GAS's support
7969 for this with the SOM linker is buggy. It is safe
7970 to use this for local calls. */
7971 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7972 if (sibcall)
7973 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7974 else
7976 if (TARGET_PA_20)
7977 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7978 xoperands);
7979 else
7980 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7982 output_asm_insn ("copy %%r31,%%r2", xoperands);
7983 seq_length = 1;
7986 else
7988 /* The HP assembler and linker can handle relocations for
7989 the difference of two symbols. The HP assembler
7990 recognizes the sequence as a pc-relative call and
7991 the linker provides stubs when needed. */
7993 /* GAS currently can't generate the relocations that
7994 are needed for the SOM linker under HP-UX using this
7995 sequence. The GNU linker doesn't generate the stubs
7996 that are needed for external calls on TARGET_ELF32
7997 with this sequence. For now, we have to use a longer
7998 plabel sequence when using GAS for non local calls. */
7999 if (TARGET_LONG_PIC_SDIFF_CALL
8000 || (TARGET_GAS && !TARGET_SOM && local_call))
8002 xoperands[1] = gen_rtx_REG (Pmode, 1);
8003 xoperands[2] = xoperands[1];
8004 pa_output_pic_pcrel_sequence (xoperands);
8006 else
8008 /* Emit a long plabel-based call sequence. This is
8009 essentially an inline implementation of $$dyncall.
8010 We don't actually try to call $$dyncall as this is
8011 as difficult as calling the function itself. */
8012 xoperands[0] = pa_get_deferred_plabel (call_dest);
8013 xoperands[1] = gen_label_rtx ();
8015 /* Since the call is indirect, FP arguments in registers
8016 need to be copied to the general registers. Then, the
8017 argument relocation stub will copy them back. */
8018 if (TARGET_SOM)
8019 copy_fp_args (insn);
8021 if (flag_pic)
8023 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8024 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8025 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
8027 else
8029 output_asm_insn ("addil LR'%0-$global$,%%r27",
8030 xoperands);
8031 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8032 xoperands);
8035 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
8036 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8037 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8038 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
8040 if (!sibcall && !TARGET_PA_20)
8042 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8043 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8044 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8045 else
8046 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8050 if (TARGET_PA_20)
8052 if (sibcall)
8053 output_asm_insn ("bve (%%r1)", xoperands);
8054 else
8056 if (indirect_call)
8058 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8059 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8060 seq_length = 1;
8062 else
8063 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8066 else
8068 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8069 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8070 xoperands);
8072 if (sibcall)
8074 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8075 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8076 else
8077 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8079 else
8081 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8082 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8083 else
8084 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8086 if (indirect_call)
8087 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8088 else
8089 output_asm_insn ("copy %%r31,%%r2", xoperands);
8090 seq_length = 1;
8097 if (seq_length == 0)
8098 output_asm_insn ("nop", xoperands);
8100 return "";
8103 /* Return the attribute length of the indirect call instruction INSN.
8104 The length must match the code generated by pa_output_indirect_call.
8105 The returned length includes the delay slot. Currently, the delay
8106 slot of an indirect call sequence is not exposed and it is used by
8107 the sequence itself. */
8110 pa_attr_length_indirect_call (rtx_insn *insn)
8112 unsigned long distance = -1;
8113 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8115 if (INSN_ADDRESSES_SET_P ())
8117 distance = (total + insn_current_reference_address (insn));
8118 if (distance < total)
8119 distance = -1;
8122 if (TARGET_64BIT)
8123 return 12;
8125 if (TARGET_FAST_INDIRECT_CALLS)
8126 return 8;
8128 if (TARGET_PORTABLE_RUNTIME)
8129 return 16;
8131 /* Inline version of $$dyncall. */
8132 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8133 return 20;
8135 if (!TARGET_LONG_CALLS
8136 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8137 || distance < MAX_PCREL17F_OFFSET))
8138 return 8;
8140 /* Out of reach, can use ble. */
8141 if (!flag_pic)
8142 return 12;
8144 /* Inline version of $$dyncall. */
8145 if (TARGET_NO_SPACE_REGS || TARGET_PA_20)
8146 return 20;
8148 if (!optimize_size)
8149 return 36;
8151 /* Long PIC pc-relative call. */
8152 return 20;
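/* These lengths select among the sequences in pa_output_indirect_call
   below: 8 is the short bl/b,l form (or a fast indirect call), 12 the
   ldil/ble form or the 64-bit ldd/bve,l form, 16 the portable runtime
   form, 20 the inline version of $$dyncall or the long PIC
   pc-relative call, and 36 the inline PA 1.X version of $$dyncall.  */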
8155 const char *
8156 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8158 rtx xoperands[4];
8159 int length;
8161 if (TARGET_64BIT)
8163 xoperands[0] = call_dest;
8164 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8165 "bve,l (%%r2),%%r2\n\t"
8166 "ldd 24(%0),%%r27", xoperands);
8167 return "";
8170 /* First the special case for kernels, level 0 systems, etc. */
8171 if (TARGET_FAST_INDIRECT_CALLS)
8173 pa_output_arg_descriptor (insn);
8174 if (TARGET_PA_20)
8175 return "bve,l,n (%%r22),%%r2\n\tnop";
8176 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8179 if (TARGET_PORTABLE_RUNTIME)
8181 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8182 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8183 pa_output_arg_descriptor (insn);
8184 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8187 /* Maybe emit a fast inline version of $$dyncall. */
8188 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8190 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8191 "ldw 2(%%r22),%%r19\n\t"
8192 "ldw -2(%%r22),%%r22", xoperands);
8193 pa_output_arg_descriptor (insn);
8194 if (TARGET_NO_SPACE_REGS)
8196 if (TARGET_PA_20)
8197 return "bve,l,n (%%r22),%%r2\n\tnop";
8198 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8200 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8203 /* Now the normal case -- we can reach $$dyncall directly or
8204 we're sure that we can get there via a long-branch stub.
8206 No need to check target flags as the length uniquely identifies
8207 the remaining cases. */
8208 length = pa_attr_length_indirect_call (insn);
8209 if (length == 8)
8211 pa_output_arg_descriptor (insn);
8213 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8214 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8215 variant of the B,L instruction can't be used on the SOM target. */
8216 if (TARGET_PA_20 && !TARGET_SOM)
8217 return "b,l,n $$dyncall,%%r2\n\tnop";
8218 else
8219 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8222 /* Long millicode call, but we are not generating PIC or portable runtime
8223 code. */
8224 if (length == 12)
8226 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8227 pa_output_arg_descriptor (insn);
8228 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8231 /* Maybe emit a fast inline version of $$dyncall. The long PIC
8232 pc-relative call sequence is five instructions. The inline PA 2.0
8233 version of $$dyncall is also five instructions. The PA 1.X versions
8234 are longer but still an overall win. */
8235 if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size)
8237 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8238 "ldw 2(%%r22),%%r19\n\t"
8239 "ldw -2(%%r22),%%r22", xoperands);
8240 if (TARGET_NO_SPACE_REGS)
8242 pa_output_arg_descriptor (insn);
8243 if (TARGET_PA_20)
8244 return "bve,l,n (%%r22),%%r2\n\tnop";
8245 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8247 if (TARGET_PA_20)
8249 pa_output_arg_descriptor (insn);
8250 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8252 output_asm_insn ("bl .+8,%%r2\n\t"
8253 "ldo 16(%%r2),%%r2\n\t"
8254 "ldsid (%%r22),%%r1\n\t"
8255 "mtsp %%r1,%%sr0", xoperands);
8256 pa_output_arg_descriptor (insn);
8257 return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)";
8260 /* We need a long PIC call to $$dyncall. */
8261 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8262 xoperands[1] = gen_rtx_REG (Pmode, 2);
8263 xoperands[2] = gen_rtx_REG (Pmode, 1);
8264 pa_output_pic_pcrel_sequence (xoperands);
8265 pa_output_arg_descriptor (insn);
8266 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8269 /* In HPUX 8.0's shared library scheme, special relocations are needed
8270 for function labels if they might be passed to a function
8271 in a shared library (because shared libraries don't live in code
8272 space), and special magic is needed to construct their address. */
8274 void
8275 pa_encode_label (rtx sym)
8277 const char *str = XSTR (sym, 0);
8278 int len = strlen (str) + 1;
8279 char *newstr, *p;
8281 p = newstr = XALLOCAVEC (char, len + 1);
8282 *p++ = '@';
8283 strcpy (p, str);
8285 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8288 static void
8289 pa_encode_section_info (tree decl, rtx rtl, int first)
8291 int old_referenced = 0;
8293 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8294 old_referenced
8295 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8297 default_encode_section_info (decl, rtl, first);
8299 if (first && TEXT_SPACE_P (decl))
8301 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8302 if (TREE_CODE (decl) == FUNCTION_DECL)
8303 pa_encode_label (XEXP (rtl, 0));
8305 else if (old_referenced)
8306 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8309 /* This is sort of the inverse of pa_encode_section_info. */
8311 static const char *
8312 pa_strip_name_encoding (const char *str)
8314 str += (*str == '@');
8315 str += (*str == '*');
8316 return str;
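/* Example: pa_encode_label rewrites the symbol name "foo" as "@foo";
   pa_strip_name_encoding maps both "@foo" and "*foo" back to "foo".  */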
8319 /* Returns 1 if OP is a function label involved in a simple addition
8320 with a constant. Used to keep certain patterns from matching
8321 during instruction combination. */
8323 pa_is_function_label_plus_const (rtx op)
8325 /* Strip off any CONST. */
8326 if (GET_CODE (op) == CONST)
8327 op = XEXP (op, 0);
8329 return (GET_CODE (op) == PLUS
8330 && function_label_operand (XEXP (op, 0), VOIDmode)
8331 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8334 /* Output assembly code for a thunk to FUNCTION. */
8336 static void
8337 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8338 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8339 tree function)
8341 static unsigned int current_thunk_number;
8342 int val_14 = VAL_14_BITS_P (delta);
8343 unsigned int old_last_address = last_address, nbytes = 0;
8344 char label[16];
8345 rtx xoperands[4];
8347 xoperands[0] = XEXP (DECL_RTL (function), 0);
8348 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8349 xoperands[2] = GEN_INT (delta);
8351 final_start_function (emit_barrier (), file, 1);
8353 /* Output the thunk. We know that the function is in the same
8354 translation unit (i.e., the same space) as the thunk, and that
8355 thunks are output after their method. Thus, we don't need an
8356 external branch to reach the function. With SOM and GAS,
8357 functions and thunks are effectively in different sections.
8358 Thus, we can always use an IA-relative branch and the linker
8359 will add a long branch stub if necessary.
8361 However, we have to be careful when generating PIC code on the
8362 SOM port to ensure that the sequence does not transfer to an
8363 import stub for the target function as this could clobber the
8364 return value saved at SP-24. This would also apply to the
8365 32-bit linux port if the multi-space model is implemented. */
8366 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8367 && !(flag_pic && TREE_PUBLIC (function))
8368 && (TARGET_GAS || last_address < 262132))
8369 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8370 && ((targetm_common.have_named_sections
8371 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8372 /* The GNU 64-bit linker has rather poor stub management.
8373 So, we use a long branch from thunks that aren't in
8374 the same section as the target function. */
8375 && ((!TARGET_64BIT
8376 && (DECL_SECTION_NAME (thunk_fndecl)
8377 != DECL_SECTION_NAME (function)))
8378 || ((DECL_SECTION_NAME (thunk_fndecl)
8379 == DECL_SECTION_NAME (function))
8380 && last_address < 262132)))
8381 /* In this case, we need to be able to reach the start of
8382 the stub table even though the function is likely closer
8383 and can be jumped to directly. */
8384 || (targetm_common.have_named_sections
8385 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8386 && DECL_SECTION_NAME (function) == NULL
8387 && total_code_bytes < MAX_PCREL17F_OFFSET)
8388 /* Likewise. */
8389 || (!targetm_common.have_named_sections
8390 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8392 if (!val_14)
8393 output_asm_insn ("addil L'%2,%%r26", xoperands);
8395 output_asm_insn ("b %0", xoperands);
8397 if (val_14)
8399 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8400 nbytes += 8;
8402 else
8404 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8405 nbytes += 12;
8408 else if (TARGET_64BIT)
8410 rtx xop[4];
8412 /* We only have one call-clobbered scratch register, so we can't
8413 make use of the delay slot if delta doesn't fit in 14 bits. */
8414 if (!val_14)
8416 output_asm_insn ("addil L'%2,%%r26", xoperands);
8417 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8420 /* Load function address into %r1. */
8421 xop[0] = xoperands[0];
8422 xop[1] = gen_rtx_REG (Pmode, 1);
8423 xop[2] = xop[1];
8424 pa_output_pic_pcrel_sequence (xop);
8426 if (val_14)
8428 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8429 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8430 nbytes += 20;
8432 else
8434 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8435 nbytes += 24;
8438 else if (TARGET_PORTABLE_RUNTIME)
8440 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8441 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8443 if (!val_14)
8444 output_asm_insn ("ldil L'%2,%%r26", xoperands);
8446 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8448 if (val_14)
8450 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8451 nbytes += 16;
8453 else
8455 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8456 nbytes += 20;
8459 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8461 /* The function is accessible from outside this module. The only
8462 way to avoid an import stub between the thunk and function is to
8463 call the function directly with an indirect sequence similar to
8464 that used by $$dyncall. This is possible because $$dyncall acts
8465 as the import stub in an indirect call. */
8466 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8467 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8468 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8469 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8470 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8471 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8472 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8473 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8474 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8476 if (!val_14)
8478 output_asm_insn ("addil L'%2,%%r26", xoperands);
8479 nbytes += 4;
8482 if (TARGET_PA_20)
8484 output_asm_insn ("bve (%%r22)", xoperands);
8485 nbytes += 36;
8487 else if (TARGET_NO_SPACE_REGS)
8489 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8490 nbytes += 36;
8492 else
8494 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8495 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8496 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8497 nbytes += 44;
8500 if (val_14)
8501 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8502 else
8503 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8505 else if (flag_pic)
8507 rtx xop[4];
8509 /* Load function address into %r22. */
8510 xop[0] = xoperands[0];
8511 xop[1] = gen_rtx_REG (Pmode, 1);
8512 xop[2] = gen_rtx_REG (Pmode, 22);
8513 pa_output_pic_pcrel_sequence (xop);
8515 if (!val_14)
8516 output_asm_insn ("addil L'%2,%%r26", xoperands);
8518 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8520 if (val_14)
8522 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8523 nbytes += 20;
8525 else
8527 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8528 nbytes += 24;
8531 else
8533 if (!val_14)
8534 output_asm_insn ("addil L'%2,%%r26", xoperands);
8536 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8537 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8539 if (val_14)
8541 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8542 nbytes += 12;
8544 else
8546 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8547 nbytes += 16;
8551 final_end_function ();
8553 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8555 switch_to_section (data_section);
8556 output_asm_insn (".align 4", xoperands);
8557 ASM_OUTPUT_LABEL (file, label);
8558 output_asm_insn (".word P'%0", xoperands);
8561 current_thunk_number++;
8562 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8563 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8564 last_address += nbytes;
8565 if (old_last_address > last_address)
8566 last_address = UINT_MAX;
8567 update_total_code_bytes (nbytes);
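/* The rounding above charges the thunk at the next function boundary;
   e.g., if FUNCTION_BOUNDARY were 128 bits (16 bytes, a value chosen
   only for illustration), a 20-byte thunk would be accounted as 32
   bytes of code.  */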
8570 /* Only direct calls to static functions are allowed to be sibling (tail)
8571 call optimized.
8573 This restriction is necessary because some linker generated stubs will
8574 store return pointers into rp' in some cases which might clobber a
8575 live value already in rp'.
8577 In a sibcall the current function and the target function share stack
8578 space. Thus if the path to the current function and the path to the
8579 target function save a value in rp', they save the value into the
8580 same stack slot, which has undesirable consequences.
8582 Because of the deferred binding nature of shared libraries any function
8583 with external scope could be in a different load module and thus require
8584 rp' to be saved when calling that function. So sibcall optimizations
8585 can only be safe for static functions.
8587 Note that GCC never needs return value relocations, so we don't have to
8588 worry about static calls with return value relocations (which require
8589 saving rp').
8591 It is safe to perform a sibcall optimization when the target function
8592 will never return. */
8593 static bool
8594 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8596 if (TARGET_PORTABLE_RUNTIME)
8597 return false;
8599 /* Sibcalls are not ok because the arg pointer register is not a fixed
8600 register. This prevents the sibcall optimization from occurring. In
8601 addition, there are problems with stub placement using GNU ld. This
8602 is because a normal sibcall branch uses a 17-bit relocation while
8603 a regular call branch uses a 22-bit relocation. As a result, more
8604 care needs to be taken in the placement of long-branch stubs. */
8605 if (TARGET_64BIT)
8606 return false;
8608 /* Sibcalls are only ok within a translation unit. */
8609 return (decl && !TREE_PUBLIC (decl));
8612 /* ??? Addition is not commutative on the PA due to the weird implicit
8613 space register selection rules for memory addresses. Therefore, we
8614 don't consider a + b == b + a, as this might be inside a MEM. */
8615 static bool
8616 pa_commutative_p (const_rtx x, int outer_code)
8618 return (COMMUTATIVE_P (x)
8619 && (TARGET_NO_SPACE_REGS
8620 || (outer_code != UNKNOWN && outer_code != MEM)
8621 || GET_CODE (x) != PLUS));
8624 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8625 use in fmpyadd instructions. */
8627 pa_fmpyaddoperands (rtx *operands)
8629 machine_mode mode = GET_MODE (operands[0]);
8631 /* Must be a floating point mode. */
8632 if (mode != SFmode && mode != DFmode)
8633 return 0;
8635 /* All modes must be the same. */
8636 if (! (mode == GET_MODE (operands[1])
8637 && mode == GET_MODE (operands[2])
8638 && mode == GET_MODE (operands[3])
8639 && mode == GET_MODE (operands[4])
8640 && mode == GET_MODE (operands[5])))
8641 return 0;
8643 /* All operands must be registers. */
8644 if (! (GET_CODE (operands[1]) == REG
8645 && GET_CODE (operands[2]) == REG
8646 && GET_CODE (operands[3]) == REG
8647 && GET_CODE (operands[4]) == REG
8648 && GET_CODE (operands[5]) == REG))
8649 return 0;
8651 /* Only 2 real operands to the addition. One of the input operands must
8652 be the same as the output operand. */
8653 if (! rtx_equal_p (operands[3], operands[4])
8654 && ! rtx_equal_p (operands[3], operands[5]))
8655 return 0;
8657 /* Inout operand of add cannot conflict with any operands from multiply. */
8658 if (rtx_equal_p (operands[3], operands[0])
8659 || rtx_equal_p (operands[3], operands[1])
8660 || rtx_equal_p (operands[3], operands[2]))
8661 return 0;
8663 /* The multiply result cannot feed the addition operands. */
8664 if (rtx_equal_p (operands[4], operands[0])
8665 || rtx_equal_p (operands[5], operands[0]))
8666 return 0;
8668 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8669 if (mode == SFmode
8670 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8671 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8672 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8673 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8674 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8675 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8676 return 0;
8678 /* Passed. Operands are suitable for fmpyadd. */
8679 return 1;
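/* Sketch of the shape validated above, with the operand numbering
   assumed from the fmpyadd patterns in pa.md:

	(set op0 (mult op1 op2))	; the multiply
	(set op3 (plus op4 op5))	; the add; op3 equals op4 or op5

   so the add is effectively two-operand, and neither instruction may
   feed or clobber the operands of the other.  */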
8682 #if !defined(USE_COLLECT2)
8683 static void
8684 pa_asm_out_constructor (rtx symbol, int priority)
8686 if (!function_label_operand (symbol, VOIDmode))
8687 pa_encode_label (symbol);
8689 #ifdef CTORS_SECTION_ASM_OP
8690 default_ctor_section_asm_out_constructor (symbol, priority);
8691 #else
8692 # ifdef TARGET_ASM_NAMED_SECTION
8693 default_named_section_asm_out_constructor (symbol, priority);
8694 # else
8695 default_stabs_asm_out_constructor (symbol, priority);
8696 # endif
8697 #endif
8700 static void
8701 pa_asm_out_destructor (rtx symbol, int priority)
8703 if (!function_label_operand (symbol, VOIDmode))
8704 pa_encode_label (symbol);
8706 #ifdef DTORS_SECTION_ASM_OP
8707 default_dtor_section_asm_out_destructor (symbol, priority);
8708 #else
8709 # ifdef TARGET_ASM_NAMED_SECTION
8710 default_named_section_asm_out_destructor (symbol, priority);
8711 # else
8712 default_stabs_asm_out_destructor (symbol, priority);
8713 # endif
8714 #endif
8716 #endif
8718 /* This function places uninitialized global data in the bss section.
8719 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8720 function on the SOM port to prevent uninitialized global data from
8721 being placed in the data section. */
8723 void
8724 pa_asm_output_aligned_bss (FILE *stream,
8725 const char *name,
8726 unsigned HOST_WIDE_INT size,
8727 unsigned int align)
8729 switch_to_section (bss_section);
8730 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8732 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8733 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8734 #endif
8736 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8737 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8738 #endif
8740 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8741 ASM_OUTPUT_LABEL (stream, name);
8742 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8745 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8746 that doesn't allow the alignment of global common storage to be directly
8747 specified. The SOM linker aligns common storage based on the rounded
8748 value of the NUM_BYTES parameter in the .comm directive. It's not
8749 possible to use the .align directive as it doesn't affect the alignment
8750 of the label associated with a .comm directive. */
8752 void
8753 pa_asm_output_aligned_common (FILE *stream,
8754 const char *name,
8755 unsigned HOST_WIDE_INT size,
8756 unsigned int align)
8758 unsigned int max_common_align;
8760 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8761 if (align > max_common_align)
8763 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8764 "for global common data. Using %u",
8765 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8766 align = max_common_align;
8769 switch_to_section (bss_section);
8771 assemble_name (stream, name);
8772 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8773 MAX (size, align / BITS_PER_UNIT));
8776 /* We can't use .comm for local common storage as the SOM linker effectively
8777 treats the symbol as universal and uses the same storage for local symbols
8778 with the same name in different object files. The .block directive
8779 reserves an uninitialized block of storage. However, it's not common
8780 storage. Fortunately, GCC never requests common storage with the same
8781 name in any given translation unit. */
8783 void
8784 pa_asm_output_aligned_local (FILE *stream,
8785 const char *name,
8786 unsigned HOST_WIDE_INT size,
8787 unsigned int align)
8789 switch_to_section (bss_section);
8790 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8792 #ifdef LOCAL_ASM_OP
8793 fprintf (stream, "%s", LOCAL_ASM_OP);
8794 assemble_name (stream, name);
8795 fprintf (stream, "\n");
8796 #endif
8798 ASM_OUTPUT_LABEL (stream, name);
8799 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
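/* E.g., a 16-byte local symbol with 64-bit (8-byte) alignment is
   emitted roughly as ".align 8", the optional LOCAL_ASM_OP marker,
   the label, and ".block 16".  */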
8802 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8803 use in fmpysub instructions. */
8805 pa_fmpysuboperands (rtx *operands)
8807 machine_mode mode = GET_MODE (operands[0]);
8809 /* Must be a floating point mode. */
8810 if (mode != SFmode && mode != DFmode)
8811 return 0;
8813 /* All modes must be the same. */
8814 if (! (mode == GET_MODE (operands[1])
8815 && mode == GET_MODE (operands[2])
8816 && mode == GET_MODE (operands[3])
8817 && mode == GET_MODE (operands[4])
8818 && mode == GET_MODE (operands[5])))
8819 return 0;
8821 /* All operands must be registers. */
8822 if (! (GET_CODE (operands[1]) == REG
8823 && GET_CODE (operands[2]) == REG
8824 && GET_CODE (operands[3]) == REG
8825 && GET_CODE (operands[4]) == REG
8826 && GET_CODE (operands[5]) == REG))
8827 return 0;
8829 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8830 operation, so operands[4] must be the same as operands[3]. */
8831 if (! rtx_equal_p (operands[3], operands[4]))
8832 return 0;
8834 /* The multiply result cannot feed the subtraction. */
8835 if (rtx_equal_p (operands[5], operands[0]))
8836 return 0;
8838 /* Inout operand of sub cannot conflict with any operands from multiply. */
8839 if (rtx_equal_p (operands[3], operands[0])
8840 || rtx_equal_p (operands[3], operands[1])
8841 || rtx_equal_p (operands[3], operands[2]))
8842 return 0;
8844 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8845 if (mode == SFmode
8846 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8847 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8848 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8849 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8850 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8851 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8852 return 0;
8854 /* Passed. Operands are suitable for fmpysub. */
8855 return 1;
8858 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8859 constants for a MULT embedded inside a memory address. */
8861 pa_mem_shadd_constant_p (int val)
8863 if (val == 2 || val == 4 || val == 8)
8864 return 1;
8865 else
8866 return 0;
8869 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8870 constants for shadd instructions. */
8872 pa_shadd_constant_p (int val)
8874 if (val == 1 || val == 2 || val == 3)
8875 return 1;
8876 else
8877 return 0;
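/* Example: an address computation like x + y*4 maps onto sh2add; the
   mem_shadd constant is the multiplier (4) while the shadd constant
   is the shift count (2), since sh2add computes (y << 2) + x.  */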
8880 /* Return TRUE if INSN branches forward. */
8882 static bool
8883 forward_branch_p (rtx_insn *insn)
8885 rtx lab = JUMP_LABEL (insn);
8887 /* The INSN must have a jump label. */
8888 gcc_assert (lab != NULL_RTX);
8890 if (INSN_ADDRESSES_SET_P ())
8891 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8893 while (insn)
8895 if (insn == lab)
8896 return true;
8897 else
8898 insn = NEXT_INSN (insn);
8901 return false;
8904 /* Output an unconditional move and branch insn. */
8906 const char *
8907 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8909 int length = get_attr_length (insn);
8911 /* These are the cases in which we win. */
8912 if (length == 4)
8913 return "mov%I1b,tr %1,%0,%2";
8915 /* None of the following cases win, but they don't lose either. */
8916 if (length == 8)
8918 if (dbr_sequence_length () == 0)
8920 /* Nothing in the delay slot; fake it by putting the combined
8921 insn (the copy or add) in the delay slot of a bl. */
8922 if (GET_CODE (operands[1]) == CONST_INT)
8923 return "b %2\n\tldi %1,%0";
8924 else
8925 return "b %2\n\tcopy %1,%0";
8927 else
8929 /* Something in the delay slot, but we've got a long branch. */
8930 if (GET_CODE (operands[1]) == CONST_INT)
8931 return "ldi %1,%0\n\tb %2";
8932 else
8933 return "copy %1,%0\n\tb %2";
8937 if (GET_CODE (operands[1]) == CONST_INT)
8938 output_asm_insn ("ldi %1,%0", operands);
8939 else
8940 output_asm_insn ("copy %1,%0", operands);
8941 return pa_output_lbranch (operands[2], insn, 1);
8944 /* Output an unconditional add and branch insn. */
8946 const char *
8947 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8949 int length = get_attr_length (insn);
8951 /* To make life easy we want operand0 to be the shared input/output
8952 operand and operand1 to be the readonly operand. */
8953 if (operands[0] == operands[1])
8954 operands[1] = operands[2];
8956 /* These are the cases in which we win. */
8957 if (length == 4)
8958 return "add%I1b,tr %1,%0,%3";
8960 /* None of the following cases win, but they don't lose either. */
8961 if (length == 8)
8963 if (dbr_sequence_length () == 0)
8964 /* Nothing in the delay slot; fake it by putting the combined
8965 insn (the copy or add) in the delay slot of a bl. */
8966 return "b %3\n\tadd%I1 %1,%0,%0";
8967 else
8968 /* Something in the delay slot, but we've got a long branch. */
8969 return "add%I1 %1,%0,%0\n\tb %3";
8972 output_asm_insn ("add%I1 %1,%0,%0", operands);
8973 return pa_output_lbranch (operands[3], insn, 1);
8976 /* We use this hook to perform a PA specific optimization which is difficult
8977 to do in earlier passes. */
8979 static void
8980 pa_reorg (void)
8982 remove_useless_addtr_insns (1);
8984 if (pa_cpu < PROCESSOR_8000)
8985 pa_combine_instructions ();
8988 /* The PA has a number of odd instructions which can perform multiple
8989 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8990 it may be profitable to combine two instructions into one instruction
8991 with two outputs. It's not profitable PA2.0 machines because the
8992 two outputs would take two slots in the reorder buffers.
8994 This routine finds instructions which can be combined and combines
8995 them. We only support some of the potential combinations, and we
8996 only try common ways to find suitable instructions.
8998 * addb can add two registers or a register and a small integer
8999 and jump to a nearby (+-8k) location. Normally the jump to the
9000 nearby location is conditional on the result of the add, but by
9001 using the "true" condition we can make the jump unconditional.
9002 Thus addb can perform two independent operations in one insn.
9004 * movb is similar to addb in that it can perform a reg->reg
9005 or small immediate->reg copy and jump to a nearby (+-8k) location.
9007 * fmpyadd and fmpysub can perform a FP multiply and either an
9008 FP add or FP sub if the operands of the multiply and add/sub are
9009 independent (there are other minor restrictions). Note both
9010 the fmpy and fadd/fsub can in theory move to better spots according
9011 to data dependencies, but for now we require the fmpy stay at a
9012 fixed location.
9014 * Many of the memory operations can perform pre & post updates
9015 of index registers. GCC's pre/post increment/decrement addressing
9016 is far too simple to take advantage of all the possibilities. This
9017 pass may not be suitable since those insns may not be independent.
9019 * comclr can compare two ints or an int and a register, nullify
9020 the following instruction and zero some other register. This
9021 is more difficult to use as it's harder to find an insn which
9022 will generate a comclr than finding something like an unconditional
9023 branch. (Conditional moves & long branches create comclr insns.)
9025 * Most arithmetic operations can conditionally skip the next
9026 instruction. They can be viewed as "perform this operation
9027 and conditionally jump to this nearby location" (where nearby
9028 is an insn away). These are difficult to use due to the
9029 branch length restrictions. */
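/* A sketch of the fmpy/fadd case (register numbers invented):

	fmpy,dbl %fr4,%fr5,%fr6
	fadd,dbl %fr7,%fr8,%fr8

   is combined into a single two-output fmpyadd insn when the operand
   checks in pa_fmpyaddoperands succeed.  */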
9031 static void
9032 pa_combine_instructions (void)
9034 rtx_insn *anchor;
9036 /* This can get expensive since the basic algorithm is on the
9037 order of O(n^2) (or worse). Only do it for -O2 or higher
9038 levels of optimization. */
9039 if (optimize < 2)
9040 return;
9042 /* Walk down the list of insns looking for "anchor" insns which
9043 may be combined with "floating" insns. As the name implies,
9044 "anchor" instructions don't move, while "floating" insns may
9045 move around. */
9046 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9047 rtx_insn *new_rtx = make_insn_raw (par);
9049 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9051 enum attr_pa_combine_type anchor_attr;
9052 enum attr_pa_combine_type floater_attr;
9054 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9055 Also ignore any special USE insns. */
9056 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9057 || GET_CODE (PATTERN (anchor)) == USE
9058 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9059 continue;
9061 anchor_attr = get_attr_pa_combine_type (anchor);
9062 /* See if anchor is an insn suitable for combination. */
9063 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9064 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9065 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9066 && ! forward_branch_p (anchor)))
9068 rtx_insn *floater;
9070 for (floater = PREV_INSN (anchor);
9071 floater;
9072 floater = PREV_INSN (floater))
9074 if (NOTE_P (floater)
9075 || (NONJUMP_INSN_P (floater)
9076 && (GET_CODE (PATTERN (floater)) == USE
9077 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9078 continue;
9080 /* Anything except a regular INSN will stop our search. */
9081 if (! NONJUMP_INSN_P (floater))
9083 floater = NULL;
9084 break;
9087 /* See if FLOATER is suitable for combination with the
9088 anchor. */
9089 floater_attr = get_attr_pa_combine_type (floater);
9090 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9091 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9092 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9093 && floater_attr == PA_COMBINE_TYPE_FMPY))
9095 /* If ANCHOR and FLOATER can be combined, then we're
9096 done with this pass. */
9097 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9098 SET_DEST (PATTERN (floater)),
9099 XEXP (SET_SRC (PATTERN (floater)), 0),
9100 XEXP (SET_SRC (PATTERN (floater)), 1)))
9101 break;
9104 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9105 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9107 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9109 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9110 SET_DEST (PATTERN (floater)),
9111 XEXP (SET_SRC (PATTERN (floater)), 0),
9112 XEXP (SET_SRC (PATTERN (floater)), 1)))
9113 break;
9115 else
9117 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9118 SET_DEST (PATTERN (floater)),
9119 SET_SRC (PATTERN (floater)),
9120 SET_SRC (PATTERN (floater))))
9121 break;
9126 /* If we didn't find anything on the backwards scan try forwards. */
9127 if (!floater
9128 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9129 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9131 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9133 if (NOTE_P (floater)
9134 || (NONJUMP_INSN_P (floater)
9135 && (GET_CODE (PATTERN (floater)) == USE
9136 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9138 continue;
9140 /* Anything except a regular INSN will stop our search. */
9141 if (! NONJUMP_INSN_P (floater))
9143 floater = NULL;
9144 break;
9147 /* See if FLOATER is suitable for combination with the
9148 anchor. */
9149 floater_attr = get_attr_pa_combine_type (floater);
9150 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9151 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9152 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9153 && floater_attr == PA_COMBINE_TYPE_FMPY))
9155 /* If ANCHOR and FLOATER can be combined, then we're
9156 done with this pass. */
9157 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9158 SET_DEST (PATTERN (floater)),
9159 XEXP (SET_SRC (PATTERN (floater)),
9161 XEXP (SET_SRC (PATTERN (floater)),
9162 1)))
9163 break;
9168 /* FLOATER will be nonzero if we found a suitable floating
9169 insn for combination with ANCHOR. */
9170 if (floater
9171 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9172 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9174 /* Emit the new instruction and delete the old anchor. */
9175 emit_insn_before (gen_rtx_PARALLEL
9176 (VOIDmode,
9177 gen_rtvec (2, PATTERN (anchor),
9178 PATTERN (floater))),
9179 anchor);
9181 SET_INSN_DELETED (anchor);
9183 /* Emit a special USE insn for FLOATER, then delete
9184 the floating insn. */
9185 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9186 delete_insn (floater);
9188 continue;
9190 else if (floater
9191 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9193 rtx temp;
9194 /* Emit the new jump instruction and delete the old anchor. */
9195 temp
9196 = emit_jump_insn_before (gen_rtx_PARALLEL
9197 (VOIDmode,
9198 gen_rtvec (2, PATTERN (anchor),
9199 PATTERN (floater))),
9200 anchor);
9202 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9203 SET_INSN_DELETED (anchor);
9205 /* Emit a special USE insn for FLOATER, then delete
9206 the floating insn. */
9207 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9208 delete_insn (floater);
9209 continue;
9215 static int
9216 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9217 int reversed, rtx dest,
9218 rtx src1, rtx src2)
9220 int insn_code_number;
9221 rtx_insn *start, *end;
9223 /* Create a PARALLEL with the patterns of ANCHOR and
9224 FLOATER, try to recognize it, then test constraints
9225 for the resulting pattern.
9227 If the pattern doesn't match or the constraints
9228 aren't met, keep searching for a suitable floater
9229 insn. */
9230 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9231 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9232 INSN_CODE (new_rtx) = -1;
9233 insn_code_number = recog_memoized (new_rtx);
9234 basic_block bb = BLOCK_FOR_INSN (anchor);
9235 if (insn_code_number < 0
9236 || (extract_insn (new_rtx),
9237 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9238 return 0;
9240 if (reversed)
9242 start = anchor;
9243 end = floater;
9245 else
9247 start = floater;
9248 end = anchor;
9251 /* There are up to three operands to consider: one
9252 output and two inputs.
9254 The output must not be used between FLOATER & ANCHOR
9255 exclusive. The inputs must not be set between
9256 FLOATER and ANCHOR exclusive. */
9258 if (reg_used_between_p (dest, start, end))
9259 return 0;
9261 if (reg_set_between_p (src1, start, end))
9262 return 0;
9264 if (reg_set_between_p (src2, start, end))
9265 return 0;
9267 /* If we get here, then everything is good. */
9268 return 1;
9271 /* Return nonzero if references for INSN are delayed.
9273 Millicode insns are actually function calls with some special
9274 constraints on arguments and register usage.
9276 Millicode calls always expect their arguments in the integer argument
9277 registers, and always return their result in %r29 (ret1). They
9278 are expected to clobber their arguments, %r1, %r29, and the return
9279 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9281 This function tells reorg that the references to arguments and
9282 millicode calls do not appear to happen until after the millicode call.
9283 This allows reorg to put insns which set the argument registers into the
9284 delay slot of the millicode call -- thus they act more like traditional
9285 CALL_INSNs.
9287 Note we cannot consider side effects of the insn to be delayed because
9288 the branch and link insn will clobber the return pointer. If we happened
9289 to use the return pointer in the delay slot of the call, then we lose.
9291 get_attr_type will try to recognize the given insn, so make sure to
9292 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9293 in particular. */
9295 pa_insn_refs_are_delayed (rtx_insn *insn)
9297 return ((NONJUMP_INSN_P (insn)
9298 && GET_CODE (PATTERN (insn)) != SEQUENCE
9299 && GET_CODE (PATTERN (insn)) != USE
9300 && GET_CODE (PATTERN (insn)) != CLOBBER
9301 && get_attr_type (insn) == TYPE_MILLI));
9304 /* Promote the return value, but not the arguments. */
9306 static machine_mode
9307 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9308 machine_mode mode,
9309 int *punsignedp ATTRIBUTE_UNUSED,
9310 const_tree fntype ATTRIBUTE_UNUSED,
9311 int for_return)
9313 if (for_return == 0)
9314 return mode;
9315 return promote_mode (type, mode, punsignedp);
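/* E.g., a function returning "short" has its return value promoted to
   a full word, while a "short" argument is passed unpromoted.  */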
9318 /* On the HP-PA the value is found in register(s) 28(-29), unless
9319 the mode is SF or DF. Then the value is returned in fr4 (32).
9321 This must perform the same promotions as PROMOTE_MODE, else promoting
9322 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9324 Small structures must be returned in a PARALLEL on PA64 in order
9325 to match the HP Compiler ABI. */
9327 static rtx
9328 pa_function_value (const_tree valtype,
9329 const_tree func ATTRIBUTE_UNUSED,
9330 bool outgoing ATTRIBUTE_UNUSED)
9332 machine_mode valmode;
9334 if (AGGREGATE_TYPE_P (valtype)
9335 || TREE_CODE (valtype) == COMPLEX_TYPE
9336 || TREE_CODE (valtype) == VECTOR_TYPE)
9338 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9340 /* Handle aggregates that fit exactly in a word or double word. */
9341 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9342 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9344 if (TARGET_64BIT)
9346 /* Aggregates with a size less than or equal to 128 bits are
9347 returned in GR 28(-29). They are left justified. The pad
9348 bits are undefined. Larger aggregates are returned in
9349 memory. */
9350 rtx loc[2];
9351 int i, offset = 0;
9352 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9354 for (i = 0; i < ub; i++)
9356 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9357 gen_rtx_REG (DImode, 28 + i),
9358 GEN_INT (offset));
9359 offset += 8;
9362 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9364 else if (valsize > UNITS_PER_WORD)
9366 /* Aggregates 5 to 8 bytes in size are returned in general
9367 registers r28-r29 in the same manner as other non-
9368 floating-point objects. The data is right-justified and
9369 zero-extended to 64 bits. This is opposite to the normal
9370 justification used on big endian targets and requires
9371 special treatment. */
9372 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9373 gen_rtx_REG (DImode, 28), const0_rtx);
9374 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9378 if ((INTEGRAL_TYPE_P (valtype)
9379 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9380 || POINTER_TYPE_P (valtype))
9381 valmode = word_mode;
9382 else
9383 valmode = TYPE_MODE (valtype);
9385 if (TREE_CODE (valtype) == REAL_TYPE
9386 && !AGGREGATE_TYPE_P (valtype)
9387 && TYPE_MODE (valtype) != TFmode
9388 && !TARGET_SOFT_FLOAT)
9389 return gen_rtx_REG (valmode, 32);
9391 return gen_rtx_REG (valmode, 28);
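/* Examples: a float or double is returned in %fr4 (register 32)
   unless we generate soft-float code; on TARGET_64BIT a 12-byte
   struct comes back as a PARALLEL of (reg:DI 28) at offset 0 and
   (reg:DI 29) at offset 8.  */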
9394 /* Implement the TARGET_LIBCALL_VALUE hook. */
9396 static rtx
9397 pa_libcall_value (machine_mode mode,
9398 const_rtx fun ATTRIBUTE_UNUSED)
9400 if (! TARGET_SOFT_FLOAT
9401 && (mode == SFmode || mode == DFmode))
9402 return gen_rtx_REG (mode, 32);
9403 else
9404 return gen_rtx_REG (mode, 28);
9407 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9409 static bool
9410 pa_function_value_regno_p (const unsigned int regno)
9412 if (regno == 28
9413 || (! TARGET_SOFT_FLOAT && regno == 32))
9414 return true;
9416 return false;
9419 /* Update the data in CUM to advance over an argument
9420 of mode MODE and data type TYPE.
9421 (TYPE is null for libcalls where that information may not be available.) */
9423 static void
9424 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9425 const_tree type, bool named ATTRIBUTE_UNUSED)
9427 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9428 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9430 cum->nargs_prototype--;
9431 cum->words += (arg_size
9432 + ((cum->words & 01)
9433 && type != NULL_TREE
9434 && arg_size > 1));
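/* Worked example (32-bit, so FUNCTION_ARG_SIZE (DFmode, type) is 2):
   with cum->words == 1, a named double first skips a pad word to get
   8-byte alignment, so cum->words advances by 2 + 1 to 4.  */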
9437 /* Return the location of a parameter that is passed in a register or NULL
9438 if the parameter has any component that is passed in memory.
9440 This is new code and will be pushed into the net sources after
9441 further testing.
9443 ??? We might want to restructure this so that it looks more like other
9444 ports. */
9445 static rtx
9446 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9447 const_tree type, bool named ATTRIBUTE_UNUSED)
9449 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9450 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9451 int alignment = 0;
9452 int arg_size;
9453 int fpr_reg_base;
9454 int gpr_reg_base;
9455 rtx retval;
9457 if (mode == VOIDmode)
9458 return NULL_RTX;
9460 arg_size = FUNCTION_ARG_SIZE (mode, type);
9462 /* If this arg would be passed partially or totally on the stack, then
9463 this routine should return zero. pa_arg_partial_bytes will
9464 handle arguments which are split between regs and stack slots if
9465 the ABI mandates split arguments. */
9466 if (!TARGET_64BIT)
9468 /* The 32-bit ABI does not split arguments. */
9469 if (cum->words + arg_size > max_arg_words)
9470 return NULL_RTX;
9472 else
9474 if (arg_size > 1)
9475 alignment = cum->words & 1;
9476 if (cum->words + alignment >= max_arg_words)
9477 return NULL_RTX;
9480 /* The 32bit ABIs and the 64bit ABIs are rather different,
9481 particularly in their handling of FP registers. We might
9482 be able to cleverly share code between them, but I'm not
9483 going to bother in the hope that splitting them up results
9484 in code that is more easily understood. */
9486 if (TARGET_64BIT)
9488 /* Advance the base registers to their current locations.
9490 Remember, gprs grow towards smaller register numbers while
9491 fprs grow to higher register numbers. Also remember that
9492 although FP regs are 32-bit addressable, we pretend that
9493 the registers are 64 bits wide. */
9494 gpr_reg_base = 26 - cum->words;
9495 fpr_reg_base = 32 + cum->words;
9497 /* Arguments wider than one word and small aggregates need special
9498 treatment. */
9499 if (arg_size > 1
9500 || mode == BLKmode
9501 || (type && (AGGREGATE_TYPE_P (type)
9502 || TREE_CODE (type) == COMPLEX_TYPE
9503 || TREE_CODE (type) == VECTOR_TYPE)))
9505 /* Double-extended precision (80-bit), quad-precision (128-bit)
9506 and aggregates including complex numbers are aligned on
9507 128-bit boundaries. The first eight 64-bit argument slots
9508 are associated one-to-one, with general registers r26
9509 through r19, and also with floating-point registers fr4
9510 through fr11. Arguments larger than one word are always
9511 passed in general registers.
9513 Using a PARALLEL with a word mode register results in left
9514 justified data on a big-endian target. */
9516 rtx loc[8];
9517 int i, offset = 0, ub = arg_size;
9519 /* Align the base register. */
9520 gpr_reg_base -= alignment;
9522 ub = MIN (ub, max_arg_words - cum->words - alignment);
9523 for (i = 0; i < ub; i++)
9525 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9526 gen_rtx_REG (DImode, gpr_reg_base),
9527 GEN_INT (offset));
9528 gpr_reg_base -= 1;
9529 offset += 8;
9532 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9535 else
9537 /* If the argument is larger than a word, then we know precisely
9538 which registers we must use. */
9539 if (arg_size > 1)
9541 if (cum->words)
9543 gpr_reg_base = 23;
9544 fpr_reg_base = 38;
9546 else
9548 gpr_reg_base = 25;
9549 fpr_reg_base = 34;
9552 /* Structures 5 to 8 bytes in size are passed in the general
9553 registers in the same manner as other non-floating-point
9554 objects. The data is right-justified and zero-extended
9555 to 64 bits. This is opposite to the normal justification
9556 used on big endian targets and requires special treatment.
9557 We now define BLOCK_REG_PADDING to pad these objects.
9558 Aggregates, complex and vector types are passed in the same
9559 manner as structures. */
9560 if (mode == BLKmode
9561 || (type && (AGGREGATE_TYPE_P (type)
9562 || TREE_CODE (type) == COMPLEX_TYPE
9563 || TREE_CODE (type) == VECTOR_TYPE)))
9565 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9566 gen_rtx_REG (DImode, gpr_reg_base),
9567 const0_rtx);
9568 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9571 else
9573 /* We have a single word (32 bits). A simple computation
9574 will get us the register #s we need. */
9575 gpr_reg_base = 26 - cum->words;
9576 fpr_reg_base = 32 + 2 * cum->words;
9580 /* Determine if the argument needs to be passed in both general and
9581 floating point registers. */
9582 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9583 /* If we are doing soft-float with portable runtime, then there
9584 is no need to worry about FP regs. */
9585 && !TARGET_SOFT_FLOAT
9586 /* The parameter must be some kind of scalar float, else we just
9587 pass it in integer registers. */
9588 && GET_MODE_CLASS (mode) == MODE_FLOAT
9589 /* The target function must not have a prototype. */
9590 && cum->nargs_prototype <= 0
9591 /* libcalls do not need to pass items in both FP and general
9592 registers. */
9593 && type != NULL_TREE
9594 /* All this hair applies to "outgoing" args only. This includes
9595 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
9596 && !cum->incoming)
9597 /* Also pass outgoing floating arguments in both registers in indirect
9598 calls with the 32-bit ABI and the HP assembler since there is no
9599 way to specify argument locations in static functions.
9600 || (!TARGET_64BIT
9601 && !TARGET_GAS
9602 && !cum->incoming
9603 && cum->indirect
9604 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9606 retval
9607 = gen_rtx_PARALLEL
9608 (mode,
9609 gen_rtvec (2,
9610 gen_rtx_EXPR_LIST (VOIDmode,
9611 gen_rtx_REG (mode, fpr_reg_base),
9612 const0_rtx),
9613 gen_rtx_EXPR_LIST (VOIDmode,
9614 gen_rtx_REG (mode, gpr_reg_base),
9615 const0_rtx)));
9617 else
9619 /* See if we should pass this parameter in a general register. */
9620 if (TARGET_SOFT_FLOAT
9621 /* Indirect calls in the normal 32-bit ABI require all arguments
9622 to be passed in general registers. */
9623 || (!TARGET_PORTABLE_RUNTIME
9624 && !TARGET_64BIT
9625 && !TARGET_ELF32
9626 && cum->indirect)
9627 /* If the parameter is not a scalar floating-point parameter,
9628 then it belongs in GPRs. */
9629 || GET_MODE_CLASS (mode) != MODE_FLOAT
9630 /* Structure with single SFmode field belongs in GPR. */
9631 || (type && AGGREGATE_TYPE_P (type)))
9632 retval = gen_rtx_REG (mode, gpr_reg_base);
9633 else
9634 retval = gen_rtx_REG (mode, fpr_reg_base);
9636 return retval;
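/* Editorial trace of the 32-bit path above: for a "double" passed as the
   first argument of an unprototyped call on an ELF32 target, arg_size == 2
   and cum->words == 0, so gpr_reg_base == 25 and fpr_reg_base == 34, and
   the result is roughly

     (parallel [(expr_list (reg:DF 34) (const_int 0))
                (expr_list (reg:DF 25) (const_int 0))])

   i.e. the value is passed in both register files because the callee's
   expectations are unknown.  A prototyped scalar float goes to the FPR
   alone, and aggregates always go to the general registers.  */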
9639 /* Arguments larger than one word are double word aligned. */
9641 static unsigned int
9642 pa_function_arg_boundary (machine_mode mode, const_tree type)
9644 bool singleword = (type
9645 ? (integer_zerop (TYPE_SIZE (type))
9646 || !TREE_CONSTANT (TYPE_SIZE (type))
9647 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9648 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9650 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
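/* Editorial examples: an 8-byte "double" is not a single word on the
   32-bit runtime, so it gets MAX_PARM_BOUNDARY (double-word) alignment;
   an "int", an empty struct (integer_zerop size) and a variable-sized
   type (!TREE_CONSTANT size) all fall in the singleword case and get
   plain PARM_BOUNDARY alignment.  */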
9653 /* If this arg would be passed totally in registers or totally on the stack,
9654 then this routine should return zero. */
9656 static int
9657 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9658 tree type, bool named ATTRIBUTE_UNUSED)
9660 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9661 unsigned int max_arg_words = 8;
9662 unsigned int offset = 0;
9664 if (!TARGET_64BIT)
9665 return 0;
9667 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9668 offset = 1;
9670 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9671 /* Arg fits fully into registers. */
9672 return 0;
9673 else if (cum->words + offset >= max_arg_words)
9674 /* Arg fully on the stack. */
9675 return 0;
9676 else
9677 /* Arg is split. */
9678 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
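/* Editorial worked example for the 64-bit target: a three-word aggregate
   (FUNCTION_ARG_SIZE == 3) arriving with cum->words == 6 has offset == 0,
   and 6 + 0 + 3 > 8 while 6 + 0 < 8, so the argument is split:
   (8 - 6 - 0) * UNITS_PER_WORD == 16 bytes go in registers and the last
   word goes on the stack.  */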
9682 /* A get_unnamed_section callback for switching to the text section.
9684 This function is only used with SOM. Because we don't support
9685 named subspaces, we can only create a new subspace or switch back
9686 to the default text subspace. */
9688 static void
9689 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9691 gcc_assert (TARGET_SOM);
9692 if (TARGET_GAS)
9694 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9696 /* We only want to emit a .nsubspa directive once at the
9697 start of the function. */
9698 cfun->machine->in_nsubspa = 1;
9700 /* Create a new subspace for the text. This provides
9701 better stub placement and one-only functions. */
9702 if (cfun->decl
9703 && DECL_ONE_ONLY (cfun->decl)
9704 && !DECL_WEAK (cfun->decl))
9706 output_section_asm_op ("\t.SPACE $TEXT$\n"
9707 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9708 "ACCESS=44,SORT=24,COMDAT");
9709 return;
9712 else
9714 /* There isn't a current function or the body of the current
9715 function has been completed. So, we are changing to the
9716 text section to output debugging information. Thus, we
9717 need to forget that we are in the text section so that
9718 varasm.c will call us when text_section is selected again. */
9719 gcc_assert (!cfun || !cfun->machine
9720 || cfun->machine->in_nsubspa == 2);
9721 in_section = NULL;
9723 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9724 return;
9726 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9729 /* A get_unnamed_section callback for switching to comdat data
9730 sections. This function is only used with SOM. */
9732 static void
9733 som_output_comdat_data_section_asm_op (const void *data)
9735 in_section = NULL;
9736 output_section_asm_op (data);
9739 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
9741 static void
9742 pa_som_asm_init_sections (void)
9744 text_section
9745 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9747 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9748 is not being generated. */
9749 som_readonly_data_section
9750 = get_unnamed_section (0, output_section_asm_op,
9751 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9753 /* When secondary definitions are not supported, SOM makes readonly
9754 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9755 the comdat flag. */
9756 som_one_only_readonly_data_section
9757 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9758 "\t.SPACE $TEXT$\n"
9759 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9760 "ACCESS=0x2c,SORT=16,COMDAT");
9763 /* When secondary definitions are not supported, SOM makes data one-only
9764 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9765 som_one_only_data_section
9766 = get_unnamed_section (SECTION_WRITE,
9767 som_output_comdat_data_section_asm_op,
9768 "\t.SPACE $PRIVATE$\n"
9769 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9770 "ACCESS=31,SORT=24,COMDAT");
9772 if (flag_tm)
9773 som_tm_clone_table_section
9774 = get_unnamed_section (0, output_section_asm_op,
9775 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9777 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9778 which reference data within the $TEXT$ space (for example constant
9779 strings in the $LIT$ subspace).
9781 The assemblers (GAS and HP as) both have problems with handling
9782 the difference of two symbols which is the other correct way to
9783 reference constant data during PIC code generation.
9785 So, there's no way to reference constant data which is in the
9786 $TEXT$ space during PIC generation. Instead place all constant
9787 data into the $PRIVATE$ subspace (this reduces sharing, but it
9788 works correctly). */
9789 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9791 /* We must not have a reference to an external symbol defined in a
9792 shared library in a readonly section, else the SOM linker will
9793 complain.
9795 So, we force exception information into the data section. */
9796 exception_section = data_section;
9799 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9801 static section *
9802 pa_som_tm_clone_table_section (void)
9804 return som_tm_clone_table_section;
9807 /* On hpux10, the linker will give an error if we have a reference
9808 in the read-only data section to a symbol defined in a shared
9809 library. Therefore, expressions that might require a reloc cannot
9810 be placed in the read-only data section. */
9812 static section *
9813 pa_select_section (tree exp, int reloc,
9814 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9816 if (TREE_CODE (exp) == VAR_DECL
9817 && TREE_READONLY (exp)
9818 && !TREE_THIS_VOLATILE (exp)
9819 && DECL_INITIAL (exp)
9820 && (DECL_INITIAL (exp) == error_mark_node
9821 || TREE_CONSTANT (DECL_INITIAL (exp)))
9822 && !reloc)
9824 if (TARGET_SOM
9825 && DECL_ONE_ONLY (exp)
9826 && !DECL_WEAK (exp))
9827 return som_one_only_readonly_data_section;
9828 else
9829 return readonly_data_section;
9831 else if (CONSTANT_CLASS_P (exp) && !reloc)
9832 return readonly_data_section;
9833 else if (TARGET_SOM
9834 && TREE_CODE (exp) == VAR_DECL
9835 && DECL_ONE_ONLY (exp)
9836 && !DECL_WEAK (exp))
9837 return som_one_only_data_section;
9838 else
9839 return data_section;
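/* Editorial examples of the selection above: "const int x = 3;" lands in
   readonly data (or the SOM one-only readonly section when the decl is
   DECL_ONE_ONLY and not weak), while "char *const p = &y;" needs a
   relocation (reloc != 0) and therefore falls through to the data
   section, per the hpux10 linker restriction described above.  */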
9842 /* Implement TARGET_ASM_RELOC_RW_MASK. */
9844 static int
9845 pa_reloc_rw_mask (void)
9847 /* We force (const (plus (symbol) (const_int))) to memory when the
9848 const_int doesn't fit in a 14-bit integer. The SOM linker can't
9849 handle this construct in read-only memory and we want to avoid
9850 this for ELF. So, we always force an RTX needing relocation to
9851 the data section. */
9852 return 3;
9855 static void
9856 pa_globalize_label (FILE *stream, const char *name)
9858 /* We only handle DATA objects here; functions are globalized in
9859 ASM_DECLARE_FUNCTION_NAME. */
9860 if (! FUNCTION_NAME_P (name))
9862 fputs ("\t.EXPORT ", stream);
9863 assemble_name (stream, name);
9864 fputs (",DATA\n", stream);
9868 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9870 static rtx
9871 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9872 int incoming ATTRIBUTE_UNUSED)
9874 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9877 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9879 bool
9880 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9882 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9883 PA64 ABI says that objects larger than 128 bits are returned in memory.
9884 Note, int_size_in_bytes can return -1 if the size of the object is
9885 variable or larger than the maximum value that can be expressed as
9886 a HOST_WIDE_INT. It can also return zero for an empty type. The
9887 simplest way to handle variable and empty types is to pass them in
9888 memory. This avoids problems in defining the boundaries of argument
9889 slots, allocating registers, etc. */
9890 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9891 || int_size_in_bytes (type) <= 0);
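/* Editorial examples: a 12-byte struct is returned in memory on the
   32-bit target (12 > 8) but in registers on the 64-bit target
   (12 <= 16); zero-sized and variable-sized types always go in memory
   since int_size_in_bytes yields 0 or -1 for them.  */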
9894 /* Structure to hold declaration and name of external symbols that are
9895 emitted by GCC. We generate a vector of these symbols and output them
9896 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9897 This avoids putting out names that are never really used. */
9899 typedef struct GTY(()) extern_symbol
9901 tree decl;
9902 const char *name;
9903 } extern_symbol;
9905 /* Define gc'd vector type for extern_symbol. */
9907 /* Vector of extern_symbol pointers. */
9908 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9910 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9911 /* Mark DECL (name NAME) as an external reference (assembler output
9912 file FILE). This saves the names to output at the end of the file
9913 if actually referenced. */
9915 void
9916 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9918 gcc_assert (file == asm_out_file);
9919 extern_symbol p = {decl, name};
9920 vec_safe_push (extern_symbols, p);
9923 /* Output text required at the end of an assembler file.
9924 This includes deferred plabels and .import directives for
9925 all external symbols that were actually referenced. */
9927 static void
9928 pa_hpux_file_end (void)
9930 unsigned int i;
9931 extern_symbol *p;
9933 if (!NO_DEFERRED_PROFILE_COUNTERS)
9934 output_deferred_profile_counters ();
9936 output_deferred_plabels ();
9938 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9940 tree decl = p->decl;
9942 if (!TREE_ASM_WRITTEN (decl)
9943 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9944 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9947 vec_free (extern_symbols);
9949 #endif
9951 /* Return true if a change from mode FROM to mode TO for a register
9952 in register class RCLASS is invalid. */
9954 bool
9955 pa_cannot_change_mode_class (machine_mode from, machine_mode to,
9956 enum reg_class rclass)
9958 if (from == to)
9959 return false;
9961 /* Reject changes to/from complex and vector modes. */
9962 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9963 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9964 return true;
9966 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9967 return false;
9969 /* There is no way to load QImode or HImode values directly from
9970 memory. SImode loads to the FP registers are not zero extended.
9971 On the 64-bit target, this conflicts with the definition of
9972 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9973 with different sizes in the floating-point registers. */
9974 if (MAYBE_FP_REG_CLASS_P (rclass))
9975 return true;
9977 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9978 in specific sets of registers. Thus, we cannot allow changing
9979 to a larger mode when the new mode is wider than a word.
9980 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9981 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9982 return true;
9984 return false;
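/* Editorial examples: an SImode -> DImode change is rejected in the
   (maybe-)FP register classes because the sizes differ; on the 32-bit
   target it is also rejected in any class, DImode being wider than a
   word.  SImode <-> SFmode is allowed everywhere, the sizes being
   equal.  */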
9987 /* Returns TRUE if it is a good idea to tie two pseudo registers
9988 when one has mode MODE1 and one has mode MODE2.
9989 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9990 for any hard reg, then this must be FALSE for correct output.
9992 We should return FALSE for QImode and HImode because these modes
9993 are not ok in the floating-point registers. However, this prevents
9994 tying these modes to SImode and DImode in the general registers.
9995 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9996 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9997 in the floating-point registers. */
9999 bool
10000 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10002 /* Don't tie modes in different classes. */
10003 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10004 return false;
10006 return true;
10010 /* Length in units of the trampoline instruction code. */
10012 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10015 /* Output assembler code for a block containing the constant parts
10016 of a trampoline, leaving space for the variable parts.
10018 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10019 and then branches to the specified routine.
10021 This code template is copied from the text segment to a stack
10022 location, then patched by pa_trampoline_init to contain valid
10023 values, and finally entered as a subroutine.
10025 It is best to keep this as small as possible to avoid having to
10026 flush multiple lines in the cache. */
10028 static void
10029 pa_asm_trampoline_template (FILE *f)
10031 if (!TARGET_64BIT)
10033 fputs ("\tldw 36(%r22),%r21\n", f);
10034 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
10035 if (ASSEMBLER_DIALECT == 0)
10036 fputs ("\tdepi 0,31,2,%r21\n", f);
10037 else
10038 fputs ("\tdepwi 0,31,2,%r21\n", f);
10039 fputs ("\tldw 4(%r21),%r19\n", f);
10040 fputs ("\tldw 0(%r21),%r21\n", f);
10041 if (TARGET_PA_20)
10043 fputs ("\tbve (%r21)\n", f);
10044 fputs ("\tldw 40(%r22),%r29\n", f);
10045 fputs ("\t.word 0\n", f);
10046 fputs ("\t.word 0\n", f);
10048 else
10050 fputs ("\tldsid (%r21),%r1\n", f);
10051 fputs ("\tmtsp %r1,%sr0\n", f);
10052 fputs ("\tbe 0(%sr0,%r21)\n", f);
10053 fputs ("\tldw 40(%r22),%r29\n", f);
10055 fputs ("\t.word 0\n", f);
10056 fputs ("\t.word 0\n", f);
10057 fputs ("\t.word 0\n", f);
10058 fputs ("\t.word 0\n", f);
10060 else
10062 fputs ("\t.dword 0\n", f);
10063 fputs ("\t.dword 0\n", f);
10064 fputs ("\t.dword 0\n", f);
10065 fputs ("\t.dword 0\n", f);
10066 fputs ("\tmfia %r31\n", f);
10067 fputs ("\tldd 24(%r31),%r1\n", f);
10068 fputs ("\tldd 24(%r1),%r27\n", f);
10069 fputs ("\tldd 16(%r1),%r1\n", f);
10070 fputs ("\tbve (%r1)\n", f);
10071 fputs ("\tldd 32(%r31),%r31\n", f);
10072 fputs ("\t.dword 0 ; fptr\n", f);
10073 fputs ("\t.dword 0 ; static link\n", f);
10077 /* Emit RTL insns to initialize the variable parts of a trampoline.
10078 FNADDR is an RTX for the address of the function's pure code.
10079 CXT is an RTX for the static chain value for the function.
10081 Move the function address to the trampoline template at offset 36.
10082 Move the static chain value to the trampoline template at offset 40.
10083 Move the trampoline address to the trampoline template at offset 44.
10084 Move r19 to the trampoline template at offset 48. The latter two
10085 words create a plabel for the indirect call to the trampoline.
10087 A similar sequence is used for the 64-bit port but the plabel is
10088 at the beginning of the trampoline.
10090 Finally, the cache entries for the trampoline code are flushed.
10091 This is necessary to ensure that the trampoline instruction sequence
10092 is written to memory prior to any attempts at prefetching the code
10093 sequence. */
10095 static void
10096 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10098 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10099 rtx start_addr = gen_reg_rtx (Pmode);
10100 rtx end_addr = gen_reg_rtx (Pmode);
10101 rtx line_length = gen_reg_rtx (Pmode);
10102 rtx r_tramp, tmp;
10104 emit_block_move (m_tramp, assemble_trampoline_template (),
10105 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10106 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10108 if (!TARGET_64BIT)
10110 tmp = adjust_address (m_tramp, Pmode, 36);
10111 emit_move_insn (tmp, fnaddr);
10112 tmp = adjust_address (m_tramp, Pmode, 40);
10113 emit_move_insn (tmp, chain_value);
10115 /* Create a fat pointer for the trampoline. */
10116 tmp = adjust_address (m_tramp, Pmode, 44);
10117 emit_move_insn (tmp, r_tramp);
10118 tmp = adjust_address (m_tramp, Pmode, 48);
10119 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10121 /* fdc and fic only use registers for the address to flush;
10122 they do not accept integer displacements. We align the
10123 start and end addresses to the beginning of their respective
10124 cache lines to minimize the number of lines flushed. */
10125 emit_insn (gen_andsi3 (start_addr, r_tramp,
10126 GEN_INT (-MIN_CACHELINE_SIZE)));
10127 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10128 TRAMPOLINE_CODE_SIZE-1));
10129 emit_insn (gen_andsi3 (end_addr, tmp,
10130 GEN_INT (-MIN_CACHELINE_SIZE)));
10131 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10132 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10133 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10134 gen_reg_rtx (Pmode),
10135 gen_reg_rtx (Pmode)));
10137 else
10139 tmp = adjust_address (m_tramp, Pmode, 56);
10140 emit_move_insn (tmp, fnaddr);
10141 tmp = adjust_address (m_tramp, Pmode, 64);
10142 emit_move_insn (tmp, chain_value);
10144 /* Create a fat pointer for the trampoline. */
10145 tmp = adjust_address (m_tramp, Pmode, 16);
10146 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10147 r_tramp, 32)));
10148 tmp = adjust_address (m_tramp, Pmode, 24);
10149 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10151 /* fdc and fic only use registers for the address to flush;
10152 they do not accept integer displacements. We align the
10153 start and end addresses to the beginning of their respective
10154 cache lines to minimize the number of lines flushed. */
10155 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10156 emit_insn (gen_anddi3 (start_addr, tmp,
10157 GEN_INT (-MIN_CACHELINE_SIZE)));
10158 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10159 TRAMPOLINE_CODE_SIZE - 1));
10160 emit_insn (gen_anddi3 (end_addr, tmp,
10161 GEN_INT (-MIN_CACHELINE_SIZE)));
10162 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10163 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10164 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10165 gen_reg_rtx (Pmode),
10166 gen_reg_rtx (Pmode)));
10169 #ifdef HAVE_ENABLE_EXECUTE_STACK
10170 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10171 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
10172 #endif
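/* Editorial worked example of the flush-range arithmetic above, assuming
   MIN_CACHELINE_SIZE == 32: for a 32-bit pre-PA2.0 trampoline at 0x1010
   (TRAMPOLINE_CODE_SIZE == 40), start_addr = 0x1010 & -32 = 0x1000 and
   end_addr = (0x1010 + 39) & -32 = 0x1020, so the dcacheflush/icacheflush
   loops walk the two lines at 0x1000 and 0x1020.  */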
10175 /* Perform any machine-specific adjustment in the address of the trampoline.
10176 ADDR contains the address that was passed to pa_trampoline_init.
10177 Adjust the trampoline address to point to the plabel at offset 44. */
10179 static rtx
10180 pa_trampoline_adjust_address (rtx addr)
10182 if (!TARGET_64BIT)
10183 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10184 return addr;
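/* Editorial note: 46 here is the plabel offset 44 plus 2; setting the
   low-order 2 bit appears to mark the pointer as a plabel (compare the
   "bb,>=,n %r21,30" test in the trampoline template above), so indirect
   calls through the adjusted address are handled like other function
   pointers.  */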
10187 static rtx
10188 pa_delegitimize_address (rtx orig_x)
10190 rtx x = delegitimize_mem_from_attrs (orig_x);
10192 if (GET_CODE (x) == LO_SUM
10193 && GET_CODE (XEXP (x, 1)) == UNSPEC
10194 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10195 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10196 return x;
10199 static rtx
10200 pa_internal_arg_pointer (void)
10202 /* The argument pointer and the hard frame pointer are the same in
10203 the 32-bit runtime, so we don't need a copy. */
10204 if (TARGET_64BIT)
10205 return copy_to_reg (virtual_incoming_args_rtx);
10206 else
10207 return virtual_incoming_args_rtx;
10210 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10211 Frame pointer elimination is automatically handled. */
10213 static bool
10214 pa_can_eliminate (const int from, const int to)
10216 /* The argument pointer cannot be eliminated in the 64-bit runtime. */
10217 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10218 return false;
10220 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10221 ? ! frame_pointer_needed
10222 : true);
10225 /* Define the offset between two registers, FROM to be eliminated and its
10226 replacement TO, at the start of a routine. */
10227 HOST_WIDE_INT
10228 pa_initial_elimination_offset (int from, int to)
10230 HOST_WIDE_INT offset;
10232 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10233 && to == STACK_POINTER_REGNUM)
10234 offset = -pa_compute_frame_size (get_frame_size (), 0);
10235 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10236 offset = 0;
10237 else
10238 gcc_unreachable ();
10240 return offset;
10243 static void
10244 pa_conditional_register_usage (void)
10246 int i;
10248 if (!TARGET_64BIT && !TARGET_PA_11)
10250 for (i = 56; i <= FP_REG_LAST; i++)
10251 fixed_regs[i] = call_used_regs[i] = 1;
10252 for (i = 33; i < 56; i += 2)
10253 fixed_regs[i] = call_used_regs[i] = 1;
10255 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10257 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10258 fixed_regs[i] = call_used_regs[i] = 1;
10260 if (flag_pic)
10261 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10264 /* Target hook for c_mode_for_suffix. */
10266 static machine_mode
10267 pa_c_mode_for_suffix (char suffix)
10269 if (HPUX_LONG_DOUBLE_LIBRARY)
10271 if (suffix == 'q')
10272 return TFmode;
10275 return VOIDmode;
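/* Editorial example: when HPUX_LONG_DOUBLE_LIBRARY is in effect a
   literal such as 1.0q is given TFmode (128-bit) through this hook;
   otherwise VOIDmode is returned and the front end rejects the 'q'
   suffix.  */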
10278 /* Target hook for function_section. */
10280 static section *
10281 pa_function_section (tree decl, enum node_frequency freq,
10282 bool startup, bool exit)
10284 /* Put functions in text section if target doesn't have named sections. */
10285 if (!targetm_common.have_named_sections)
10286 return text_section;
10288 /* Force nested functions into the same section as the containing
10289 function. */
10290 if (decl
10291 && DECL_SECTION_NAME (decl) == NULL
10292 && DECL_CONTEXT (decl) != NULL_TREE
10293 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10294 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10295 return function_section (DECL_CONTEXT (decl));
10297 /* Otherwise, use the default function section. */
10298 return default_function_section (decl, freq, startup, exit);
10301 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10303 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10304 that need more than three instructions to load prior to reload. This
10305 limit is somewhat arbitrary. It takes three instructions to load a
10306 CONST_INT from memory, two of which are memory accesses. It may be better
10307 to increase the allowed range for CONST_INTS. We may also be able
10308 to handle CONST_DOUBLES. */
10310 static bool
10311 pa_legitimate_constant_p (machine_mode mode, rtx x)
10313 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10314 return false;
10316 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10317 return false;
10319 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10320 legitimate constants. The other variants can't be handled by
10321 the move patterns after reload starts. */
10322 if (tls_referenced_p (x))
10323 return false;
10325 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10326 return false;
10328 if (TARGET_64BIT
10329 && HOST_BITS_PER_WIDE_INT > 32
10330 && GET_CODE (x) == CONST_INT
10331 && !reload_in_progress
10332 && !reload_completed
10333 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10334 && !pa_cint_ok_for_move (UINTVAL (x)))
10335 return false;
10337 if (function_label_operand (x, mode))
10338 return false;
10340 return true;
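/* Editorial examples: (const_double:DF 1.5) is rejected because only
   CONST0_RTX is a legitimate FP constant, and on TARGET_64BIT a
   CONST_INT that is neither a legitimate 64-bit immediate nor cheap to
   synthesize (pa_cint_ok_for_move) is rejected before reload, forcing
   it to memory.  */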
10343 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10345 static unsigned int
10346 pa_section_type_flags (tree decl, const char *name, int reloc)
10348 unsigned int flags;
10350 flags = default_section_type_flags (decl, name, reloc);
10352 /* Function labels are placed in the constant pool. This can
10353 cause a section conflict if decls are put in ".data.rel.ro"
10354 or ".data.rel.ro.local" using the __attribute__ construct. */
10355 if (strcmp (name, ".data.rel.ro") == 0
10356 || strcmp (name, ".data.rel.ro.local") == 0)
10357 flags |= SECTION_WRITE | SECTION_RELRO;
10359 return flags;
10362 /* pa_legitimate_address_p recognizes an RTL expression that is a
10363 valid memory address for an instruction. The MODE argument is the
10364 machine mode for the MEM expression that wants to use this address.
10366 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10367 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10368 available with floating point loads and stores, and integer loads.
10369 We get better code by allowing indexed addresses in the initial
10370 RTL generation.
10372 The acceptance of indexed addresses as legitimate implies that we
10373 must provide patterns for doing indexed integer stores, or the move
10374 expanders must force the address of an indexed store to a register.
10375 We have adopted the latter approach.
10377 Another function of pa_legitimate_address_p is to ensure that
10378 the base register is a valid pointer for indexed instructions.
10379 On targets that have non-equivalent space registers, we have to
10380 know at the time of assembler output which register in a REG+REG
10381 pair is the base register. The REG_POINTER flag is sometimes lost
10382 in reload and the following passes, so it can't be relied on during
10383 code generation. Thus, we either have to canonicalize the order
10384 of the registers in REG+REG indexed addresses, or treat REG+REG
10385 addresses separately and provide patterns for both permutations.
10387 The latter approach requires several hundred additional lines of
10388 code in pa.md. The downside to canonicalizing is that a PLUS
10389 in the wrong order can't combine to form a scaled indexed
10390 memory operand. As we won't need to canonicalize the operands if
10391 the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10393 We initially break out scaled indexed addresses in canonical order
10394 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10395 scaled indexed addresses during RTL generation. However, fold_rtx
10396 has its own opinion on how the operands of a PLUS should be ordered.
10397 If one of the operands is equivalent to a constant, it will make
10398 that operand the second operand. As the base register is likely to
10399 be equivalent to a SYMBOL_REF, we have made it the second operand.
10401 pa_legitimate_address_p accepts REG+REG as legitimate when the
10402 operands are in the order INDEX+BASE on targets with non-equivalent
10403 space registers, and in any order on targets with equivalent space
10404 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10406 We treat a SYMBOL_REF as legitimate if it is part of the current
10407 function's constant-pool, because such addresses can actually be
10408 output as REG+SMALLINT. */
10410 static bool
10411 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10413 if ((REG_P (x)
10414 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10415 : REG_OK_FOR_BASE_P (x)))
10416 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10417 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10418 && REG_P (XEXP (x, 0))
10419 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10420 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10421 return true;
10423 if (GET_CODE (x) == PLUS)
10425 rtx base, index;
10427 /* For REG+REG, the base register should be in XEXP (x, 1),
10428 so check it first. */
10429 if (REG_P (XEXP (x, 1))
10430 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10431 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10432 base = XEXP (x, 1), index = XEXP (x, 0);
10433 else if (REG_P (XEXP (x, 0))
10434 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10435 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10436 base = XEXP (x, 0), index = XEXP (x, 1);
10437 else
10438 return false;
10440 if (GET_CODE (index) == CONST_INT)
10442 if (INT_5_BITS (index))
10443 return true;
10445 /* When INT14_OK_STRICT is false, a secondary reload is needed
10446 to adjust the displacement of SImode and DImode floating point
10447 instructions but this may fail when the register also needs
10448 reloading. So, we return false when STRICT is true. We
10449 also reject long displacements for float mode addresses since
10450 the majority of accesses will use floating point instructions
10451 that don't support 14-bit offsets. */
10452 if (!INT14_OK_STRICT
10453 && (strict || !(reload_in_progress || reload_completed))
10454 && mode != QImode
10455 && mode != HImode)
10456 return false;
10458 return base14_operand (index, mode);
10461 if (!TARGET_DISABLE_INDEXING
10462 /* Only accept the "canonical" INDEX+BASE operand order
10463 on targets with non-equivalent space registers. */
10464 && (TARGET_NO_SPACE_REGS
10465 ? REG_P (index)
10466 : (base == XEXP (x, 1) && REG_P (index)
10467 && (reload_completed
10468 || (reload_in_progress && HARD_REGISTER_P (base))
10469 || REG_POINTER (base))
10470 && (reload_completed
10471 || (reload_in_progress && HARD_REGISTER_P (index))
10472 || !REG_POINTER (index))))
10473 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10474 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10475 : REG_OK_FOR_INDEX_P (index))
10476 && borx_reg_operand (base, Pmode)
10477 && borx_reg_operand (index, Pmode))
10478 return true;
10480 if (!TARGET_DISABLE_INDEXING
10481 && GET_CODE (index) == MULT
10482 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10483 && REG_P (XEXP (index, 0))
10484 && GET_MODE (XEXP (index, 0)) == Pmode
10485 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10486 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10487 && GET_CODE (XEXP (index, 1)) == CONST_INT
10488 && INTVAL (XEXP (index, 1))
10489 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10490 && borx_reg_operand (base, Pmode))
10491 return true;
10493 return false;
10496 if (GET_CODE (x) == LO_SUM)
10498 rtx y = XEXP (x, 0);
10500 if (GET_CODE (y) == SUBREG)
10501 y = SUBREG_REG (y);
10503 if (REG_P (y)
10504 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10505 : REG_OK_FOR_BASE_P (y)))
10507 /* Needed for -fPIC */
10508 if (mode == Pmode
10509 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10510 return true;
10512 if (!INT14_OK_STRICT
10513 && (strict || !(reload_in_progress || reload_completed))
10514 && mode != QImode
10515 && mode != HImode)
10516 return false;
10518 if (CONSTANT_P (XEXP (x, 1)))
10519 return true;
10521 return false;
10524 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10525 return true;
10527 return false;
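/* Editorial sketches of addresses the predicate above accepts
   (register numbers arbitrary):

     (plus (reg 3) (const_int 12))                 ; REG+SMALLINT
     (plus (reg 4) (reg 3))                        ; REG+REG, index+base
     (plus (mult (reg 4) (const_int 8)) (reg 3))   ; scaled, DImode access
     (lo_sum (reg 3) (symbol_ref "x"))             ; after HIGH/LO_SUM split

   The scaled form requires the multiplier to equal GET_MODE_SIZE (mode),
   as checked above.  */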
10530 /* Look for machine dependent ways to make the invalid address AD a
10531 valid address.
10533 For the PA, transform:
10535 memory(X + <large int>)
10537 into:
10539 if (<large int> & mask) >= (mask + 1) / 2
10540 Y = (<large int> & ~mask) + mask + 1 Round up.
10541 else
10542 Y = (<large int> & ~mask) Round down.
10543 Z = X + Y
10544 memory (Z + (<large int> - Y));
10546 This makes reload inheritance and reload_cse work better since Z
10547 can be reused.
10549 There may be more opportunities to improve code with this hook. */
10551 rtx
10552 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10553 int opnum, int type,
10554 int ind_levels ATTRIBUTE_UNUSED)
10556 long offset, newoffset, mask;
10557 rtx new_rtx, temp = NULL_RTX;
10559 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10560 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10562 if (optimize && GET_CODE (ad) == PLUS)
10563 temp = simplify_binary_operation (PLUS, Pmode,
10564 XEXP (ad, 0), XEXP (ad, 1));
10566 new_rtx = temp ? temp : ad;
10568 if (optimize
10569 && GET_CODE (new_rtx) == PLUS
10570 && GET_CODE (XEXP (new_rtx, 0)) == REG
10571 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10573 offset = INTVAL (XEXP ((new_rtx), 1));
10575 /* Choose rounding direction. Round up if we are >= halfway. */
10576 if ((offset & mask) >= ((mask + 1) / 2))
10577 newoffset = (offset & ~mask) + mask + 1;
10578 else
10579 newoffset = offset & ~mask;
10581 /* Ensure that long displacements are aligned. */
10582 if (mask == 0x3fff
10583 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10584 || (TARGET_64BIT && (mode) == DImode)))
10585 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10587 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10589 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10590 GEN_INT (newoffset));
10591 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10592 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10593 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10594 opnum, (enum reload_type) type);
10595 return ad;
10599 return NULL_RTX;
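/* Editorial worked example: for an SFmode access memory (X + 0x123c)
   with !INT14_OK_STRICT, mask == 0x1f and the low bits 0x1c are >= 0x10,
   so we round up: newoffset == (0x123c & ~0x1f) + 0x20 == 0x1240, which
   satisfies VAL_14_BITS_P.  The address becomes (X + 0x1240) + (-4); the
   inner PLUS is reloaded into a base register (one ldo) and the residual
   -4 fits the 5-bit FP displacement, so the reloaded base can be reused
   by reload inheritance.  */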
10602 /* Output address vector. */
10604 void
10605 pa_output_addr_vec (rtx lab, rtx body)
10607 int idx, vlen = XVECLEN (body, 0);
10609 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10610 if (TARGET_GAS)
10611 fputs ("\t.begin_brtab\n", asm_out_file);
10612 for (idx = 0; idx < vlen; idx++)
10614 ASM_OUTPUT_ADDR_VEC_ELT
10615 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10617 if (TARGET_GAS)
10618 fputs ("\t.end_brtab\n", asm_out_file);
10621 /* Output address difference vector. */
10623 void
10624 pa_output_addr_diff_vec (rtx lab, rtx body)
10626 rtx base = XEXP (XEXP (body, 0), 0);
10627 int idx, vlen = XVECLEN (body, 1);
10629 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10630 if (TARGET_GAS)
10631 fputs ("\t.begin_brtab\n", asm_out_file);
10632 for (idx = 0; idx < vlen; idx++)
10634 ASM_OUTPUT_ADDR_DIFF_ELT
10635 (asm_out_file,
10636 body,
10637 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10638 CODE_LABEL_NUMBER (base));
10640 if (TARGET_GAS)
10641 fputs ("\t.end_brtab\n", asm_out_file);
10644 /* This is a helper function for the other atomic operations. This function
10645 emits a loop that contains SEQ that iterates until a compare-and-swap
10646 operation at the end succeeds. MEM is the memory to be modified. SEQ is
10647 a set of instructions that takes a value from OLD_REG as an input and
10648 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be
10649 set to the current contents of MEM. After SEQ, a compare-and-swap will
10650 attempt to update MEM with NEW_REG. The function returns true when the
10651 loop was generated successfully. */
10653 static bool
10654 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10656 machine_mode mode = GET_MODE (mem);
10657 rtx_code_label *label;
10658 rtx cmp_reg, success, oldval;
10660 /* The loop we want to generate looks like
10662 cmp_reg = mem;
10663 label:
10664 old_reg = cmp_reg;
10665 seq;
10666 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10667 if (success)
10668 goto label;
10670 Note that we only do the plain load from memory once. Subsequent
10671 iterations use the value loaded by the compare-and-swap pattern. */
10673 label = gen_label_rtx ();
10674 cmp_reg = gen_reg_rtx (mode);
10676 emit_move_insn (cmp_reg, mem);
10677 emit_label (label);
10678 emit_move_insn (old_reg, cmp_reg);
10679 if (seq)
10680 emit_insn (seq);
10682 success = NULL_RTX;
10683 oldval = cmp_reg;
10684 if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10685 new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10686 MEMMODEL_RELAXED))
10687 return false;
10689 if (oldval != cmp_reg)
10690 emit_move_insn (cmp_reg, oldval);
10692 /* Mark this jump predicted not taken. */
10693 emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
10694 GET_MODE (success), 1, label, 0);
10695 return true;
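/* Editorial usage sketch (names hypothetical, not from this file): an
   atomic OR could be built on the helper above roughly as

     rtx old_reg = gen_reg_rtx (mode);
     rtx new_reg = gen_reg_rtx (mode);
     start_sequence ();
     emit_insn (gen_rtx_SET (new_reg, gen_rtx_IOR (mode, old_reg, val)));
     rtx seq = get_insns ();
     end_sequence ();
     pa_expand_compare_and_swap_loop (mem, old_reg, new_reg, seq);

   SEQ recomputes NEW_REG from OLD_REG on every retry, which is what the
   loop structure documented above requires.  */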
10698 /* This function tries to implement an atomic exchange operation using a
10699 compare_and_swap loop. VAL is written to *MEM. The previous contents of
10700 *MEM are returned, using TARGET if possible. No memory model is required
10701 since a compare_and_swap loop is seq-cst. */
10703 rtx
10704 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10706 machine_mode mode = GET_MODE (mem);
10708 if (can_compare_and_swap_p (mode, true))
10710 if (!target || !register_operand (target, mode))
10711 target = gen_reg_rtx (mode);
10712 if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10713 return target;
10716 return NULL_RTX;
10719 #include "gt-pa.h"