PR sanitizer/80932
[official-gcc.git] / gcc / config / pa / pa.c
blob 3e65ba1db59d14bca40f221120dcc0d3735a9f21
/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2017 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
                            const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, machine_mode,
                              const_tree, bool);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
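
/* For illustration (a sketch, following the comment inside fix_range
   above): an option such as

     -mfixed-range=fr4-fr31

   would mark the floating point registers fr4 through fr31 as fixed and
   call-used, keeping the compiler from allocating them, e.g. for kernel
   mode code.  The exact register names accepted are whatever
   decode_reg_name recognizes for this target.  If the range covers every
   register from FP_REG_FIRST to FP_REG_LAST, the loop above also turns
   on MASK_DISABLE_FPREGS.  */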
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};
static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
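
/* For illustration (assuming a target where HPUX_LONG_DOUBLE_LIBRARY is
   true): the builtins registered above can be used from C as, e.g.,

     long double a = __builtin_infq ();
     long double b = __builtin_copysignq (a, -1.0L);

   fabsq and copysignq expand to library calls on _U_Qfabs and
   _U_Qfcopysign, while infq and huge_valq are expanded inline to a
   TFmode constant by pa_expand_builtin below.  */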
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = const_double_from_real_value (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
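
/* For illustration, three values run through the test above:

     ival = 0x12345800: x == 0, accepted (low 11 bits clear, upper
       bits zero).
     ival = 0x12345678: x == 0x678, rejected (low 11 bits not clear).
     ival = 0x80000000 (with a 64-bit HOST_WIDE_INT): x == 0x80000000,
       rejected, since the value changes sign when extended from 32 to
       64 bits.  */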
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
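
/* For illustration:

     x = 0xb0 (1011 0000): lsb_mask = 0x10,
       t = (0xb + 0x10) & ~0xf = 0x10, a power of two, so zdepi works
       (the 5-bit field 01011 shifted left by 4).
     x = 0x101 (1 0000 0001): lsb_mask = 0x1,
       t = (0x10 + 0x1) & ~0x0 = 0x11, not a power of two, so zdepi
       cannot generate this value.  */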
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
        0....01....1
        1....10....0
        1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
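
/* For illustration, with a 64-bit HOST_WIDE_INT:

     mask = 0x3f: ~mask = 0xff...c0; adding its lowest set bit (0x40)
       carries out to 0, so the test passes (pattern 0....01....1).
     mask = 0xff...f00: ~mask = 0xff; adding 0x1 gives 0x100, a power
       of two, so the test passes (pattern 1....10....0).
     mask = 0xf0f: ~mask = 0xff...f0f0; adding 0x10 gives 0xff...f100,
       not a power of two, so the test fails.  */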
/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
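
/* For illustration: mask = 0x3f0 gives 0x3f0 + 0x10 = 0x400, a power of
   two, so the six contiguous one bits can be deposited by a single depi;
   mask = 0x5 gives 0x5 + 0x1 = 0x6, not a power of two, so it is
   rejected (the set bits are not contiguous).  */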
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          /* Make sure we have label and not a note.  */
          if (LABEL_P (orig))
            LABEL_NUSES (orig)++;
        }

      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (Pmode, base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These forms respectively represent canonical shift-add rtxs and
   scaled memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
           || GET_CODE (x) == MULT)
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && ((GET_CODE (x) == ASHIFT
               && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
              || (GET_CODE (x) == MULT
                  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
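
/* For illustration, both of the following satisfy the predicate above
   and describe the same scaled address term:

     (ashift (reg) (const_int 2))     canonical shift-add form
     (mult (reg) (const_int 4))       scaled memory address form  */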
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
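
/* For illustration, two MODE_INT examples of the transformation above,
   using the 0x3fff mask:

     memory (X + 0x4321): 0x4321 & 0x3fff = 0x321, below halfway, so
       Y = 0x4000; emit Z = X + 0x4000 and use memory (Z + 0x321).
     memory (X + 0x7fff): 0x7fff & 0x3fff = 0x3fff, at least halfway, so
       Y = 0x8000; emit Z = X + 0x8000 and use memory (Z + -1).

   In both cases the residual displacement fits in 14 bits, and CSE can
   share Z among neighboring references.  */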
static rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg2,
                                                      GEN_INT (shift_val)),
                                      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_ASHIFT (Pmode,
                                              XEXP (XEXP (XEXP (x, 0), 0), 0),
                                              GEN_INT (shift_val)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));
          val /= (1 << shift_val);

          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_ASHIFT (Pmode, reg1,
                                                    GEN_INT (shift_val)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode,
                            gen_rtx_PLUS (Pmode,
                                          gen_rtx_ASHIFT (Pmode, reg2,
                                                          GEN_INT (shift_val)),
                                          reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg1,
                                                      GEN_INT (shift_val)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (mem_shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode,
                                                         reg2,
                                                         GEN_INT (shift_val)),
                                         reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode, regx2,
                                                         GEN_INT (shift_val)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}
/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
                   addr_space_t as ATTRIBUTE_UNUSED,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
                int opno ATTRIBUTE_UNUSED,
                int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = factor * factor * COSTS_N_INSNS (8);
      else
        *total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A size N times larger than UNITS_PER_WORD needs N times as
         many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor == 0)
        factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}
1599 /* Emit insns to move operands[1] into operands[0].
1601 Return 1 if we have written out everything that needs to be done to
1602 do the move. Otherwise, return 0 and the caller will emit the move
1603 normally.
1605 Note SCRATCH_REG may not be in the proper mode depending on how it
1606 will be used. This routine is responsible for creating a new copy
1607 of SCRATCH_REG in the proper mode. */
1610 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1612 register rtx operand0 = operands[0];
1613 register rtx operand1 = operands[1];
1614 register rtx tem;
1616 /* We can only handle indexed addresses in the destination operand
1617 of floating point stores. Thus, we need to break out indexed
1618 addresses from the destination operand. */
1619 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1621 gcc_assert (can_create_pseudo_p ());
1623 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1624 operand0 = replace_equiv_address (operand0, tem);
1627 /* On targets with non-equivalent space registers, break out unscaled
1628 indexed addresses from the source operand before the final CSE.
1629 We have to do this because the REG_POINTER flag is not correctly
1630 carried through various optimization passes and CSE may substitute
1631 a pseudo without the pointer set for one with the pointer set. As
1632 a result, we loose various opportunities to create insns with
1633 unscaled indexed addresses. */
1634 if (!TARGET_NO_SPACE_REGS
1635 && !cse_not_expected
1636 && GET_CODE (operand1) == MEM
1637 && GET_CODE (XEXP (operand1, 0)) == PLUS
1638 && REG_P (XEXP (XEXP (operand1, 0), 0))
1639 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1640 operand1
1641 = replace_equiv_address (operand1,
1642 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1644 if (scratch_reg
1645 && reload_in_progress && GET_CODE (operand0) == REG
1646 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1647 operand0 = reg_equiv_mem (REGNO (operand0));
1648 else if (scratch_reg
1649 && reload_in_progress && GET_CODE (operand0) == SUBREG
1650 && GET_CODE (SUBREG_REG (operand0)) == REG
1651 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1653 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1654 the code which tracks sets/uses for delete_output_reload. */
1655 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1656 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1657 SUBREG_BYTE (operand0));
1658 operand0 = alter_subreg (&temp, true);
1661 if (scratch_reg
1662 && reload_in_progress && GET_CODE (operand1) == REG
1663 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1664 operand1 = reg_equiv_mem (REGNO (operand1));
1665 else if (scratch_reg
1666 && reload_in_progress && GET_CODE (operand1) == SUBREG
1667 && GET_CODE (SUBREG_REG (operand1)) == REG
1668 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1670 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1671 the code which tracks sets/uses for delete_output_reload. */
1672 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1673 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1674 SUBREG_BYTE (operand1));
1675 operand1 = alter_subreg (&temp, true);
1678 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1679 && ((tem = find_replacement (&XEXP (operand0, 0)))
1680 != XEXP (operand0, 0)))
1681 operand0 = replace_equiv_address (operand0, tem);
1683 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1684 && ((tem = find_replacement (&XEXP (operand1, 0)))
1685 != XEXP (operand1, 0)))
1686 operand1 = replace_equiv_address (operand1, tem);
1688 /* Handle secondary reloads for loads/stores of FP registers from
1689 REG+D addresses where D does not fit in 5 or 14 bits, including
1690 (subreg (mem (addr))) cases, and reloads for other unsupported
1691 memory operands. */
1692 if (scratch_reg
1693 && FP_REG_P (operand0)
1694 && (MEM_P (operand1)
1695 || (GET_CODE (operand1) == SUBREG
1696 && MEM_P (XEXP (operand1, 0)))))
1698 rtx op1 = operand1;
1700 if (GET_CODE (op1) == SUBREG)
1701 op1 = XEXP (op1, 0);
1703 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1705 if (!(TARGET_PA_20
1706 && !TARGET_ELF32
1707 && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1708 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1710 /* SCRATCH_REG will hold an address and maybe the actual data.
1711 We want it in WORD_MODE regardless of what mode it was
1712 originally given to us. */
1713 scratch_reg = force_mode (word_mode, scratch_reg);
1715 /* D might not fit in 14 bits either; for such cases load D
1716 into scratch reg. */
1717 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1719 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1720 emit_move_insn (scratch_reg,
1721 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1722 Pmode,
1723 XEXP (XEXP (op1, 0), 0),
1724 scratch_reg));
1726 else
1727 emit_move_insn (scratch_reg, XEXP (op1, 0));
1728 emit_insn (gen_rtx_SET (operand0,
1729 replace_equiv_address (op1, scratch_reg)));
1730 return 1;
1733 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1734 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1735 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1737 /* Load memory address into SCRATCH_REG. */
1738 scratch_reg = force_mode (word_mode, scratch_reg);
1739 emit_move_insn (scratch_reg, XEXP (op1, 0));
1740 emit_insn (gen_rtx_SET (operand0,
1741 replace_equiv_address (op1, scratch_reg)));
1742 return 1;
1745 else if (scratch_reg
1746 && FP_REG_P (operand1)
1747 && (MEM_P (operand0)
1748 || (GET_CODE (operand0) == SUBREG
1749 && MEM_P (XEXP (operand0, 0)))))
1751 rtx op0 = operand0;
1753 if (GET_CODE (op0) == SUBREG)
1754 op0 = XEXP (op0, 0);
1756 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1758 if (!(TARGET_PA_20
1759 && !TARGET_ELF32
1760 && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1761 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1763 /* SCRATCH_REG will hold an address and maybe the actual data.
1764 We want it in WORD_MODE regardless of what mode it was
1765 originally given to us. */
1766 scratch_reg = force_mode (word_mode, scratch_reg);
1768 /* D might not fit in 14 bits either; for such cases load D
1769 into scratch reg. */
1770 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1772 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1773 emit_move_insn (scratch_reg,
1774 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1775 Pmode,
1776 XEXP (XEXP (op0, 0), 0),
1777 scratch_reg));
1779 else
1780 emit_move_insn (scratch_reg, XEXP (op0, 0));
1781 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1782 operand1));
1783 return 1;
1786 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1787 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1788 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1790 /* Load memory address into SCRATCH_REG. */
1791 scratch_reg = force_mode (word_mode, scratch_reg);
1792 emit_move_insn (scratch_reg, XEXP (op0, 0));
1793 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1794 operand1));
1795 return 1;
1798 /* Handle secondary reloads for loads of FP registers from constant
1799 expressions by forcing the constant into memory. For the most part,
1800 this is only necessary for SImode and DImode.
1802 Use scratch_reg to hold the address of the memory location. */
1803 else if (scratch_reg
1804 && CONSTANT_P (operand1)
1805 && FP_REG_P (operand0))
1807 rtx const_mem, xoperands[2];
1809 if (operand1 == CONST0_RTX (mode))
1811 emit_insn (gen_rtx_SET (operand0, operand1));
1812 return 1;
1815 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1816 it in WORD_MODE regardless of what mode it was originally given
1817 to us. */
1818 scratch_reg = force_mode (word_mode, scratch_reg);
1820 /* Force the constant into memory and put the address of the
1821 memory location into scratch_reg. */
1822 const_mem = force_const_mem (mode, operand1);
1823 xoperands[0] = scratch_reg;
1824 xoperands[1] = XEXP (const_mem, 0);
1825 pa_emit_move_sequence (xoperands, Pmode, 0);
1827 /* Now load the destination register. */
1828 emit_insn (gen_rtx_SET (operand0,
1829 replace_equiv_address (const_mem, scratch_reg)));
1830 return 1;
1832 /* Handle secondary reloads for SAR. These occur when trying to load
1833 the SAR from memory or a constant. */
1834 else if (scratch_reg
1835 && GET_CODE (operand0) == REG
1836 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1837 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1838 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1840 /* D might not fit in 14 bits either; for such cases load D into
1841 scratch reg. */
1842 if (GET_CODE (operand1) == MEM
1843 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1845 /* We are reloading the address into the scratch register, so we
1846 want to make sure the scratch register is a full register. */
1847 scratch_reg = force_mode (word_mode, scratch_reg);
1849 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1850 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1851 0)),
1852 Pmode,
1853 XEXP (XEXP (operand1, 0),
1854 0),
1855 scratch_reg));
1857 /* Now we are going to load the scratch register from memory,
1858 we want to load it in the same width as the original MEM,
1859 which must be the same as the width of the ultimate destination,
1860 OPERAND0. */
1861 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1863 emit_move_insn (scratch_reg,
1864 replace_equiv_address (operand1, scratch_reg));
1866 else
1868 /* We want to load the scratch register using the same mode as
1869 the ultimate destination. */
1870 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1872 emit_move_insn (scratch_reg, operand1);
1875 /* And emit the insn to set the ultimate destination. We know that
1876 the scratch register has the same mode as the destination at this
1877 point. */
1878 emit_move_insn (operand0, scratch_reg);
1879 return 1;
1882 /* Handle the most common case: storing into a register. */
1883 if (register_operand (operand0, mode))
1885 /* Legitimize TLS symbol references. This happens for references
1886 that aren't a legitimate constant. */
1887 if (PA_SYMBOL_REF_TLS_P (operand1))
1888 operand1 = legitimize_tls_address (operand1);
1890 if (register_operand (operand1, mode)
1891 || (GET_CODE (operand1) == CONST_INT
1892 && pa_cint_ok_for_move (UINTVAL (operand1)))
1893 || (operand1 == CONST0_RTX (mode))
1894 || (GET_CODE (operand1) == HIGH
1895 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1896 /* Only `general_operands' can come here, so MEM is ok. */
1897 || GET_CODE (operand1) == MEM)
1899 /* Various sets are created during RTL generation which don't
1900 have the REG_POINTER flag correctly set. After the CSE pass,
1901 instruction recognition can fail if we don't consistently
1902 set this flag when performing register copies. This should
1903 also improve the opportunities for creating insns that use
1904 unscaled indexing. */
1905 if (REG_P (operand0) && REG_P (operand1))
1907 if (REG_POINTER (operand1)
1908 && !REG_POINTER (operand0)
1909 && !HARD_REGISTER_P (operand0))
1910 copy_reg_pointer (operand0, operand1);
1913 /* When MEMs are broken out, the REG_POINTER flag doesn't
1914 get set. In some cases, we can set the REG_POINTER flag
1915 from the declaration for the MEM. */
1916 if (REG_P (operand0)
1917 && GET_CODE (operand1) == MEM
1918 && !REG_POINTER (operand0))
1920 tree decl = MEM_EXPR (operand1);
1922 /* Set the register pointer flag and register alignment
1923 if the declaration for this memory reference is a
1924 pointer type. */
1925 if (decl)
1927 tree type;
1929 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1930 tree operand 1. */
1931 if (TREE_CODE (decl) == COMPONENT_REF)
1932 decl = TREE_OPERAND (decl, 1);
1934 type = TREE_TYPE (decl);
1935 type = strip_array_types (type);
1937 if (POINTER_TYPE_P (type))
1938 mark_reg_pointer (operand0, BITS_PER_UNIT);
1942 emit_insn (gen_rtx_SET (operand0, operand1));
1943 return 1;
1946 else if (GET_CODE (operand0) == MEM)
1948 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1949 && !(reload_in_progress || reload_completed))
1951 rtx temp = gen_reg_rtx (DFmode);
1953 emit_insn (gen_rtx_SET (temp, operand1));
1954 emit_insn (gen_rtx_SET (operand0, temp));
1955 return 1;
1957 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1959 /* Run this case quickly. */
1960 emit_insn (gen_rtx_SET (operand0, operand1));
1961 return 1;
1963 if (! (reload_in_progress || reload_completed))
1965 operands[0] = validize_mem (operand0);
1966 operands[1] = operand1 = force_reg (mode, operand1);
1970 /* Simplify the source if we need to.
1971 Note we do have to handle function labels here, even though we do
1972 not consider them legitimate constants. Loop optimizations can
1973 call the emit_move_* routines with one as a source. */
1974 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1975 || (GET_CODE (operand1) == HIGH
1976 && symbolic_operand (XEXP (operand1, 0), mode))
1977 || function_label_operand (operand1, VOIDmode)
1978 || tls_referenced_p (operand1))
1980 int ishighonly = 0;
1982 if (GET_CODE (operand1) == HIGH)
1984 ishighonly = 1;
1985 operand1 = XEXP (operand1, 0);
1987 if (symbolic_operand (operand1, mode))
1989 /* Argh. The assembler and linker can't handle arithmetic
1990 involving plabels.
1992 So we force the plabel into memory, load operand0 from
1993 the memory location, then add in the constant part. */
1994 if ((GET_CODE (operand1) == CONST
1995 && GET_CODE (XEXP (operand1, 0)) == PLUS
1996 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1997 VOIDmode))
1998 || function_label_operand (operand1, VOIDmode))
2000 rtx temp, const_part;
2002 /* Figure out what (if any) scratch register to use. */
2003 if (reload_in_progress || reload_completed)
2005 scratch_reg = scratch_reg ? scratch_reg : operand0;
2006 /* SCRATCH_REG will hold an address and maybe the actual
2007 data. We want it in WORD_MODE regardless of what mode it
2008 was originally given to us. */
2009 scratch_reg = force_mode (word_mode, scratch_reg);
2011 else if (flag_pic)
2012 scratch_reg = gen_reg_rtx (Pmode);
2014 if (GET_CODE (operand1) == CONST)
2016 /* Save away the constant part of the expression. */
2017 const_part = XEXP (XEXP (operand1, 0), 1);
2018 gcc_assert (GET_CODE (const_part) == CONST_INT);
2020 /* Force the function label into memory. */
2021 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2023 else
2025 /* No constant part. */
2026 const_part = NULL_RTX;
2028 /* Force the function label into memory. */
2029 temp = force_const_mem (mode, operand1);
2033 /* Get the address of the memory location. PIC-ify it if
2034 necessary. */
2035 temp = XEXP (temp, 0);
2036 if (flag_pic)
2037 temp = legitimize_pic_address (temp, mode, scratch_reg);
2039 /* Put the address of the memory location into our destination
2040 register. */
2041 operands[1] = temp;
2042 pa_emit_move_sequence (operands, mode, scratch_reg);
2044 /* Now load from the memory location into our destination
2045 register. */
2046 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2047 pa_emit_move_sequence (operands, mode, scratch_reg);
2049 /* And add back in the constant part. */
2050 if (const_part != NULL_RTX)
2051 expand_inc (operand0, const_part);
2053 return 1;
2056 if (flag_pic)
2058 rtx_insn *insn;
2059 rtx temp;
2061 if (reload_in_progress || reload_completed)
2063 temp = scratch_reg ? scratch_reg : operand0;
2064 /* TEMP will hold an address and maybe the actual
2065 data. We want it in WORD_MODE regardless of what mode it
2066 was originally given to us. */
2067 temp = force_mode (word_mode, temp);
2069 else
2070 temp = gen_reg_rtx (Pmode);
2072 /* Force (const (plus (symbol) (const_int))) to memory
2073 if the const_int will not fit in 14 bits. Although
2074 this requires a relocation, the instruction sequence
2075 needed to load the value is shorter. */
2076 if (GET_CODE (operand1) == CONST
2077 && GET_CODE (XEXP (operand1, 0)) == PLUS
2078 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2079 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2081 rtx x, m = force_const_mem (mode, operand1);
2083 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2084 x = replace_equiv_address (m, x);
2085 insn = emit_move_insn (operand0, x);
2087 else
2089 operands[1] = legitimize_pic_address (operand1, mode, temp);
2090 if (REG_P (operand0) && REG_P (operands[1]))
2091 copy_reg_pointer (operand0, operands[1]);
2092 insn = emit_move_insn (operand0, operands[1]);
2095 /* Put a REG_EQUAL note on this insn. */
2096 set_unique_reg_note (insn, REG_EQUAL, operand1);
2098 /* On the HPPA, references to data space are supposed to use dp,
2099 register 27, but showing it in the RTL inhibits various cse
2100 and loop optimizations. */
2101 else
2103 rtx temp, set;
2105 if (reload_in_progress || reload_completed)
2107 temp = scratch_reg ? scratch_reg : operand0;
2108 /* TEMP will hold an address and maybe the actual
2109 data. We want it in WORD_MODE regardless of what mode it
2110 was originally given to us. */
2111 temp = force_mode (word_mode, temp);
2113 else
2114 temp = gen_reg_rtx (mode);
2116 /* Loading a SYMBOL_REF into a register makes that register
2117 safe to be used as the base in an indexed address.
2119 Don't mark hard registers though. That loses. */
2120 if (GET_CODE (operand0) == REG
2121 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2122 mark_reg_pointer (operand0, BITS_PER_UNIT);
2123 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2124 mark_reg_pointer (temp, BITS_PER_UNIT);
2126 if (ishighonly)
2127 set = gen_rtx_SET (operand0, temp);
2128 else
2129 set = gen_rtx_SET (operand0,
2130 gen_rtx_LO_SUM (mode, temp, operand1));
2132 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2133 emit_insn (set);
2136 return 1;
2138 else if (tls_referenced_p (operand1))
2140 rtx tmp = operand1;
2141 rtx addend = NULL;
2143 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2145 addend = XEXP (XEXP (tmp, 0), 1);
2146 tmp = XEXP (XEXP (tmp, 0), 0);
2149 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2150 tmp = legitimize_tls_address (tmp);
2151 if (addend)
2153 tmp = gen_rtx_PLUS (mode, tmp, addend);
2154 tmp = force_operand (tmp, operands[0]);
2156 operands[1] = tmp;
2158 else if (GET_CODE (operand1) != CONST_INT
2159 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2161 rtx temp;
2162 rtx_insn *insn;
2163 rtx op1 = operand1;
2164 HOST_WIDE_INT value = 0;
2165 HOST_WIDE_INT insv = 0;
2166 int insert = 0;
2168 if (GET_CODE (operand1) == CONST_INT)
2169 value = INTVAL (operand1);
2171 if (TARGET_64BIT
2172 && GET_CODE (operand1) == CONST_INT
2173 && HOST_BITS_PER_WIDE_INT > 32
2174 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2176 HOST_WIDE_INT nval;
2178 /* Extract the low order 32 bits of the value and sign extend.
2179 If the new value is the same as the original value, we can
2180 use the original value as-is. If the new value is
2181 different, we use it and insert the most-significant 32-bits
2182 of the original value into the final result. */
2183 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2184 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2185 if (value != nval)
2187 #if HOST_BITS_PER_WIDE_INT > 32
2188 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2189 #endif
2190 insert = 1;
2191 value = nval;
2192 operand1 = GEN_INT (nval);
2196 if (reload_in_progress || reload_completed)
2197 temp = scratch_reg ? scratch_reg : operand0;
2198 else
2199 temp = gen_reg_rtx (mode);
2201 /* We don't directly split DImode constants on 32-bit targets
2202 because PLUS uses an 11-bit immediate and the insn sequence
2203 generated is not as efficient as the one using HIGH/LO_SUM. */
2204 if (GET_CODE (operand1) == CONST_INT
2205 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2206 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2207 && !insert)
2209 /* Directly break constant into high and low parts. This
2210 provides better optimization opportunities because various
2211 passes recognize constants split with PLUS but not LO_SUM.
2212 We use a 14-bit signed low part except when the addition
2213 of 0x4000 to the high part might change the sign of the
2214 high part. */
2215 HOST_WIDE_INT low = value & 0x3fff;
2216 HOST_WIDE_INT high = value & ~ 0x3fff;
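/* Worked example (illustrative, not from the original source): for
   value = 0x12347000 we first get low = 0x3000 and high = 0x12344000;
   since low >= 0x2000, high is bumped to 0x12348000 and low becomes
   value - high = -0x1000, yielding "ldil L'0x12348000,%reg" followed
   by "ldo -4096(%reg),%reg"-style code.  */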
2218 if (low >= 0x2000)
2220 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2221 high += 0x2000;
2222 else
2223 high += 0x4000;
2226 low = value - high;
2228 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2229 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2231 else
2233 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2234 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2237 insn = emit_move_insn (operands[0], operands[1]);
2239 /* Now insert the most significant 32 bits of the value
2240 into the register. When we don't have a second register
2241 available, it could take up to nine instructions to load
2242 a 64-bit integer constant. Prior to reload, we force
2243 constants that would take more than three instructions
2244 to load to the constant pool. During and after reload,
2245 we have to handle all possible values. */
2246 if (insert)
2248 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2249 register and the value to be inserted is outside the
2250 range that can be loaded with three depdi instructions. */
2251 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2253 operand1 = GEN_INT (insv);
2255 emit_insn (gen_rtx_SET (temp,
2256 gen_rtx_HIGH (mode, operand1)));
2257 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2258 if (mode == DImode)
2259 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2260 const0_rtx, temp));
2261 else
2262 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2263 const0_rtx, temp));
2265 else
2267 int len = 5, pos = 27;
2269 /* Insert the bits using the depdi instruction. */
2270 while (pos >= 0)
2272 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2273 HOST_WIDE_INT sign = v5 < 0;
2275 /* Left extend the insertion. */
2276 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2277 while (pos > 0 && (insv & 1) == sign)
2279 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2280 len += 1;
2281 pos -= 1;
2284 if (mode == DImode)
2285 insn = emit_insn (gen_insvdi (operand0,
2286 GEN_INT (len),
2287 GEN_INT (pos),
2288 GEN_INT (v5)));
2289 else
2290 insn = emit_insn (gen_insvsi (operand0,
2291 GEN_INT (len),
2292 GEN_INT (pos),
2293 GEN_INT (v5)));
2295 len = pos > 0 && pos < 5 ? pos : 5;
2296 pos -= len;
2301 set_unique_reg_note (insn, REG_EQUAL, op1);
2303 return 1;
2306 /* Now have insn-emit do whatever it normally does. */
2307 return 0;
2310 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2311 it will need a link/runtime reloc). */
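/* For example (illustrative), an initializer such as "&x + 4" is a
   POINTER_PLUS_EXPR over an ADDR_EXPR, so this function returns 1,
   while a plain integer constant initializer yields 0.  */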
2313 int
2314 pa_reloc_needed (tree exp)
2316 int reloc = 0;
2318 switch (TREE_CODE (exp))
2320 case ADDR_EXPR:
2321 return 1;
2323 case POINTER_PLUS_EXPR:
2324 case PLUS_EXPR:
2325 case MINUS_EXPR:
2326 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2327 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2328 break;
2330 CASE_CONVERT:
2331 case NON_LVALUE_EXPR:
2332 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2333 break;
2335 case CONSTRUCTOR:
2337 tree value;
2338 unsigned HOST_WIDE_INT ix;
2340 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2341 if (value)
2342 reloc |= pa_reloc_needed (value);
2344 break;
2346 case ERROR_MARK:
2347 break;
2349 default:
2350 break;
2352 return reloc;
2356 /* Return the best assembler insn template
2357 for moving operands[1] into operands[0] as a fullword. */
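/* For example (illustrative), a CONST_INT source of 0x12345678 matches
   none of the short forms below, so the template returned is
   "ldil L'%1,%0\n\tldo R'%1(%0),%0".  */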
2358 const char *
2359 pa_singlemove_string (rtx *operands)
2361 HOST_WIDE_INT intval;
2363 if (GET_CODE (operands[0]) == MEM)
2364 return "stw %r1,%0";
2365 if (GET_CODE (operands[1]) == MEM)
2366 return "ldw %1,%0";
2367 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2369 long i;
2371 gcc_assert (GET_MODE (operands[1]) == SFmode);
2373 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2374 bit pattern. */
2375 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2377 operands[1] = GEN_INT (i);
2378 /* Fall through to CONST_INT case. */
2380 if (GET_CODE (operands[1]) == CONST_INT)
2382 intval = INTVAL (operands[1]);
2384 if (VAL_14_BITS_P (intval))
2385 return "ldi %1,%0";
2386 else if ((intval & 0x7ff) == 0)
2387 return "ldil L'%1,%0";
2388 else if (pa_zdepi_cint_p (intval))
2389 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2390 else
2391 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2393 return "copy %1,%0";
2397 /* Compute position (in OP[1]) and width (in OP[2])
2398 useful for copying IMM to a register using the zdepi
2399 instructions. Store the immediate value to insert in OP[0]. */
2400 static void
2401 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2403 int lsb, len;
2405 /* Find the least significant set bit in IMM. */
2406 for (lsb = 0; lsb < 32; lsb++)
2408 if ((imm & 1) != 0)
2409 break;
2410 imm >>= 1;
2413 /* Choose variants based on *sign* of the 5-bit field. */
2414 if ((imm & 0x10) == 0)
2415 len = (lsb <= 28) ? 4 : 32 - lsb;
2416 else
2418 /* Find the width of the bitstring in IMM. */
2419 for (len = 5; len < 32 - lsb; len++)
2421 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2422 break;
2425 /* Sign extend IMM as a 5-bit value. */
2426 imm = (imm & 0xf) - 0x10;
2429 op[0] = imm;
2430 op[1] = 31 - lsb;
2431 op[2] = len;
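/* Illustrative example: IMM = 0x00ff0000 has its least significant set
   bit at 16 and an 8-bit run of ones, so we return OP[0] = -1,
   OP[1] = 15, OP[2] = 8, and "zdepi -1,15,8" rebuilds the mask.  */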
2434 /* Compute position (in OP[1]) and width (in OP[2])
2435 useful for copying IMM to a register using the depdi,z
2436 instructions. Store the immediate value to insert in OP[0]. */
2438 static void
2439 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2441 int lsb, len, maxlen;
2443 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2445 /* Find the least significant set bit in IMM. */
2446 for (lsb = 0; lsb < maxlen; lsb++)
2448 if ((imm & 1) != 0)
2449 break;
2450 imm >>= 1;
2453 /* Choose variants based on *sign* of the 5-bit field. */
2454 if ((imm & 0x10) == 0)
2455 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2456 else
2458 /* Find the width of the bitstring in IMM. */
2459 for (len = 5; len < maxlen - lsb; len++)
2461 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2462 break;
2465 /* Extend length if host is narrow and IMM is negative. */
2466 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2467 len += 32;
2469 /* Sign extend IMM as a 5-bit value. */
2470 imm = (imm & 0xf) - 0x10;
2473 op[0] = imm;
2474 op[1] = 63 - lsb;
2475 op[2] = len;
2478 /* Output assembler code to perform a doubleword move insn
2479 with operands OPERANDS. */
2481 const char *
2482 pa_output_move_double (rtx *operands)
2484 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2485 rtx latehalf[2];
2486 rtx addreg0 = 0, addreg1 = 0;
2487 int highonly = 0;
2489 /* First classify both operands. */
2491 if (REG_P (operands[0]))
2492 optype0 = REGOP;
2493 else if (offsettable_memref_p (operands[0]))
2494 optype0 = OFFSOP;
2495 else if (GET_CODE (operands[0]) == MEM)
2496 optype0 = MEMOP;
2497 else
2498 optype0 = RNDOP;
2500 if (REG_P (operands[1]))
2501 optype1 = REGOP;
2502 else if (CONSTANT_P (operands[1]))
2503 optype1 = CNSTOP;
2504 else if (offsettable_memref_p (operands[1]))
2505 optype1 = OFFSOP;
2506 else if (GET_CODE (operands[1]) == MEM)
2507 optype1 = MEMOP;
2508 else
2509 optype1 = RNDOP;
2511 /* Check for the cases that the operand constraints are not
2512 supposed to allow.  */
2513 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2515 /* Handle copies between general and floating registers. */
2517 if (optype0 == REGOP && optype1 == REGOP
2518 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2520 if (FP_REG_P (operands[0]))
2522 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2523 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2524 return "{fldds|fldd} -16(%%sp),%0";
2526 else
2528 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2529 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2530 return "{ldws|ldw} -12(%%sp),%R0";
2534 /* Handle auto decrementing and incrementing loads and stores
2535 specifically, since the structure of the function doesn't work
2536 for them without major modification. Do it better when we teach
2537 this port about the general inc/dec addressing of PA.
2538 (This was written by tege. Chide him if it doesn't work.) */
2540 if (optype0 == MEMOP)
2542 /* We have to output the address syntax ourselves, since print_operand
2543 doesn't deal with the addresses we want to use. Fix this later. */
2545 rtx addr = XEXP (operands[0], 0);
2546 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2548 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2550 operands[0] = XEXP (addr, 0);
2551 gcc_assert (GET_CODE (operands[1]) == REG
2552 && GET_CODE (operands[0]) == REG);
2554 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2556 /* No overlap between high target register and address
2557 register. (We do this in a non-obvious way to
2558 save a register file writeback) */
2559 if (GET_CODE (addr) == POST_INC)
2560 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2561 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2563 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2565 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2567 operands[0] = XEXP (addr, 0);
2568 gcc_assert (GET_CODE (operands[1]) == REG
2569 && GET_CODE (operands[0]) == REG);
2571 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2572 /* No overlap between high target register and address
2573 register. (We do this in a non-obvious way to save a
2574 register file writeback) */
2575 if (GET_CODE (addr) == PRE_INC)
2576 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2577 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2580 if (optype1 == MEMOP)
2582 /* We have to output the address syntax ourselves, since print_operand
2583 doesn't deal with the addresses we want to use. Fix this later. */
2585 rtx addr = XEXP (operands[1], 0);
2586 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2588 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2590 operands[1] = XEXP (addr, 0);
2591 gcc_assert (GET_CODE (operands[0]) == REG
2592 && GET_CODE (operands[1]) == REG);
2594 if (!reg_overlap_mentioned_p (high_reg, addr))
2596 /* No overlap between high target register and address
2597 register. (We do this in a non-obvious way to
2598 save a register file writeback) */
2599 if (GET_CODE (addr) == POST_INC)
2600 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2601 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2603 else
2605 /* This is an undefined situation. We should load into the
2606 address register *and* update that register. Probably
2607 we don't need to handle this at all. */
2608 if (GET_CODE (addr) == POST_INC)
2609 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2610 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2613 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2615 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2617 operands[1] = XEXP (addr, 0);
2618 gcc_assert (GET_CODE (operands[0]) == REG
2619 && GET_CODE (operands[1]) == REG);
2621 if (!reg_overlap_mentioned_p (high_reg, addr))
2623 /* No overlap between high target register and address
2624 register. (We do this in a non-obvious way to
2625 save a register file writeback) */
2626 if (GET_CODE (addr) == PRE_INC)
2627 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2628 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2630 else
2632 /* This is an undefined situation. We should load into the
2633 address register *and* update that register. Probably
2634 we don't need to handle this at all. */
2635 if (GET_CODE (addr) == PRE_INC)
2636 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2637 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2640 else if (GET_CODE (addr) == PLUS
2641 && GET_CODE (XEXP (addr, 0)) == MULT)
2643 rtx xoperands[4];
2645 /* Load address into left half of destination register. */
2646 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2647 xoperands[1] = XEXP (addr, 1);
2648 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2649 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2650 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2651 xoperands);
2652 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2654 else if (GET_CODE (addr) == PLUS
2655 && REG_P (XEXP (addr, 0))
2656 && REG_P (XEXP (addr, 1)))
2658 rtx xoperands[3];
2660 /* Load address into left half of destination register. */
2661 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2662 xoperands[1] = XEXP (addr, 0);
2663 xoperands[2] = XEXP (addr, 1);
2664 output_asm_insn ("{addl|add,l} %1,%2,%0",
2665 xoperands);
2666 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2670 /* If an operand is an unoffsettable memory ref, find a register
2671 we can increment temporarily to make it refer to the second word. */
2673 if (optype0 == MEMOP)
2674 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2676 if (optype1 == MEMOP)
2677 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2679 /* Ok, we can do one word at a time.
2680 Normally we do the low-numbered word first.
2682 In either case, set up in LATEHALF the operands to use
2683 for the high-numbered word and in some cases alter the
2684 operands in OPERANDS to be suitable for the low-numbered word. */
2686 if (optype0 == REGOP)
2687 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2688 else if (optype0 == OFFSOP)
2689 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2690 else
2691 latehalf[0] = operands[0];
2693 if (optype1 == REGOP)
2694 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2695 else if (optype1 == OFFSOP)
2696 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2697 else if (optype1 == CNSTOP)
2699 if (GET_CODE (operands[1]) == HIGH)
2701 operands[1] = XEXP (operands[1], 0);
2702 highonly = 1;
2704 split_double (operands[1], &operands[1], &latehalf[1]);
2706 else
2707 latehalf[1] = operands[1];
2709 /* If the first move would clobber the source of the second one,
2710 do them in the other order.
2712 This can happen in two cases:
2714 mem -> register where the first half of the destination register
2715 is the same register used in the memory's address. Reload
2716 can create such insns.
2718 mem in this case will be either register indirect or register
2719 indirect plus a valid offset.
2721 register -> register move where REGNO(dst) == REGNO(src) + 1
2722 someone (Tim/Tege?) claimed this can happen for parameter loads.
2724 Handle mem -> register case first. */
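/* Illustrative example: a double-word load into %r4/%r5 from 0(%r4)
   must not load %r4 first, so we emit "ldw 4(%r4),%r5" and then
   "ldw 0(%r4),%r4".  */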
2725 if (optype0 == REGOP
2726 && (optype1 == MEMOP || optype1 == OFFSOP)
2727 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2729 /* Do the late half first. */
2730 if (addreg1)
2731 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2732 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2734 /* Then clobber. */
2735 if (addreg1)
2736 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2737 return pa_singlemove_string (operands);
2740 /* Now handle register -> register case. */
2741 if (optype0 == REGOP && optype1 == REGOP
2742 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2744 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2745 return pa_singlemove_string (operands);
2748 /* Normal case: do the two words, low-numbered first. */
2750 output_asm_insn (pa_singlemove_string (operands), operands);
2752 /* Make any unoffsettable addresses point at high-numbered word. */
2753 if (addreg0)
2754 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2755 if (addreg1)
2756 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2758 /* Do high-numbered word. */
2759 if (highonly)
2760 output_asm_insn ("ldil L'%1,%0", latehalf);
2761 else
2762 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2764 /* Undo the adds we just did. */
2765 if (addreg0)
2766 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2767 if (addreg1)
2768 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2770 return "";
2773 const char *
2774 pa_output_fp_move_double (rtx *operands)
2776 if (FP_REG_P (operands[0]))
2778 if (FP_REG_P (operands[1])
2779 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2780 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2781 else
2782 output_asm_insn ("fldd%F1 %1,%0", operands);
2784 else if (FP_REG_P (operands[1]))
2786 output_asm_insn ("fstd%F0 %1,%0", operands);
2788 else
2790 rtx xoperands[2];
2792 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2794 /* This is a pain. You have to be prepared to deal with an
2795 arbitrary address here including pre/post increment/decrement,
2797 so avoid this in the MD. */
2798 gcc_assert (GET_CODE (operands[0]) == REG);
2800 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2801 xoperands[0] = operands[0];
2802 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2804 return "";
2807 /* Return a REG that occurs in ADDR with coefficient 1.
2808 ADDR can be effectively incremented by incrementing REG. */
2810 static rtx
2811 find_addr_reg (rtx addr)
2813 while (GET_CODE (addr) == PLUS)
2815 if (GET_CODE (XEXP (addr, 0)) == REG)
2816 addr = XEXP (addr, 0);
2817 else if (GET_CODE (XEXP (addr, 1)) == REG)
2818 addr = XEXP (addr, 1);
2819 else if (CONSTANT_P (XEXP (addr, 0)))
2820 addr = XEXP (addr, 1);
2821 else if (CONSTANT_P (XEXP (addr, 1)))
2822 addr = XEXP (addr, 0);
2823 else
2824 gcc_unreachable ();
2826 gcc_assert (GET_CODE (addr) == REG);
2827 return addr;
2830 /* Emit code to perform a block move.
2832 OPERANDS[0] is the destination pointer as a REG, clobbered.
2833 OPERANDS[1] is the source pointer as a REG, clobbered.
2834 OPERANDS[2] is a register for temporary storage.
2835 OPERANDS[3] is a register for temporary storage.
2836 OPERANDS[4] is the size as a CONST_INT
2837 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2838 OPERANDS[6] is another temporary register. */
2840 const char *
2841 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2843 int align = INTVAL (operands[5]);
2844 unsigned long n_bytes = INTVAL (operands[4]);
2846 /* We can't move more than a word at a time because the PA
2847 has no integer move insns wider than a word. (Could use fp mem ops?) */
2848 if (align > (TARGET_64BIT ? 8 : 4))
2849 align = (TARGET_64BIT ? 8 : 4);
2851 /* Note that we know each loop below will execute at least twice
2852 (else we would have open-coded the copy). */
2853 switch (align)
2855 case 8:
2856 /* Pre-adjust the loop counter. */
2857 operands[4] = GEN_INT (n_bytes - 16);
2858 output_asm_insn ("ldi %4,%2", operands);
2860 /* Copying loop. */
2861 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2862 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2863 output_asm_insn ("std,ma %3,8(%0)", operands);
2864 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2865 output_asm_insn ("std,ma %6,8(%0)", operands);
2867 /* Handle the residual. There could be up to 15 bytes of
2868 residual to copy! */
2869 if (n_bytes % 16 != 0)
2871 operands[4] = GEN_INT (n_bytes % 8);
2872 if (n_bytes % 16 >= 8)
2873 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2874 if (n_bytes % 8 != 0)
2875 output_asm_insn ("ldd 0(%1),%6", operands);
2876 if (n_bytes % 16 >= 8)
2877 output_asm_insn ("std,ma %3,8(%0)", operands);
2878 if (n_bytes % 8 != 0)
2879 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2881 return "";
2883 case 4:
2884 /* Pre-adjust the loop counter. */
2885 operands[4] = GEN_INT (n_bytes - 8);
2886 output_asm_insn ("ldi %4,%2", operands);
2888 /* Copying loop. */
2889 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2890 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2891 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2892 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2893 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
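/* Illustrative trace (not in the original): for n_bytes = 20 the
   counter starts at 12; the loop copies 8 bytes per iteration
   (counter 12 -> 4 -> -4, i.e. two iterations, 16 bytes), and the
   residual code below moves the remaining 4 bytes.  */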
2895 /* Handle the residual. There could be up to 7 bytes of
2896 residual to copy! */
2897 if (n_bytes % 8 != 0)
2899 operands[4] = GEN_INT (n_bytes % 4);
2900 if (n_bytes % 8 >= 4)
2901 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2902 if (n_bytes % 4 != 0)
2903 output_asm_insn ("ldw 0(%1),%6", operands);
2904 if (n_bytes % 8 >= 4)
2905 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2906 if (n_bytes % 4 != 0)
2907 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2909 return "";
2911 case 2:
2912 /* Pre-adjust the loop counter. */
2913 operands[4] = GEN_INT (n_bytes - 4);
2914 output_asm_insn ("ldi %4,%2", operands);
2916 /* Copying loop. */
2917 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2918 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2919 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2920 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2921 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2923 /* Handle the residual. */
2924 if (n_bytes % 4 != 0)
2926 if (n_bytes % 4 >= 2)
2927 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2928 if (n_bytes % 2 != 0)
2929 output_asm_insn ("ldb 0(%1),%6", operands);
2930 if (n_bytes % 4 >= 2)
2931 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2932 if (n_bytes % 2 != 0)
2933 output_asm_insn ("stb %6,0(%0)", operands);
2935 return "";
2937 case 1:
2938 /* Pre-adjust the loop counter. */
2939 operands[4] = GEN_INT (n_bytes - 2);
2940 output_asm_insn ("ldi %4,%2", operands);
2942 /* Copying loop. */
2943 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2944 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2945 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2946 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2947 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2949 /* Handle the residual. */
2950 if (n_bytes % 2 != 0)
2952 output_asm_insn ("ldb 0(%1),%3", operands);
2953 output_asm_insn ("stb %3,0(%0)", operands);
2955 return "";
2957 default:
2958 gcc_unreachable ();
2962 /* Count the number of insns necessary to handle this block move.
2964 Basic structure is the same as pa_output_block_move, except that we
2965 count insns rather than emit them. */
2967 static int
2968 compute_movmem_length (rtx_insn *insn)
2970 rtx pat = PATTERN (insn);
2971 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2972 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2973 unsigned int n_insns = 0;
2975 /* We can't move more than a word at a time because the PA
2976 has no integer move insns wider than a word. (Could use fp mem ops?) */
2977 if (align > (TARGET_64BIT ? 8 : 4))
2978 align = (TARGET_64BIT ? 8 : 4);
2980 /* The basic copying loop. */
2981 n_insns = 6;
2983 /* Residuals. */
2984 if (n_bytes % (2 * align) != 0)
2986 if ((n_bytes % (2 * align)) >= align)
2987 n_insns += 2;
2989 if ((n_bytes % align) != 0)
2990 n_insns += 2;
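/* For instance (illustrative), n_bytes = 20 with align = 4 gives the
   6-insn loop plus 2 residual insns, so the length returned is
   8 * 4 = 32 bytes.  */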
2993 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2994 return n_insns * 4;
2997 /* Emit code to perform a block clear.
2999 OPERANDS[0] is the destination pointer as a REG, clobbered.
3000 OPERANDS[1] is a register for temporary storage.
3001 OPERANDS[2] is the size as a CONST_INT
3002 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3004 const char *
3005 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3007 int align = INTVAL (operands[3]);
3008 unsigned long n_bytes = INTVAL (operands[2]);
3010 /* We can't clear more than a word at a time because the PA
3011 has no integer move insns wider than a word. */
3012 if (align > (TARGET_64BIT ? 8 : 4))
3013 align = (TARGET_64BIT ? 8 : 4);
3015 /* Note that we know each loop below will execute at least twice
3016 (else we would have open-coded the copy). */
3017 switch (align)
3019 case 8:
3020 /* Pre-adjust the loop counter. */
3021 operands[2] = GEN_INT (n_bytes - 16);
3022 output_asm_insn ("ldi %2,%1", operands);
3024 /* Loop. */
3025 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3026 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3027 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3029 /* Handle the residual. There could be up to 15 bytes of
3030 residual to clear! */
3031 if (n_bytes % 16 != 0)
3033 operands[2] = GEN_INT (n_bytes % 8);
3034 if (n_bytes % 16 >= 8)
3035 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3036 if (n_bytes % 8 != 0)
3037 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3039 return "";
3041 case 4:
3042 /* Pre-adjust the loop counter. */
3043 operands[2] = GEN_INT (n_bytes - 8);
3044 output_asm_insn ("ldi %2,%1", operands);
3046 /* Loop. */
3047 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3048 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3049 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3051 /* Handle the residual. There could be up to 7 bytes of
3052 residual to clear! */
3053 if (n_bytes % 8 != 0)
3055 operands[2] = GEN_INT (n_bytes % 4);
3056 if (n_bytes % 8 >= 4)
3057 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3058 if (n_bytes % 4 != 0)
3059 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3061 return "";
3063 case 2:
3064 /* Pre-adjust the loop counter. */
3065 operands[2] = GEN_INT (n_bytes - 4);
3066 output_asm_insn ("ldi %2,%1", operands);
3068 /* Loop. */
3069 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3070 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3071 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3073 /* Handle the residual. */
3074 if (n_bytes % 4 != 0)
3076 if (n_bytes % 4 >= 2)
3077 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3078 if (n_bytes % 2 != 0)
3079 output_asm_insn ("stb %%r0,0(%0)", operands);
3081 return "";
3083 case 1:
3084 /* Pre-adjust the loop counter. */
3085 operands[2] = GEN_INT (n_bytes - 2);
3086 output_asm_insn ("ldi %2,%1", operands);
3088 /* Loop. */
3089 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3090 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3091 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3093 /* Handle the residual. */
3094 if (n_bytes % 2 != 0)
3095 output_asm_insn ("stb %%r0,0(%0)", operands);
3097 return "";
3099 default:
3100 gcc_unreachable ();
3104 /* Count the number of insns necessary to handle this block clear.
3106 Basic structure is the same as pa_output_block_clear, except that we
3107 count insns rather than emit them. */
3109 static int
3110 compute_clrmem_length (rtx_insn *insn)
3112 rtx pat = PATTERN (insn);
3113 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3114 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3115 unsigned int n_insns = 0;
3117 /* We can't clear more than a word at a time because the PA
3118 has no integer move insns wider than a word. */
3119 if (align > (TARGET_64BIT ? 8 : 4))
3120 align = (TARGET_64BIT ? 8 : 4);
3122 /* The basic loop. */
3123 n_insns = 4;
3125 /* Residuals. */
3126 if (n_bytes % (2 * align) != 0)
3128 if ((n_bytes % (2 * align)) >= align)
3129 n_insns++;
3131 if ((n_bytes % align) != 0)
3132 n_insns++;
3135 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3136 return n_insns * 4;
3140 const char *
3141 pa_output_and (rtx *operands)
3143 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3145 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3146 int ls0, ls1, ms0, p, len;
3148 for (ls0 = 0; ls0 < 32; ls0++)
3149 if ((mask & (1 << ls0)) == 0)
3150 break;
3152 for (ls1 = ls0; ls1 < 32; ls1++)
3153 if ((mask & (1 << ls1)) != 0)
3154 break;
3156 for (ms0 = ls1; ms0 < 32; ms0++)
3157 if ((mask & (1 << ms0)) == 0)
3158 break;
3160 gcc_assert (ms0 == 32);
3162 if (ls1 == 32)
3164 len = ls0;
3166 gcc_assert (len);
3168 operands[2] = GEN_INT (len);
3169 return "{extru|extrw,u} %1,31,%2,%0";
3171 else
3173 /* We could use this `depi' for the case above as well, but `depi'
3174 requires one more register file access than an `extru'. */
3176 p = 31 - ls0;
3177 len = ls1 - ls0;
3179 operands[2] = GEN_INT (p);
3180 operands[3] = GEN_INT (len);
3181 return "{depi|depwi} 0,%2,%3,%0";
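/* Illustrative example: a mask of 0xffff00ff gives ls0 = 8 and
   ls1 = 16, so we emit "{depi|depwi} 0,23,8,%0" to clear bits 8..15
   of the value in place.  */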
3184 else
3185 return "and %1,%2,%0";
3188 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3189 storing the result in operands[0]. */
3190 const char *
3191 pa_output_64bit_and (rtx *operands)
3193 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3195 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3196 int ls0, ls1, ms0, p, len;
3198 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3199 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3200 break;
3202 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3203 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3204 break;
3206 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3207 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3208 break;
3210 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3212 if (ls1 == HOST_BITS_PER_WIDE_INT)
3214 len = ls0;
3216 gcc_assert (len);
3218 operands[2] = GEN_INT (len);
3219 return "extrd,u %1,63,%2,%0";
3221 else
3223 /* We could use this `depdi' for the case above as well, but `depdi'
3224 requires one more register file access than an `extrd,u'. */
3226 p = 63 - ls0;
3227 len = ls1 - ls0;
3229 operands[2] = GEN_INT (p);
3230 operands[3] = GEN_INT (len);
3231 return "depdi 0,%2,%3,%0";
3234 else
3235 return "and %1,%2,%0";
3238 const char *
3239 pa_output_ior (rtx *operands)
3241 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3242 int bs0, bs1, p, len;
3244 if (INTVAL (operands[2]) == 0)
3245 return "copy %1,%0";
3247 for (bs0 = 0; bs0 < 32; bs0++)
3248 if ((mask & (1 << bs0)) != 0)
3249 break;
3251 for (bs1 = bs0; bs1 < 32; bs1++)
3252 if ((mask & (1 << bs1)) == 0)
3253 break;
3255 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3257 p = 31 - bs0;
3258 len = bs1 - bs0;
3260 operands[2] = GEN_INT (p);
3261 operands[3] = GEN_INT (len);
3262 return "{depi|depwi} -1,%2,%3,%0";
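/* Illustrative example: operands[2] = 0x00000ff0 gives bs0 = 4 and
   bs1 = 12, so the template becomes "{depi|depwi} -1,27,8,%0",
   setting bits 4..11 of the destination.  */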
3265 /* Return a string to perform a bitwise-ior of operands[1] with operands[2]
3266 storing the result in operands[0]. */
3267 const char *
3268 pa_output_64bit_ior (rtx *operands)
3270 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3271 int bs0, bs1, p, len;
3273 if (INTVAL (operands[2]) == 0)
3274 return "copy %1,%0";
3276 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3277 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3278 break;
3280 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3281 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3282 break;
3284 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3285 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3287 p = 63 - bs0;
3288 len = bs1 - bs0;
3290 operands[2] = GEN_INT (p);
3291 operands[3] = GEN_INT (len);
3292 return "depdi -1,%2,%3,%0";
3295 /* Target hook for assembling integer objects. This code handles
3296 aligned SI and DI integers specially since function references
3297 must be preceded by P%. */
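/* For example (illustrative), an aligned word-size reference to a
   function "foo" is emitted as ".word P%foo" (".dword P%foo" on
   64-bit targets) so that a plabel is generated, instead of the plain
   ".word foo" that default_assemble_integer would produce.  */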
3299 static bool
3300 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3302 bool result;
3303 tree decl = NULL;
3305 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to
3306 call assemble_external and set the SYMBOL_REF_DECL to NULL before
3307 calling output_addr_const. Otherwise, it may call assemble_external
3308 in the midst of outputting the assembler code for the SYMBOL_REF.
3309 We restore the SYMBOL_REF_DECL after the output is done. */
3310 if (GET_CODE (x) == SYMBOL_REF)
3312 decl = SYMBOL_REF_DECL (x);
3313 if (decl)
3315 assemble_external (decl);
3316 SET_SYMBOL_REF_DECL (x, NULL);
3320 if (size == UNITS_PER_WORD
3321 && aligned_p
3322 && function_label_operand (x, VOIDmode))
3324 fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3326 /* We don't want an OPD when generating fast indirect calls. */
3327 if (!TARGET_FAST_INDIRECT_CALLS)
3328 fputs ("P%", asm_out_file);
3330 output_addr_const (asm_out_file, x);
3331 fputc ('\n', asm_out_file);
3332 result = true;
3334 else
3335 result = default_assemble_integer (x, size, aligned_p);
3337 if (decl)
3338 SET_SYMBOL_REF_DECL (x, decl);
3340 return result;
3343 /* Output an ascii string. */
3344 void
3345 pa_output_ascii (FILE *file, const char *p, int size)
3347 int i;
3348 int chars_output;
3349 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3351 /* The HP assembler can only take strings of 256 characters at one
3352 time. This is a limitation on input line length, *not* the
3353 length of the string. Sigh. Even worse, it seems that the
3354 restriction is in number of input characters (see \xnn &
3355 \whatever). So we have to do this very carefully. */
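/* For example (illustrative), pa_output_ascii (file, "Hi\n", 3) emits
   the single directive:  .STRING "Hi\x0a"  */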
3357 fputs ("\t.STRING \"", file);
3359 chars_output = 0;
3360 for (i = 0; i < size; i += 4)
3362 int co = 0;
3363 int io = 0;
3364 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3366 register unsigned int c = (unsigned char) p[i + io];
3368 if (c == '\"' || c == '\\')
3369 partial_output[co++] = '\\';
3370 if (c >= ' ' && c < 0177)
3371 partial_output[co++] = c;
3372 else
3374 unsigned int hexd;
3375 partial_output[co++] = '\\';
3376 partial_output[co++] = 'x';
3377 hexd = c / 16 - 0 + '0';
3378 if (hexd > '9')
3379 hexd -= '9' - 'a' + 1;
3380 partial_output[co++] = hexd;
3381 hexd = c % 16 - 0 + '0';
3382 if (hexd > '9')
3383 hexd -= '9' - 'a' + 1;
3384 partial_output[co++] = hexd;
3387 if (chars_output + co > 243)
3389 fputs ("\"\n\t.STRING \"", file);
3390 chars_output = 0;
3392 fwrite (partial_output, 1, (size_t) co, file);
3393 chars_output += co;
3394 co = 0;
3396 fputs ("\"\n", file);
3399 /* Try to rewrite floating point comparisons & branches to avoid
3400 useless add,tr insns.
3402 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3403 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3404 first attempt to remove useless add,tr insns. It is zero
3405 for the second pass as reorg sometimes leaves bogus REG_DEAD
3406 notes lying around.
3408 When CHECK_NOTES is zero we can only eliminate add,tr insns
3409 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3410 instructions. */
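/* Illustrative sketch: when an fcmp sets CCFP and its only use is a
   branch written with inverted arms (the add,tr form), we swap the
   arms of the branch's IF_THEN_ELSE and reverse the comparison code
   via reverse_condition_maybe_unordered, so the extra add,tr
   instruction is no longer needed.  */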
3411 static void
3412 remove_useless_addtr_insns (int check_notes)
3414 rtx_insn *insn;
3415 static int pass = 0;
3417 /* This is fairly cheap, so always run it when optimizing. */
3418 if (optimize > 0)
3420 int fcmp_count = 0;
3421 int fbranch_count = 0;
3423 /* Walk all the insns in this function looking for fcmp & fbranch
3424 instructions. Keep track of how many of each we find. */
3425 for (insn = get_insns (); insn; insn = next_insn (insn))
3427 rtx tmp;
3429 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3430 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3431 continue;
3433 tmp = PATTERN (insn);
3435 /* It must be a set. */
3436 if (GET_CODE (tmp) != SET)
3437 continue;
3439 /* If the destination is CCFP, then we've found an fcmp insn. */
3440 tmp = SET_DEST (tmp);
3441 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3443 fcmp_count++;
3444 continue;
3447 tmp = PATTERN (insn);
3448 /* If this is an fbranch instruction, bump the fbranch counter. */
3449 if (GET_CODE (tmp) == SET
3450 && SET_DEST (tmp) == pc_rtx
3451 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3452 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3453 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3454 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3456 fbranch_count++;
3457 continue;
3462 /* Find all floating point compare + branch insns. If possible,
3463 reverse the comparison & the branch to avoid add,tr insns. */
3464 for (insn = get_insns (); insn; insn = next_insn (insn))
3466 rtx tmp;
3467 rtx_insn *next;
3469 /* Ignore anything that isn't an INSN. */
3470 if (! NONJUMP_INSN_P (insn))
3471 continue;
3473 tmp = PATTERN (insn);
3475 /* It must be a set. */
3476 if (GET_CODE (tmp) != SET)
3477 continue;
3479 /* The destination must be CCFP, which is register zero. */
3480 tmp = SET_DEST (tmp);
3481 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3482 continue;
3484 /* INSN should be a set of CCFP.
3486 See if the result of this insn is used in a reversed FP
3487 conditional branch. If so, reverse our condition and
3488 the branch. Doing so avoids useless add,tr insns. */
3489 next = next_insn (insn);
3490 while (next)
3492 /* Jumps, calls and labels stop our search. */
3493 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3494 break;
3496 /* As does another fcmp insn. */
3497 if (NONJUMP_INSN_P (next)
3498 && GET_CODE (PATTERN (next)) == SET
3499 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3500 && REGNO (SET_DEST (PATTERN (next))) == 0)
3501 break;
3503 next = next_insn (next);
3506 /* Is NEXT a branch? */
3507 if (next && JUMP_P (next))
3509 rtx pattern = PATTERN (next);
3511 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3512 and CCFP dies, then reverse our conditional and the branch
3513 to avoid the add,tr. */
3514 if (GET_CODE (pattern) == SET
3515 && SET_DEST (pattern) == pc_rtx
3516 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3517 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3518 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3519 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3520 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3521 && (fcmp_count == fbranch_count
3522 || (check_notes
3523 && find_regno_note (next, REG_DEAD, 0))))
3525 /* Reverse the branch. */
3526 tmp = XEXP (SET_SRC (pattern), 1);
3527 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3528 XEXP (SET_SRC (pattern), 2) = tmp;
3529 INSN_CODE (next) = -1;
3531 /* Reverse our condition. */
3532 tmp = PATTERN (insn);
3533 PUT_CODE (XEXP (tmp, 1),
3534 (reverse_condition_maybe_unordered
3535 (GET_CODE (XEXP (tmp, 1)))));
3541 pass = !pass;
3545 /* You may have trouble believing this, but this is the 32 bit HP-PA
3546 stack layout. Wow.
3548 Offset Contents
3550 Variable arguments (optional; any number may be allocated)
3552 SP-(4*(N+9)) arg word N
3554 SP-56 arg word 5
3555 SP-52 arg word 4
3557 Fixed arguments (must be allocated; may remain unused)
3559 SP-48 arg word 3
3560 SP-44 arg word 2
3561 SP-40 arg word 1
3562 SP-36 arg word 0
3564 Frame Marker
3566 SP-32 External Data Pointer (DP)
3567 SP-28 External sr4
3568 SP-24 External/stub RP (RP')
3569 SP-20 Current RP
3570 SP-16 Static Link
3571 SP-12 Clean up
3572 SP-8 Calling Stub RP (RP'')
3573 SP-4 Previous SP
3575 Top of Frame
3577 SP-0 Stack Pointer (points to next available address)
3581 /* This function saves registers as follows. Registers marked with ' are
3582 this function's registers (as opposed to the previous function's).
3583 If a frame_pointer isn't needed, r4 is saved as a general register;
3584 the space for the frame pointer is still allocated, though, to keep
3585 things simple.
3588 Top of Frame
3590 SP (FP') Previous FP
3591 SP + 4 Alignment filler (sigh)
3592 SP + 8 Space for locals reserved here.
3596 SP + n All call-saved registers used.
3600 SP + o All call saved fp registers used.
3604 SP + p (SP') points to next available address.
3608 /* Global variables set by pa_output_function_prologue(). */
3609 /* Size of frame. Need to know this to emit return insns from
3610 leaf procedures. */
3611 static HOST_WIDE_INT actual_fsize, local_fsize;
3612 static int save_fregs;
3614 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3615 Handle case where DISP > 8k by using the add_high_const patterns.
3617 Note that in the DISP > 8k case, we will leave the high part of the
3618 address in %r1. There is code in pa_expand_{prologue,epilogue} that knows this. */
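/* For example (illustrative), store_reg (3, 12288, 30) cannot use a
   14-bit displacement, so it emits something like
   "addil L'12288,%r30" (leaving the high part in %r1) followed by
   "stw %r3,R'12288(%r1)".  */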
3620 static void
3621 store_reg (int reg, HOST_WIDE_INT disp, int base)
3623 rtx dest, src, basereg;
3624 rtx_insn *insn;
3626 src = gen_rtx_REG (word_mode, reg);
3627 basereg = gen_rtx_REG (Pmode, base);
3628 if (VAL_14_BITS_P (disp))
3630 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3631 insn = emit_move_insn (dest, src);
3633 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3635 rtx delta = GEN_INT (disp);
3636 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3638 emit_move_insn (tmpreg, delta);
3639 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3640 if (DO_FRAME_NOTES)
3642 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3643 gen_rtx_SET (tmpreg,
3644 gen_rtx_PLUS (Pmode, basereg, delta)));
3645 RTX_FRAME_RELATED_P (insn) = 1;
3647 dest = gen_rtx_MEM (word_mode, tmpreg);
3648 insn = emit_move_insn (dest, src);
3650 else
3652 rtx delta = GEN_INT (disp);
3653 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3654 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3656 emit_move_insn (tmpreg, high);
3657 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3658 insn = emit_move_insn (dest, src);
3659 if (DO_FRAME_NOTES)
3660 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3661 gen_rtx_SET (gen_rtx_MEM (word_mode,
3662 gen_rtx_PLUS (word_mode,
3663 basereg,
3664 delta)),
3665 src));
3668 if (DO_FRAME_NOTES)
3669 RTX_FRAME_RELATED_P (insn) = 1;
3672 /* Emit RTL to store REG at the memory location specified by BASE and then
3673 add MOD to BASE. MOD must be <= 8k. */
3675 static void
3676 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3678 rtx basereg, srcreg, delta;
3679 rtx_insn *insn;
3681 gcc_assert (VAL_14_BITS_P (mod));
3683 basereg = gen_rtx_REG (Pmode, base);
3684 srcreg = gen_rtx_REG (word_mode, reg);
3685 delta = GEN_INT (mod);
3687 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3688 if (DO_FRAME_NOTES)
3690 RTX_FRAME_RELATED_P (insn) = 1;
3692 /* RTX_FRAME_RELATED_P must be set on each frame related set
3693 in a parallel with more than one element. */
3694 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3695 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3699 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3700 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3701 whether to add a frame note or not.
3703 In the DISP > 8k case, we leave the high part of the address in %r1.
3704 There is code in pa_expand_{prologue,epilogue} that knows about this. */
3706 static void
3707 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3709 rtx_insn *insn;
3711 if (VAL_14_BITS_P (disp))
3713 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3714 plus_constant (Pmode,
3715 gen_rtx_REG (Pmode, base), disp));
3717 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3719 rtx basereg = gen_rtx_REG (Pmode, base);
3720 rtx delta = GEN_INT (disp);
3721 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3723 emit_move_insn (tmpreg, delta);
3724 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3725 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3726 if (DO_FRAME_NOTES)
3727 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3728 gen_rtx_SET (tmpreg,
3729 gen_rtx_PLUS (Pmode, basereg, delta)));
3731 else
3733 rtx basereg = gen_rtx_REG (Pmode, base);
3734 rtx delta = GEN_INT (disp);
3735 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3737 emit_move_insn (tmpreg,
3738 gen_rtx_PLUS (Pmode, basereg,
3739 gen_rtx_HIGH (Pmode, delta)));
3740 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3741 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3744 if (DO_FRAME_NOTES && note)
3745 RTX_FRAME_RELATED_P (insn) = 1;
3748 HOST_WIDE_INT
3749 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3751 int freg_saved = 0;
3752 int i, j;
3754 /* The code in pa_expand_prologue and pa_expand_epilogue must
3755 be consistent with the rounding and size calculation done here.
3756 Change them at the same time. */
3758 /* We do our own stack alignment. First, round the size of the
3759 stack locals up to a word boundary. */
3760 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3762 /* Space for previous frame pointer + filler. If any frame is
3763 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3764 waste some space here for the sake of HP compatibility. The
3765 first slot is only used when the frame pointer is needed. */
3766 if (size || frame_pointer_needed)
3767 size += STARTING_FRAME_OFFSET;
3769 /* If the current function calls __builtin_eh_return, then we need
3770 to allocate stack space for registers that will hold data for
3771 the exception handler. */
3772 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3774 unsigned int i;
3776 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3777 continue;
3778 size += i * UNITS_PER_WORD;
3781 /* Account for space used by the callee general register saves. */
3782 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3783 if (df_regs_ever_live_p (i))
3784 size += UNITS_PER_WORD;
3786 /* Account for space used by the callee floating point register saves. */
3787 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3788 if (df_regs_ever_live_p (i)
3789 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3791 freg_saved = 1;
3793 /* We always save both halves of the FP register, so always
3794 increment the frame size by 8 bytes. */
3795 size += 8;
3798 /* If any of the floating registers are saved, account for the
3799 alignment needed for the floating point register save block. */
3800 if (freg_saved)
3802 size = (size + 7) & ~7;
3803 if (fregs_live)
3804 *fregs_live = 1;
3807 /* The various ABIs include space for the outgoing parameters in the
3808 size of the current function's stack frame. We don't need to align
3809 for the outgoing arguments as their alignment is set by the final
3810 rounding for the frame as a whole. */
3811 size += crtl->outgoing_args_size;
3813 /* Allocate space for the fixed frame marker. This space must be
3814 allocated for any function that makes calls or allocates
3815 stack space. */
3816 if (!crtl->is_leaf || size)
3817 size += TARGET_64BIT ? 48 : 32;
3819 /* Finally, round to the preferred stack boundary. */
3820 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3821 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
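/* For example, with a 64-byte (512-bit) preferred boundary, a running
   total of 100 bytes yields a final frame size of 128. */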
3824 /* Generate the assembly code for function entry. FILE is a stdio
3825 stream to output the code to. SIZE is an int: how many units of
3826 temporary storage to allocate.
3828 Refer to the array `regs_ever_live' to determine which registers to
3829 save; `regs_ever_live[I]' is nonzero if register number I is ever
3830 used in the function. This function is responsible for knowing
3831 which registers should not be saved even if used. */
3833 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3834 of memory. If any fpu reg is used in the function, we allocate
3835 such a block here, at the bottom of the frame, just in case it's needed.
3837 If this function is a leaf procedure, then we may choose not
3838 to do a "save" insn. The decision about whether or not
3839 to do this is made in regclass.c. */
3841 static void
3842 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3844 /* The function's label and associated .PROC must never be
3845 separated and must be output *after* any profiling declarations
3846 to avoid changing spaces/subspaces within a procedure. */
3847 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3848 fputs ("\t.PROC\n", file);
3850 /* pa_expand_prologue does the dirty work now. We just need
3851 to output the assembler directives which denote the start
3852 of a function. */
3853 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3854 if (crtl->is_leaf)
3855 fputs (",NO_CALLS", file);
3856 else
3857 fputs (",CALLS", file);
3858 if (rp_saved)
3859 fputs (",SAVE_RP", file);
3861 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3862 at the beginning of the frame and that it is used as the frame
3863 pointer for the frame. We do this because our current frame
3864 layout doesn't conform to that specified in the HP runtime
3865 documentation and we need a way to indicate to programs such as
3866 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3867 isn't used by HP compilers but is supported by the assembler.
3868 However, SAVE_SP is supposed to indicate that the previous stack
3869 pointer has been saved in the frame marker. */
3870 if (frame_pointer_needed)
3871 fputs (",SAVE_SP", file);
3873 /* Pass on information about the number of callee register saves
3874 performed in the prologue.
3876 The compiler is supposed to pass the highest register number
3877 saved, the assembler then has to adjust that number before
3878 entering it into the unwind descriptor (to account for any
3879 caller saved registers with lower register numbers than the
3880 first callee saved register). */
3881 if (gr_saved)
3882 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3884 if (fr_saved)
3885 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3887 fputs ("\n\t.ENTRY\n", file);
3889 remove_useless_addtr_insns (0);
3892 void
3893 pa_expand_prologue (void)
3895 int merge_sp_adjust_with_store = 0;
3896 HOST_WIDE_INT size = get_frame_size ();
3897 HOST_WIDE_INT offset;
3898 int i;
3899 rtx tmpreg;
3900 rtx_insn *insn;
3902 gr_saved = 0;
3903 fr_saved = 0;
3904 save_fregs = 0;
3906 /* Compute total size for frame pointer, filler, locals and rounding to
3907 the next word boundary. Similar code appears in pa_compute_frame_size
3908 and must be changed in tandem with this code. */
3909 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3910 if (local_fsize || frame_pointer_needed)
3911 local_fsize += STARTING_FRAME_OFFSET;
3913 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3914 if (flag_stack_usage_info)
3915 current_function_static_stack_size = actual_fsize;
3917 /* Compute a few things we will use often. */
3918 tmpreg = gen_rtx_REG (word_mode, 1);
3920 /* Save RP first. The calling conventions manual states RP will
3921 always be stored into the caller's frame at sp - 20 or sp - 16
3922 depending on which ABI is in use. */
3923 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3925 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3926 rp_saved = true;
3928 else
3929 rp_saved = false;
3931 /* Allocate the local frame and set up the frame pointer if needed. */
3932 if (actual_fsize != 0)
3934 if (frame_pointer_needed)
3936 /* Copy the old frame pointer temporarily into %r1. Set up the
3937 new stack pointer, then store away the saved old frame pointer
3938 into the stack at sp and at the same time update the stack
3939 pointer by actual_fsize bytes. Two versions: the first
3940 handles small (<8k) frames, the second handles large (>=8k)
3941 frames. */
3942 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3943 if (DO_FRAME_NOTES)
3944 RTX_FRAME_RELATED_P (insn) = 1;
3946 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3947 if (DO_FRAME_NOTES)
3948 RTX_FRAME_RELATED_P (insn) = 1;
3950 if (VAL_14_BITS_P (actual_fsize))
3951 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3952 else
3954 /* It is incorrect to store the saved frame pointer at *sp,
3955 then increment sp (writes beyond the current stack boundary).
3957 So instead use stwm to store at *sp and post-increment the
3958 stack pointer as an atomic operation. Then increment sp to
3959 finish allocating the new frame. */
3960 HOST_WIDE_INT adjust1 = 8192 - 64;
3961 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3963 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3964 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3965 adjust2, 1);
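/* For example, if actual_fsize is 20000, adjust1 is 8128 (8192 - 64,
   the largest 64-byte multiple that fits in a 14-bit signed
   displacement) and adjust2 is 11872. */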
3968 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3969 we need to store the previous stack pointer (frame pointer)
3970 into the frame marker on targets that use the HP unwind
3971 library. This allows the HP unwind library to be used to
3972 unwind GCC frames. However, we are not fully compatible
3973 with the HP library because our frame layout differs from
3974 that specified in the HP runtime specification.
3976 We don't want a frame note on this instruction as the frame
3977 marker moves during dynamic stack allocation.
3979 This instruction also serves as a blockage to prevent
3980 register spills from being scheduled before the stack
3981 pointer is raised. This is necessary as we store
3982 registers using the frame pointer as a base register,
3983 and the frame pointer is set before sp is raised. */
3984 if (TARGET_HPUX_UNWIND_LIBRARY)
3986 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3987 GEN_INT (TARGET_64BIT ? -8 : -4));
3989 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3990 hard_frame_pointer_rtx);
3992 else
3993 emit_insn (gen_blockage ());
3995 /* no frame pointer needed. */
3996 else
3998 /* In some cases we can perform the first callee register save
3999 and allocating the stack frame at the same time. If so, just
4000 make a note of it and defer allocating the frame until saving
4001 the callee registers. */
4002 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4003 merge_sp_adjust_with_store = 1;
4004 /* Cannot optimize. Adjust the stack frame by actual_fsize
4005 bytes. */
4006 else
4007 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4008 actual_fsize, 1);
4012 /* Normal register save.
4014 Do not save the frame pointer in the frame_pointer_needed case. It
4015 was done earlier. */
4016 if (frame_pointer_needed)
4018 offset = local_fsize;
4020 /* Saving the EH return data registers in the frame is the simplest
4021 way to get the frame unwind information emitted. We put them
4022 just before the general registers. */
4023 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4025 unsigned int i, regno;
4027 for (i = 0; ; ++i)
4029 regno = EH_RETURN_DATA_REGNO (i);
4030 if (regno == INVALID_REGNUM)
4031 break;
4033 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4034 offset += UNITS_PER_WORD;
4038 for (i = 18; i >= 4; i--)
4039 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4041 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4042 offset += UNITS_PER_WORD;
4043 gr_saved++;
4045 /* Account for %r3 which is saved in a special place. */
4046 gr_saved++;
4048 /* No frame pointer needed. */
4049 else
4051 offset = local_fsize - actual_fsize;
4053 /* Saving the EH return data registers in the frame is the simplest
4054 way to get the frame unwind information emitted. */
4055 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4057 unsigned int i, regno;
4059 for (i = 0; ; ++i)
4061 regno = EH_RETURN_DATA_REGNO (i);
4062 if (regno == INVALID_REGNUM)
4063 break;
4065 /* If merge_sp_adjust_with_store is nonzero, then we can
4066 optimize the first save. */
4067 if (merge_sp_adjust_with_store)
4069 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4070 merge_sp_adjust_with_store = 0;
4072 else
4073 store_reg (regno, offset, STACK_POINTER_REGNUM);
4074 offset += UNITS_PER_WORD;
4078 for (i = 18; i >= 3; i--)
4079 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4081 /* If merge_sp_adjust_with_store is nonzero, then we can
4082 optimize the first GR save. */
4083 if (merge_sp_adjust_with_store)
4085 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4086 merge_sp_adjust_with_store = 0;
4088 else
4089 store_reg (i, offset, STACK_POINTER_REGNUM);
4090 offset += UNITS_PER_WORD;
4091 gr_saved++;
4094 /* If we wanted to merge the SP adjustment with a GR save, but we never
4095 did any GR saves, then just emit the adjustment here. */
4096 if (merge_sp_adjust_with_store)
4097 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4098 actual_fsize, 1);
4101 /* The hppa calling conventions say that %r19, the pic offset
4102 register, is saved at sp - 32 (in this function's frame)
4103 when generating PIC code. FIXME: What is the correct thing
4104 to do for functions which make no calls and allocate no
4105 frame? Do we need to allocate a frame, or can we just omit
4106 the save? For now we'll just omit the save.
4108 We don't want a note on this insn as the frame marker can
4109 move if there is a dynamic stack allocation. */
4110 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4112 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4114 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4118 /* Align pointer properly (doubleword boundary). */
4119 offset = (offset + 7) & ~7;
4121 /* Floating point register store. */
4122 if (save_fregs)
4124 rtx base;
4126 /* First get the frame or stack pointer to the start of the FP register
4127 save area. */
4128 if (frame_pointer_needed)
4130 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4131 base = hard_frame_pointer_rtx;
4133 else
4135 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4136 base = stack_pointer_rtx;
4139 /* Now actually save the FP registers. */
4140 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4142 if (df_regs_ever_live_p (i)
4143 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4145 rtx addr, reg;
4146 rtx_insn *insn;
4147 addr = gen_rtx_MEM (DFmode,
4148 gen_rtx_POST_INC (word_mode, tmpreg));
4149 reg = gen_rtx_REG (DFmode, i);
4150 insn = emit_move_insn (addr, reg);
4151 if (DO_FRAME_NOTES)
4153 RTX_FRAME_RELATED_P (insn) = 1;
4154 if (TARGET_64BIT)
4156 rtx mem = gen_rtx_MEM (DFmode,
4157 plus_constant (Pmode, base,
4158 offset));
4159 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4160 gen_rtx_SET (mem, reg));
4162 else
4164 rtx meml = gen_rtx_MEM (SFmode,
4165 plus_constant (Pmode, base,
4166 offset));
4167 rtx memr = gen_rtx_MEM (SFmode,
4168 plus_constant (Pmode, base,
4169 offset + 4));
4170 rtx regl = gen_rtx_REG (SFmode, i);
4171 rtx regr = gen_rtx_REG (SFmode, i + 1);
4172 rtx setl = gen_rtx_SET (meml, regl);
4173 rtx setr = gen_rtx_SET (memr, regr);
4174 rtvec vec;
4176 RTX_FRAME_RELATED_P (setl) = 1;
4177 RTX_FRAME_RELATED_P (setr) = 1;
4178 vec = gen_rtvec (2, setl, setr);
4179 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4180 gen_rtx_SEQUENCE (VOIDmode, vec));
4183 offset += GET_MODE_SIZE (DFmode);
4184 fr_saved++;
4190 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4191 Handle case where DISP > 8k by using the add_high_const patterns. */
4193 static void
4194 load_reg (int reg, HOST_WIDE_INT disp, int base)
4196 rtx dest = gen_rtx_REG (word_mode, reg);
4197 rtx basereg = gen_rtx_REG (Pmode, base);
4198 rtx src;
4200 if (VAL_14_BITS_P (disp))
4201 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4202 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4204 rtx delta = GEN_INT (disp);
4205 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4207 emit_move_insn (tmpreg, delta);
4208 if (TARGET_DISABLE_INDEXING)
4210 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4211 src = gen_rtx_MEM (word_mode, tmpreg);
4213 else
4214 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4216 else
4218 rtx delta = GEN_INT (disp);
4219 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4220 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4222 emit_move_insn (tmpreg, high);
4223 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4226 emit_move_insn (dest, src);
4229 /* Update the total code bytes output to the text section. */
4231 static void
4232 update_total_code_bytes (unsigned int nbytes)
4234 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4235 && !IN_NAMED_SECTION_P (cfun->decl))
4237 unsigned int old_total = total_code_bytes;
4239 total_code_bytes += nbytes;
4241 /* Be prepared to handle overflows. */
4242 if (old_total > total_code_bytes)
4243 total_code_bytes = UINT_MAX;
4247 /* This function generates the assembly code for function exit.
4248 Args are as for output_function_prologue ().
4250 The function epilogue should not depend on the current stack
4251 pointer! It should use the frame pointer only. This is mandatory
4252 because of alloca; we also take advantage of it to omit stack
4253 adjustments before returning. */
4255 static void
4256 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4258 rtx_insn *insn = get_last_insn ();
4259 bool extra_nop;
4261 /* pa_expand_epilogue does the dirty work now. We just need
4262 to output the assembler directives which denote the end
4263 of a function.
4265 To make debuggers happy, emit a nop if the epilogue was completely
4266 eliminated due to a volatile call as the last insn in the
4267 current function. That way the return address (in %r2) will
4268 always point to a valid instruction in the current function. */
4270 /* Get the last real insn. */
4271 if (NOTE_P (insn))
4272 insn = prev_real_insn (insn);
4274 /* If it is a sequence, then look inside. */
4275 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4276 insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0);
4278 /* If insn is a CALL_INSN, then it must be a call to a volatile
4279 function (otherwise there would be epilogue insns). */
4280 if (insn && CALL_P (insn))
4282 fputs ("\tnop\n", file);
4283 extra_nop = true;
4285 else
4286 extra_nop = false;
4288 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4290 if (TARGET_SOM && TARGET_GAS)
4292 /* We are done with this subspace except possibly for some additional
4293 debug information. Forget that we are in this subspace to ensure
4294 that the next function is output in its own subspace. */
4295 in_section = NULL;
4296 cfun->machine->in_nsubspa = 2;
4299 /* Thunks do their own insn accounting. */
4300 if (cfun->is_thunk)
4301 return;
4303 if (INSN_ADDRESSES_SET_P ())
4305 last_address = extra_nop ? 4 : 0;
4306 insn = get_last_nonnote_insn ();
4307 if (insn)
4309 last_address += INSN_ADDRESSES (INSN_UID (insn));
4310 if (INSN_P (insn))
4311 last_address += insn_default_length (insn);
4313 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4314 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4316 else
4317 last_address = UINT_MAX;
4319 /* Finally, update the total number of code bytes output so far. */
4320 update_total_code_bytes (last_address);
4323 void
4324 pa_expand_epilogue (void)
4326 rtx tmpreg;
4327 HOST_WIDE_INT offset;
4328 HOST_WIDE_INT ret_off = 0;
4329 int i;
4330 int merge_sp_adjust_with_load = 0;
4332 /* We will use this often. */
4333 tmpreg = gen_rtx_REG (word_mode, 1);
4335 /* Try to restore RP early to avoid load/use interlocks when
4336 RP gets used in the return (bv) instruction. This appears to still
4337 be necessary even when we schedule the prologue and epilogue. */
4338 if (rp_saved)
4340 ret_off = TARGET_64BIT ? -16 : -20;
4341 if (frame_pointer_needed)
4343 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4344 ret_off = 0;
4346 else
4348 /* No frame pointer, and stack is smaller than 8k. */
4349 if (VAL_14_BITS_P (ret_off - actual_fsize))
4351 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4352 ret_off = 0;
4357 /* General register restores. */
4358 if (frame_pointer_needed)
4360 offset = local_fsize;
4362 /* If the current function calls __builtin_eh_return, then we need
4363 to restore the saved EH data registers. */
4364 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4366 unsigned int i, regno;
4368 for (i = 0; ; ++i)
4370 regno = EH_RETURN_DATA_REGNO (i);
4371 if (regno == INVALID_REGNUM)
4372 break;
4374 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4375 offset += UNITS_PER_WORD;
4379 for (i = 18; i >= 4; i--)
4380 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4382 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4383 offset += UNITS_PER_WORD;
4386 else
4388 offset = local_fsize - actual_fsize;
4390 /* If the current function calls __builtin_eh_return, then we need
4391 to restore the saved EH data registers. */
4392 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4394 unsigned int i, regno;
4396 for (i = 0; ; ++i)
4398 regno = EH_RETURN_DATA_REGNO (i);
4399 if (regno == INVALID_REGNUM)
4400 break;
4402 /* Only for the first load.
4403 merge_sp_adjust_with_load holds the register load
4404 with which we will merge the sp adjustment. */
4405 if (merge_sp_adjust_with_load == 0
4406 && local_fsize == 0
4407 && VAL_14_BITS_P (-actual_fsize))
4408 merge_sp_adjust_with_load = regno;
4409 else
4410 load_reg (regno, offset, STACK_POINTER_REGNUM);
4411 offset += UNITS_PER_WORD;
4415 for (i = 18; i >= 3; i--)
4417 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4419 /* Only for the first load.
4420 merge_sp_adjust_with_load holds the register load
4421 with which we will merge the sp adjustment. */
4422 if (merge_sp_adjust_with_load == 0
4423 && local_fsize == 0
4424 && VAL_14_BITS_P (-actual_fsize))
4425 merge_sp_adjust_with_load = i;
4426 else
4427 load_reg (i, offset, STACK_POINTER_REGNUM);
4428 offset += UNITS_PER_WORD;
4433 /* Align pointer properly (doubleword boundary). */
4434 offset = (offset + 7) & ~7;
4436 /* FP register restores. */
4437 if (save_fregs)
4439 /* Adjust the register to index off of. */
4440 if (frame_pointer_needed)
4441 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4442 else
4443 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4445 /* Actually do the restores now. */
4446 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4447 if (df_regs_ever_live_p (i)
4448 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4450 rtx src = gen_rtx_MEM (DFmode,
4451 gen_rtx_POST_INC (word_mode, tmpreg));
4452 rtx dest = gen_rtx_REG (DFmode, i);
4453 emit_move_insn (dest, src);
4457 /* Emit a blockage insn here to keep these insns from being moved to
4458 an earlier spot in the epilogue, or into the main instruction stream.
4460 This is necessary as we must not cut the stack back before all the
4461 restores are finished. */
4462 emit_insn (gen_blockage ());
4464 /* Reset stack pointer (and possibly frame pointer). The stack
4465 pointer is initially set to fp + 64 to avoid a race condition. */
4466 if (frame_pointer_needed)
4468 rtx delta = GEN_INT (-64);
4470 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4471 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4472 stack_pointer_rtx, delta));
4474 /* If we were deferring a callee register restore, do it now. */
4475 else if (merge_sp_adjust_with_load)
4477 rtx delta = GEN_INT (-actual_fsize);
4478 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4480 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4482 else if (actual_fsize != 0)
4483 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4484 - actual_fsize, 0);
4486 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4487 frame greater than 8k), do so now. */
4488 if (ret_off != 0)
4489 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4491 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4493 rtx sa = EH_RETURN_STACKADJ_RTX;
4495 emit_insn (gen_blockage ());
4496 emit_insn (TARGET_64BIT
4497 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4498 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4502 bool
4503 pa_can_use_return_insn (void)
4505 if (!reload_completed)
4506 return false;
4508 if (frame_pointer_needed)
4509 return false;
4511 if (df_regs_ever_live_p (2))
4512 return false;
4514 if (crtl->profile)
4515 return false;
4517 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4521 hppa_pic_save_rtx (void)
4523 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4526 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4527 #define NO_DEFERRED_PROFILE_COUNTERS 0
4528 #endif
4531 /* Vector of funcdef numbers. */
4532 static vec<int> funcdef_nos;
4534 /* Output deferred profile counters. */
4535 static void
4536 output_deferred_profile_counters (void)
4538 unsigned int i;
4539 int align, n;
4541 if (funcdef_nos.is_empty ())
4542 return;
4544 switch_to_section (data_section);
4545 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4546 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4548 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4550 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4551 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4554 funcdef_nos.release ();
4557 void
4558 hppa_profile_hook (int label_no)
4560 /* We use SImode for the address of the function in both 32 and
4561 64-bit code to avoid having to provide DImode versions of the
4562 lcla2 and load_offset_label_address insn patterns. */
4563 rtx reg = gen_reg_rtx (SImode);
4564 rtx_code_label *label_rtx = gen_label_rtx ();
4565 rtx mcount = gen_rtx_MEM (Pmode, gen_rtx_SYMBOL_REF (Pmode, "_mcount"));
4566 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4567 rtx arg_bytes, begin_label_rtx;
4568 rtx_insn *call_insn;
4569 char begin_label_name[16];
4570 bool use_mcount_pcrel_call;
4572 /* If we can reach _mcount with a pc-relative call, we can optimize
4573 loading the address of the current function. This requires linker
4574 long branch stub support. */
4575 if (!TARGET_PORTABLE_RUNTIME
4576 && !TARGET_LONG_CALLS
4577 && (TARGET_SOM || flag_function_sections))
4578 use_mcount_pcrel_call = TRUE;
4579 else
4580 use_mcount_pcrel_call = FALSE;
4582 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4583 label_no);
4584 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4586 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4588 if (!use_mcount_pcrel_call)
4590 /* The address of the function is loaded into %r25 with an instruction-
4591 relative sequence that avoids the use of relocations. The sequence
4592 is split so that the load_offset_label_address instruction can
4593 occupy the delay slot of the call to _mcount. */
4594 if (TARGET_PA_20)
4595 emit_insn (gen_lcla2 (reg, label_rtx));
4596 else
4597 emit_insn (gen_lcla1 (reg, label_rtx));
4599 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4600 reg,
4601 begin_label_rtx,
4602 label_rtx));
4605 if (!NO_DEFERRED_PROFILE_COUNTERS)
4607 rtx count_label_rtx, addr, r24;
4608 char count_label_name[16];
4610 funcdef_nos.safe_push (label_no);
4611 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4612 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4613 ggc_strdup (count_label_name));
4615 addr = force_reg (Pmode, count_label_rtx);
4616 r24 = gen_rtx_REG (Pmode, 24);
4617 emit_move_insn (r24, addr);
4619 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4620 if (use_mcount_pcrel_call)
4621 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4622 begin_label_rtx));
4623 else
4624 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4626 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4628 else
4630 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4631 if (use_mcount_pcrel_call)
4632 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4633 begin_label_rtx));
4634 else
4635 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4638 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4639 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4641 /* Indicate the _mcount call cannot throw, nor will it execute a
4642 non-local goto. */
4643 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4645 /* Allocate space for fixed arguments. */
4646 if (reg_parm_stack_space > crtl->outgoing_args_size)
4647 crtl->outgoing_args_size = reg_parm_stack_space;
4650 /* Fetch the return address for the frame COUNT steps up from
4651 the current frame, after the prologue. FRAMEADDR is the
4652 frame pointer of the COUNT frame.
4654 We want to ignore any export stub remnants here. To handle this,
4655 we examine the code at the return address, and if it is an export
4656 stub, we return a memory rtx for the stub return address stored
4657 at frame-24.
4659 The value returned is used in two different ways:
4661 1. To find a function's caller.
4663 2. To change the return address for a function.
4665 This function handles most instances of case 1; however, it will
4666 fail if there are two levels of stubs to execute on the return
4667 path. The only way I believe that can happen is if the return value
4668 needs a parameter relocation, which never happens for C code.
4670 This function handles most instances of case 2; however, it will
4671 fail if we did not originally have stub code on the return path
4672 but will need stub code on the new return path. This can happen if
4673 the caller & callee are both in the main program, but the new
4674 return location is in a shared library. */
4677 pa_return_addr_rtx (int count, rtx frameaddr)
4679 rtx label;
4680 rtx rp;
4681 rtx saved_rp;
4682 rtx ins;
4684 /* The instruction stream at the return address of a PA1.X export stub is:
4686 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4687 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4688 0x00011820 | stub+16: mtsp r1,sr0
4689 0xe0400002 | stub+20: be,n 0(sr0,rp)
4691 0xe0400002 must be specified as -532676606 so that it won't be
4692 rejected as an invalid immediate operand on 64-bit hosts.
4694 The instruction stream at the return address of a PA2.0 export stub is:
4696 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4697 0xe840d002 | stub+12: bve,n (rp)
4700 HOST_WIDE_INT insns[4];
4701 int i, len;
4703 if (count != 0)
4704 return NULL_RTX;
4706 rp = get_hard_reg_initial_val (Pmode, 2);
4708 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4709 return rp;
4711 /* If there is no export stub then just use the value saved from
4712 the return pointer register. */
4714 saved_rp = gen_reg_rtx (Pmode);
4715 emit_move_insn (saved_rp, rp);
4717 /* Get pointer to the instruction stream. We have to mask out the
4718 privilege level from the two low order bits of the return address
4719 pointer here so that ins will point to the start of the first
4720 instruction that would have been executed if we returned. */
4721 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4722 label = gen_label_rtx ();
4724 if (TARGET_PA_20)
4726 insns[0] = 0x4bc23fd1;
4727 insns[1] = -398405630;
4728 len = 2;
4730 else
4732 insns[0] = 0x4bc23fd1;
4733 insns[1] = 0x004010a1;
4734 insns[2] = 0x00011820;
4735 insns[3] = -532676606;
4736 len = 4;
4739 /* Check the instruction stream at the normal return address for the
4740 export stub. If it is an export stub, then our return address is
4741 really in -24[frameaddr]. */
4743 for (i = 0; i < len; i++)
4745 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4746 rtx op1 = GEN_INT (insns[i]);
4747 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4750 /* Here we know that our return address points to an export
4751 stub. We don't want to return the address of the export stub,
4752 but rather the return address of the export stub. That return
4753 address is stored at -24[frameaddr]. */
4755 emit_move_insn (saved_rp,
4756 gen_rtx_MEM (Pmode,
4757 memory_address (Pmode,
4758 plus_constant (Pmode, frameaddr,
4759 -24))));
4761 emit_label (label);
4763 return saved_rp;
4766 void
4767 pa_emit_bcond_fp (rtx operands[])
4769 enum rtx_code code = GET_CODE (operands[0]);
4770 rtx operand0 = operands[1];
4771 rtx operand1 = operands[2];
4772 rtx label = operands[3];
4774 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4775 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4777 emit_jump_insn (gen_rtx_SET (pc_rtx,
4778 gen_rtx_IF_THEN_ELSE (VOIDmode,
4779 gen_rtx_fmt_ee (NE,
4780 VOIDmode,
4781 gen_rtx_REG (CCFPmode, 0),
4782 const0_rtx),
4783 gen_rtx_LABEL_REF (VOIDmode, label),
4784 pc_rtx)));
4788 /* Adjust the cost of a scheduling dependency. Return the new cost of
4789 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4791 static int
4792 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4793 unsigned int)
4795 enum attr_type attr_type;
4797 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4798 true dependencies as they are described with bypasses now. */
4799 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4800 return cost;
4802 if (! recog_memoized (insn))
4803 return 0;
4805 attr_type = get_attr_type (insn);
4807 switch (dep_type)
4809 case REG_DEP_ANTI:
4810 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4811 cycles later. */
4813 if (attr_type == TYPE_FPLOAD)
4815 rtx pat = PATTERN (insn);
4816 rtx dep_pat = PATTERN (dep_insn);
4817 if (GET_CODE (pat) == PARALLEL)
4819 /* This happens for the fldXs,mb patterns. */
4820 pat = XVECEXP (pat, 0, 0);
4822 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4823 /* If this happens, we have to extend this to schedule
4824 optimally. Return 0 for now. */
4825 return 0;
4827 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4829 if (! recog_memoized (dep_insn))
4830 return 0;
4831 switch (get_attr_type (dep_insn))
4833 case TYPE_FPALU:
4834 case TYPE_FPMULSGL:
4835 case TYPE_FPMULDBL:
4836 case TYPE_FPDIVSGL:
4837 case TYPE_FPDIVDBL:
4838 case TYPE_FPSQRTSGL:
4839 case TYPE_FPSQRTDBL:
4840 /* A fpload can't be issued until one cycle before a
4841 preceding arithmetic operation has finished if
4842 the target of the fpload is any of the sources
4843 (or destination) of the arithmetic operation. */
4844 return insn_default_latency (dep_insn) - 1;
4846 default:
4847 return 0;
4851 else if (attr_type == TYPE_FPALU)
4853 rtx pat = PATTERN (insn);
4854 rtx dep_pat = PATTERN (dep_insn);
4855 if (GET_CODE (pat) == PARALLEL)
4857 /* This happens for the fldXs,mb patterns. */
4858 pat = XVECEXP (pat, 0, 0);
4860 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4861 /* If this happens, we have to extend this to schedule
4862 optimally. Return 0 for now. */
4863 return 0;
4865 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4867 if (! recog_memoized (dep_insn))
4868 return 0;
4869 switch (get_attr_type (dep_insn))
4871 case TYPE_FPDIVSGL:
4872 case TYPE_FPDIVDBL:
4873 case TYPE_FPSQRTSGL:
4874 case TYPE_FPSQRTDBL:
4875 /* An ALU flop can't be issued until two cycles before a
4876 preceding divide or sqrt operation has finished if
4877 the target of the ALU flop is any of the sources
4878 (or destination) of the divide or sqrt operation. */
4879 return insn_default_latency (dep_insn) - 2;
4881 default:
4882 return 0;
4887 /* For other anti dependencies, the cost is 0. */
4888 return 0;
4890 case REG_DEP_OUTPUT:
4891 /* Output dependency; DEP_INSN writes a register that INSN writes some
4892 cycles later. */
4893 if (attr_type == TYPE_FPLOAD)
4895 rtx pat = PATTERN (insn);
4896 rtx dep_pat = PATTERN (dep_insn);
4897 if (GET_CODE (pat) == PARALLEL)
4899 /* This happens for the fldXs,mb patterns. */
4900 pat = XVECEXP (pat, 0, 0);
4902 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4903 /* If this happens, we have to extend this to schedule
4904 optimally. Return 0 for now. */
4905 return 0;
4907 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4909 if (! recog_memoized (dep_insn))
4910 return 0;
4911 switch (get_attr_type (dep_insn))
4913 case TYPE_FPALU:
4914 case TYPE_FPMULSGL:
4915 case TYPE_FPMULDBL:
4916 case TYPE_FPDIVSGL:
4917 case TYPE_FPDIVDBL:
4918 case TYPE_FPSQRTSGL:
4919 case TYPE_FPSQRTDBL:
4920 /* A fpload can't be issued until one cycle before a
4921 preceding arithmetic operation has finished if
4922 the target of the fpload is the destination of the
4923 arithmetic operation.
4925 Exception: For PA7100LC, PA7200 and PA7300, the cost
4926 is 3 cycles, unless they bundle together. We also
4927 pay the penalty if the second insn is a fpload. */
4928 return insn_default_latency (dep_insn) - 1;
4930 default:
4931 return 0;
4935 else if (attr_type == TYPE_FPALU)
4937 rtx pat = PATTERN (insn);
4938 rtx dep_pat = PATTERN (dep_insn);
4939 if (GET_CODE (pat) == PARALLEL)
4941 /* This happens for the fldXs,mb patterns. */
4942 pat = XVECEXP (pat, 0, 0);
4944 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4945 /* If this happens, we have to extend this to schedule
4946 optimally. Return 0 for now. */
4947 return 0;
4949 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4951 if (! recog_memoized (dep_insn))
4952 return 0;
4953 switch (get_attr_type (dep_insn))
4955 case TYPE_FPDIVSGL:
4956 case TYPE_FPDIVDBL:
4957 case TYPE_FPSQRTSGL:
4958 case TYPE_FPSQRTDBL:
4959 /* An ALU flop can't be issued until two cycles before a
4960 preceding divide or sqrt operation has finished if
4961 the target of the ALU flop is also the target of
4962 the divide or sqrt operation. */
4963 return insn_default_latency (dep_insn) - 2;
4965 default:
4966 return 0;
4971 /* For other output dependencies, the cost is 0. */
4972 return 0;
4974 default:
4975 gcc_unreachable ();
4979 /* Adjust scheduling priorities. We use this to try and keep addil
4980 and the next use of %r1 close together. */
4981 static int
4982 pa_adjust_priority (rtx_insn *insn, int priority)
4984 rtx set = single_set (insn);
4985 rtx src, dest;
4986 if (set)
4988 src = SET_SRC (set);
4989 dest = SET_DEST (set);
4990 if (GET_CODE (src) == LO_SUM
4991 && symbolic_operand (XEXP (src, 1), VOIDmode)
4992 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4993 priority >>= 3;
4995 else if (GET_CODE (src) == MEM
4996 && GET_CODE (XEXP (src, 0)) == LO_SUM
4997 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4998 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4999 priority >>= 1;
5001 else if (GET_CODE (dest) == MEM
5002 && GET_CODE (XEXP (dest, 0)) == LO_SUM
5003 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
5004 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
5005 priority >>= 3;
5007 return priority;
5010 /* The 700 can only issue a single insn at a time.
5011 The 7XXX processors can issue two insns at a time.
5012 The 8000 can issue 4 insns at a time. */
5013 static int
5014 pa_issue_rate (void)
5016 switch (pa_cpu)
5018 case PROCESSOR_700: return 1;
5019 case PROCESSOR_7100: return 2;
5020 case PROCESSOR_7100LC: return 2;
5021 case PROCESSOR_7200: return 2;
5022 case PROCESSOR_7300: return 2;
5023 case PROCESSOR_8000: return 4;
5025 default:
5026 gcc_unreachable ();
5032 /* Return any length plus adjustment needed by INSN which already has
5033 its length computed as LENGTH. Return LENGTH if no adjustment is
5034 necessary.
5036 Also compute the length of an inline block move here as it is too
5037 complicated to express as a length attribute in pa.md. */
5039 pa_adjust_insn_length (rtx_insn *insn, int length)
5041 rtx pat = PATTERN (insn);
5043 /* If length is negative or undefined, provide initial length. */
5044 if ((unsigned int) length >= INT_MAX)
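/* The cast maps any negative LENGTH above INT_MAX, so this single
   test catches both the negative and the undefined case. */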
5046 if (GET_CODE (pat) == SEQUENCE)
5047 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5049 switch (get_attr_type (insn))
5051 case TYPE_MILLI:
5052 length = pa_attr_length_millicode_call (insn);
5053 break;
5054 case TYPE_CALL:
5055 length = pa_attr_length_call (insn, 0);
5056 break;
5057 case TYPE_SIBCALL:
5058 length = pa_attr_length_call (insn, 1);
5059 break;
5060 case TYPE_DYNCALL:
5061 length = pa_attr_length_indirect_call (insn);
5062 break;
5063 case TYPE_SH_FUNC_ADRS:
5064 length = pa_attr_length_millicode_call (insn) + 20;
5065 break;
5066 default:
5067 gcc_unreachable ();
5071 /* Block move pattern. */
5072 if (NONJUMP_INSN_P (insn)
5073 && GET_CODE (pat) == PARALLEL
5074 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5075 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5076 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5077 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5078 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5079 length += compute_movmem_length (insn) - 4;
5080 /* Block clear pattern. */
5081 else if (NONJUMP_INSN_P (insn)
5082 && GET_CODE (pat) == PARALLEL
5083 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5084 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5085 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5086 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5087 length += compute_clrmem_length (insn) - 4;
5088 /* Conditional branch with an unfilled delay slot. */
5089 else if (JUMP_P (insn) && ! simplejump_p (insn))
5091 /* Adjust a short backwards conditional with an unfilled delay slot. */
5092 if (GET_CODE (pat) == SET
5093 && length == 4
5094 && JUMP_LABEL (insn) != NULL_RTX
5095 && ! forward_branch_p (insn))
5096 length += 4;
5097 else if (GET_CODE (pat) == PARALLEL
5098 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5099 && length == 4)
5100 length += 4;
5101 /* Adjust dbra insn with short backwards conditional branch with
5102 unfilled delay slot -- only for the case where the counter is in a
5103 general register. */
5104 else if (GET_CODE (pat) == PARALLEL
5105 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5106 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5107 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5108 && length == 4
5109 && ! forward_branch_p (insn))
5110 length += 4;
5112 return length;
5115 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5117 static bool
5118 pa_print_operand_punct_valid_p (unsigned char code)
5120 if (code == '@'
5121 || code == '#'
5122 || code == '*'
5123 || code == '^')
5124 return true;
5126 return false;
5129 /* Print operand X (an rtx) in assembler syntax to file FILE.
5130 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5131 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5133 void
5134 pa_print_operand (FILE *file, rtx x, int code)
5136 switch (code)
5138 case '#':
5139 /* Output a 'nop' if there's nothing for the delay slot. */
5140 if (dbr_sequence_length () == 0)
5141 fputs ("\n\tnop", file);
5142 return;
5143 case '*':
5144 /* Output a nullification completer if there's nothing for the
5145 delay slot or nullification is requested. */
5146 if (dbr_sequence_length () == 0 ||
5147 (final_sequence &&
5148 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5149 fputs (",n", file);
5150 return;
5151 case 'R':
5152 /* Print out the second register name of a register pair.
5153 I.e., R (6) => 7. */
5154 fputs (reg_names[REGNO (x) + 1], file);
5155 return;
5156 case 'r':
5157 /* A register or zero. */
5158 if (x == const0_rtx
5159 || (x == CONST0_RTX (DFmode))
5160 || (x == CONST0_RTX (SFmode)))
5162 fputs ("%r0", file);
5163 return;
5165 else
5166 break;
5167 case 'f':
5168 /* A register or zero (floating point). */
5169 if (x == const0_rtx
5170 || (x == CONST0_RTX (DFmode))
5171 || (x == CONST0_RTX (SFmode)))
5173 fputs ("%fr0", file);
5174 return;
5176 else
5177 break;
5178 case 'A':
5180 rtx xoperands[2];
5182 xoperands[0] = XEXP (XEXP (x, 0), 0);
5183 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5184 pa_output_global_address (file, xoperands[1], 0);
5185 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5186 return;
5189 case 'C': /* Plain (C)ondition */
5190 case 'X':
5191 switch (GET_CODE (x))
5193 case EQ:
5194 fputs ("=", file); break;
5195 case NE:
5196 fputs ("<>", file); break;
5197 case GT:
5198 fputs (">", file); break;
5199 case GE:
5200 fputs (">=", file); break;
5201 case GEU:
5202 fputs (">>=", file); break;
5203 case GTU:
5204 fputs (">>", file); break;
5205 case LT:
5206 fputs ("<", file); break;
5207 case LE:
5208 fputs ("<=", file); break;
5209 case LEU:
5210 fputs ("<<=", file); break;
5211 case LTU:
5212 fputs ("<<", file); break;
5213 default:
5214 gcc_unreachable ();
5216 return;
5217 case 'N': /* Condition, (N)egated */
5218 switch (GET_CODE (x))
5220 case EQ:
5221 fputs ("<>", file); break;
5222 case NE:
5223 fputs ("=", file); break;
5224 case GT:
5225 fputs ("<=", file); break;
5226 case GE:
5227 fputs ("<", file); break;
5228 case GEU:
5229 fputs ("<<", file); break;
5230 case GTU:
5231 fputs ("<<=", file); break;
5232 case LT:
5233 fputs (">=", file); break;
5234 case LE:
5235 fputs (">", file); break;
5236 case LEU:
5237 fputs (">>", file); break;
5238 case LTU:
5239 fputs (">>=", file); break;
5240 default:
5241 gcc_unreachable ();
5243 return;
5244 /* For floating point comparisons. Note that the output
5245 predicates are the complement of the desired mode. The
5246 conditions for GT, GE, LT, LE and LTGT cause an invalid
5247 operation exception if the result is unordered and this
5248 exception is enabled in the floating-point status register. */
5249 case 'Y':
5250 switch (GET_CODE (x))
5252 case EQ:
5253 fputs ("!=", file); break;
5254 case NE:
5255 fputs ("=", file); break;
5256 case GT:
5257 fputs ("!>", file); break;
5258 case GE:
5259 fputs ("!>=", file); break;
5260 case LT:
5261 fputs ("!<", file); break;
5262 case LE:
5263 fputs ("!<=", file); break;
5264 case LTGT:
5265 fputs ("!<>", file); break;
5266 case UNLE:
5267 fputs ("!?<=", file); break;
5268 case UNLT:
5269 fputs ("!?<", file); break;
5270 case UNGE:
5271 fputs ("!?>=", file); break;
5272 case UNGT:
5273 fputs ("!?>", file); break;
5274 case UNEQ:
5275 fputs ("!?=", file); break;
5276 case UNORDERED:
5277 fputs ("!?", file); break;
5278 case ORDERED:
5279 fputs ("?", file); break;
5280 default:
5281 gcc_unreachable ();
5283 return;
5284 case 'S': /* Condition, operands are (S)wapped. */
5285 switch (GET_CODE (x))
5287 case EQ:
5288 fputs ("=", file); break;
5289 case NE:
5290 fputs ("<>", file); break;
5291 case GT:
5292 fputs ("<", file); break;
5293 case GE:
5294 fputs ("<=", file); break;
5295 case GEU:
5296 fputs ("<<=", file); break;
5297 case GTU:
5298 fputs ("<<", file); break;
5299 case LT:
5300 fputs (">", file); break;
5301 case LE:
5302 fputs (">=", file); break;
5303 case LEU:
5304 fputs (">>=", file); break;
5305 case LTU:
5306 fputs (">>", file); break;
5307 default:
5308 gcc_unreachable ();
5310 return;
5311 case 'B': /* Condition, (B)oth swapped and negate. */
5312 switch (GET_CODE (x))
5314 case EQ:
5315 fputs ("<>", file); break;
5316 case NE:
5317 fputs ("=", file); break;
5318 case GT:
5319 fputs (">=", file); break;
5320 case GE:
5321 fputs (">", file); break;
5322 case GEU:
5323 fputs (">>", file); break;
5324 case GTU:
5325 fputs (">>=", file); break;
5326 case LT:
5327 fputs ("<=", file); break;
5328 case LE:
5329 fputs ("<", file); break;
5330 case LEU:
5331 fputs ("<<", file); break;
5332 case LTU:
5333 fputs ("<<=", file); break;
5334 default:
5335 gcc_unreachable ();
5337 return;
5338 case 'k':
5339 gcc_assert (GET_CODE (x) == CONST_INT);
5340 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5341 return;
5342 case 'Q':
5343 gcc_assert (GET_CODE (x) == CONST_INT);
5344 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5345 return;
5346 case 'L':
5347 gcc_assert (GET_CODE (x) == CONST_INT);
5348 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5349 return;
5350 case 'o':
5351 gcc_assert (GET_CODE (x) == CONST_INT
5352 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5353 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5354 return;
5355 case 'O':
5356 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5357 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5358 return;
5359 case 'p':
5360 gcc_assert (GET_CODE (x) == CONST_INT);
5361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5362 return;
5363 case 'P':
5364 gcc_assert (GET_CODE (x) == CONST_INT);
5365 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5366 return;
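/* The 'Q', 'L', 'p' and 'P' cases above print complemented shift and
   deposit operands; e.g. for INTVAL (x) == 5, %L prints 27 (32 - 5)
   and %P prints 26 (31 - 5). PA shift and deposit instructions number
   bits from the left, hence the complements. */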
5367 case 'I':
5368 if (GET_CODE (x) == CONST_INT)
5369 fputs ("i", file);
5370 return;
5371 case 'M':
5372 case 'F':
5373 switch (GET_CODE (XEXP (x, 0)))
5375 case PRE_DEC:
5376 case PRE_INC:
5377 if (ASSEMBLER_DIALECT == 0)
5378 fputs ("s,mb", file);
5379 else
5380 fputs (",mb", file);
5381 break;
5382 case POST_DEC:
5383 case POST_INC:
5384 if (ASSEMBLER_DIALECT == 0)
5385 fputs ("s,ma", file);
5386 else
5387 fputs (",ma", file);
5388 break;
5389 case PLUS:
5390 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5391 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5393 if (ASSEMBLER_DIALECT == 0)
5394 fputs ("x", file);
5396 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5397 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5399 if (ASSEMBLER_DIALECT == 0)
5400 fputs ("x,s", file);
5401 else
5402 fputs (",s", file);
5404 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5405 fputs ("s", file);
5406 break;
5407 default:
5408 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5409 fputs ("s", file);
5410 break;
5412 return;
5413 case 'G':
5414 pa_output_global_address (file, x, 0);
5415 return;
5416 case 'H':
5417 pa_output_global_address (file, x, 1);
5418 return;
5419 case 0: /* Don't do anything special */
5420 break;
5421 case 'Z':
5423 unsigned op[3];
5424 compute_zdepwi_operands (INTVAL (x), op);
5425 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5426 return;
5428 case 'z':
5430 unsigned op[3];
5431 compute_zdepdi_operands (INTVAL (x), op);
5432 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5433 return;
5435 case 'c':
5436 /* We can get here from a .vtable_inherit due to our
5437 CONSTANT_ADDRESS_P rejecting perfectly good constant
5438 addresses. */
5439 break;
5440 default:
5441 gcc_unreachable ();
5443 if (GET_CODE (x) == REG)
5445 fputs (reg_names [REGNO (x)], file);
5446 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5448 fputs ("R", file);
5449 return;
5451 if (FP_REG_P (x)
5452 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5453 && (REGNO (x) & 1) == 0)
5454 fputs ("L", file);
5456 else if (GET_CODE (x) == MEM)
5458 int size = GET_MODE_SIZE (GET_MODE (x));
5459 rtx base = NULL_RTX;
5460 switch (GET_CODE (XEXP (x, 0)))
5462 case PRE_DEC:
5463 case POST_DEC:
5464 base = XEXP (XEXP (x, 0), 0);
5465 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5466 break;
5467 case PRE_INC:
5468 case POST_INC:
5469 base = XEXP (XEXP (x, 0), 0);
5470 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5471 break;
5472 case PLUS:
5473 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5474 fprintf (file, "%s(%s)",
5475 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5476 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5477 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5478 fprintf (file, "%s(%s)",
5479 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5480 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5481 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5482 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5484 /* Because the REG_POINTER flag can get lost during reload,
5485 pa_legitimate_address_p canonicalizes the order of the
5486 index and base registers in the combined move patterns. */
5487 rtx base = XEXP (XEXP (x, 0), 1);
5488 rtx index = XEXP (XEXP (x, 0), 0);
5490 fprintf (file, "%s(%s)",
5491 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5493 else
5494 output_address (GET_MODE (x), XEXP (x, 0));
5495 break;
5496 default:
5497 output_address (GET_MODE (x), XEXP (x, 0));
5498 break;
5501 else
5502 output_addr_const (file, x);
5505 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5507 void
5508 pa_output_global_address (FILE *file, rtx x, int round_constant)
5511 /* Imagine (high (const (plus ...))). */
5512 if (GET_CODE (x) == HIGH)
5513 x = XEXP (x, 0);
5515 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5516 output_addr_const (file, x);
5517 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5519 output_addr_const (file, x);
5520 fputs ("-$global$", file);
5522 else if (GET_CODE (x) == CONST)
5524 const char *sep = "";
5525 int offset = 0; /* assembler wants -$global$ at end */
5526 rtx base = NULL_RTX;
5528 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5530 case LABEL_REF:
5531 case SYMBOL_REF:
5532 base = XEXP (XEXP (x, 0), 0);
5533 output_addr_const (file, base);
5534 break;
5535 case CONST_INT:
5536 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5537 break;
5538 default:
5539 gcc_unreachable ();
5542 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5544 case LABEL_REF:
5545 case SYMBOL_REF:
5546 base = XEXP (XEXP (x, 0), 1);
5547 output_addr_const (file, base);
5548 break;
5549 case CONST_INT:
5550 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5551 break;
5552 default:
5553 gcc_unreachable ();
5556 /* How bogus. The compiler is apparently responsible for
5557 rounding the constant if it uses an LR field selector.
5559 The linker and/or assembler seem a better place since
5560 they have to do this kind of thing already.
5562 If we fail to do this, HP's optimizing linker may eliminate
5563 an addil, but not update the ldw/stw/ldo instruction that
5564 uses the result of the addil. */
5565 if (round_constant)
5566 offset = ((offset + 0x1000) & ~0x1fff);
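/* That is, round to the nearest multiple of 0x2000 (8k); e.g. an
   offset of 0x1234 becomes 0x2000, while 0xfff rounds down to 0. */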
5568 switch (GET_CODE (XEXP (x, 0)))
5570 case PLUS:
5571 if (offset < 0)
5573 offset = -offset;
5574 sep = "-";
5576 else
5577 sep = "+";
5578 break;
5580 case MINUS:
5581 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5582 sep = "-";
5583 break;
5585 default:
5586 gcc_unreachable ();
5589 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5590 fputs ("-$global$", file);
5591 if (offset)
5592 fprintf (file, "%s%d", sep, offset);
5594 else
5595 output_addr_const (file, x);
5598 /* Output boilerplate text to appear at the beginning of the file.
5599 There are several possible versions. */
5600 #define aputs(x) fputs(x, asm_out_file)
5601 static inline void
5602 pa_file_start_level (void)
5604 if (TARGET_64BIT)
5605 aputs ("\t.LEVEL 2.0w\n");
5606 else if (TARGET_PA_20)
5607 aputs ("\t.LEVEL 2.0\n");
5608 else if (TARGET_PA_11)
5609 aputs ("\t.LEVEL 1.1\n");
5610 else
5611 aputs ("\t.LEVEL 1.0\n");
5614 static inline void
5615 pa_file_start_space (int sortspace)
5617 aputs ("\t.SPACE $PRIVATE$");
5618 if (sortspace)
5619 aputs (",SORT=16");
5620 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5621 if (flag_tm)
5622 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5623 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5624 "\n\t.SPACE $TEXT$");
5625 if (sortspace)
5626 aputs (",SORT=8");
5627 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5628 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5631 static inline void
5632 pa_file_start_file (int want_version)
5634 if (write_symbols != NO_DEBUG)
5636 output_file_directive (asm_out_file, main_input_filename);
5637 if (want_version)
5638 aputs ("\t.version\t\"01.01\"\n");
5642 static inline void
5643 pa_file_start_mcount (const char *aswhat)
5645 if (profile_flag)
5646 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5649 static void
5650 pa_elf_file_start (void)
5652 pa_file_start_level ();
5653 pa_file_start_mcount ("ENTRY");
5654 pa_file_start_file (0);
5657 static void
5658 pa_som_file_start (void)
5660 pa_file_start_level ();
5661 pa_file_start_space (0);
5662 aputs ("\t.IMPORT $global$,DATA\n"
5663 "\t.IMPORT $$dyncall,MILLICODE\n");
5664 pa_file_start_mcount ("CODE");
5665 pa_file_start_file (0);
5668 static void
5669 pa_linux_file_start (void)
5671 pa_file_start_file (1);
5672 pa_file_start_level ();
5673 pa_file_start_mcount ("CODE");
5676 static void
5677 pa_hpux64_gas_file_start (void)
5679 pa_file_start_level ();
5680 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5681 if (profile_flag)
5682 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5683 #endif
5684 pa_file_start_file (1);
5687 static void
5688 pa_hpux64_hpas_file_start (void)
5690 pa_file_start_level ();
5691 pa_file_start_space (1);
5692 pa_file_start_mcount ("CODE");
5693 pa_file_start_file (0);
5695 #undef aputs
5697 /* Search the deferred plabel list for SYMBOL and return its internal
5698 label. If an entry for SYMBOL is not found, a new entry is created. */
5701 pa_get_deferred_plabel (rtx symbol)
5703 const char *fname = XSTR (symbol, 0);
5704 size_t i;
5706 /* See if we have already put this function on the list of deferred
5707 plabels. This list is generally small, so a linear search is not
5708 too ugly. If it proves too slow, replace it with something faster. */
5709 for (i = 0; i < n_deferred_plabels; i++)
5710 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5711 break;
5713 /* If the deferred plabel list is empty, or this entry was not found
5714 on the list, create a new entry on the list. */
5715 if (deferred_plabels == NULL || i == n_deferred_plabels)
5717 tree id;
5719 if (deferred_plabels == 0)
5720 deferred_plabels = ggc_alloc<deferred_plabel> ();
5721 else
5722 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5723 deferred_plabels,
5724 n_deferred_plabels + 1);
5726 i = n_deferred_plabels++;
5727 deferred_plabels[i].internal_label = gen_label_rtx ();
5728 deferred_plabels[i].symbol = symbol;
5730 /* Gross. We have just implicitly taken the address of this
5731 function. Mark it in the same manner as assemble_name. */
5732 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5733 if (id)
5734 mark_referenced (id);
5737 return deferred_plabels[i].internal_label;
5740 static void
5741 output_deferred_plabels (void)
5743 size_t i;
5745 /* If we have some deferred plabels, then we need to switch into the
5746 data or readonly data section, and align it to a pointer-sized
5747 (4 or 8 byte) boundary before outputting the deferred plabels. */
5748 if (n_deferred_plabels)
5750 switch_to_section (flag_pic ? data_section : readonly_data_section);
5751 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5754 /* Now output the deferred plabels. */
5755 for (i = 0; i < n_deferred_plabels; i++)
5757 targetm.asm_out.internal_label (asm_out_file, "L",
5758 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5759 assemble_integer (deferred_plabels[i].symbol,
5760 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5764 /* Initialize optabs to point to emulation routines. */
5766 static void
5767 pa_init_libfuncs (void)
5769 if (HPUX_LONG_DOUBLE_LIBRARY)
5771 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5772 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5773 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5774 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5775 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5776 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5777 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5778 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5779 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5781 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5782 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5783 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5784 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5785 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5786 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5787 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5789 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5790 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5791 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5792 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5794 set_conv_libfunc (sfix_optab, SImode, TFmode,
5795 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5796 : "_U_Qfcnvfxt_quad_to_sgl");
5797 set_conv_libfunc (sfix_optab, DImode, TFmode,
5798 "_U_Qfcnvfxt_quad_to_dbl");
5799 set_conv_libfunc (ufix_optab, SImode, TFmode,
5800 "_U_Qfcnvfxt_quad_to_usgl");
5801 set_conv_libfunc (ufix_optab, DImode, TFmode,
5802 "_U_Qfcnvfxt_quad_to_udbl");
5804 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5805 "_U_Qfcnvxf_sgl_to_quad");
5806 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5807 "_U_Qfcnvxf_dbl_to_quad");
5808 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5809 "_U_Qfcnvxf_usgl_to_quad");
5810 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5811 "_U_Qfcnvxf_udbl_to_quad");
5814 if (TARGET_SYNC_LIBCALL)
5815 init_sync_libfuncs (8);
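
/* Illustrative consequence of the mappings above: on HP-UX targets
   where long double is the 128-bit quad format (TFmode), an ordinary
   quad-precision add is not expanded inline but compiled into a call
   to the named library routine.  The prototype below is shown only for
   illustration.  */

long double _U_Qfadd (long double, long double);   /* HP quad-float library */

long double
quad_add (long double a, long double b)
{
  return a + b;   /* emitted as a call to _U_Qfadd */
}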
5818 /* HP's millicode routines mean something special to the assembler.
5819 Keep track of which ones we have used. */
5821 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5822 static void import_milli (enum millicodes);
5823 static char imported[(int) end1000];
5824 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5825 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5826 #define MILLI_START 10
5828 static void
5829 import_milli (enum millicodes code)
5831 char str[sizeof (import_string)];
5833 if (!imported[(int) code])
5835 imported[(int) code] = 1;
5836 strcpy (str, import_string);
5837 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5838 output_asm_insn (str, 0);
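
/* Standalone sketch of the splice above: MILLI_START (10) is the
   offset of the "$$...." placeholder in import_string, and strncpy
   overwrites exactly the four dots without touching the rest of the
   template.  */

#include <stdio.h>
#include <string.h>

int
main (void)
{
  static const char tmpl[] = ".IMPORT $$....,MILLICODE";
  char str[sizeof tmpl];

  strcpy (str, tmpl);
  strncpy (str + 10, "mulI", 4);   /* "...." -> "mulI" */
  puts (str);                      /* prints: .IMPORT $$mulI,MILLICODE */
  return 0;
}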
5842 /* The register constraints have put the operands and return value in
5843 the proper registers. */
5845 const char *
5846 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5848 import_milli (mulI);
5849 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5852 /* Emit the rtl for doing a division by a constant. */
5854 /* Do magic division millicodes exist for this value? */
5855 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5857 /* We'll use an array to keep track of the magic millicodes and
5858 whether or not we've used them already. [n][0] is signed, [n][1] is
5859 unsigned. */
5861 static int div_milli[16][2];
5864 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5866 if (GET_CODE (operands[2]) == CONST_INT
5867 && INTVAL (operands[2]) > 0
5868 && INTVAL (operands[2]) < 16
5869 && pa_magic_milli[INTVAL (operands[2])])
5871 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5873 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5874 emit
5875 (gen_rtx_PARALLEL
5876 (VOIDmode,
5877 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5878 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5879 SImode,
5880 gen_rtx_REG (SImode, 26),
5881 operands[2])),
5882 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5883 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5884 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5885 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5886 gen_rtx_CLOBBER (VOIDmode, ret))));
5887 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5888 return 1;
5890 return 0;
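
/* Worked example (illustrative): for `x / 7', INTVAL (operands[2]) is 7
   and pa_magic_milli[7] is 1, so the expansion above succeeds and the
   division becomes a call to the magic millicode $$divI_7 ($$divU_7
   when unsigned).  For `x / 11', pa_magic_milli[11] is 0, the function
   returns 0, and the generic $$divI path in pa_output_div_insn is used
   instead.  */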
5893 const char *
5894 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5896 int divisor;
5898 /* If the divisor is a constant, try to use one of the special
5899 opcodes. */
5900 if (GET_CODE (operands[0]) == CONST_INT)
5902 static char buf[100];
5903 divisor = INTVAL (operands[0]);
5904 if (!div_milli[divisor][unsignedp])
5906 div_milli[divisor][unsignedp] = 1;
5907 if (unsignedp)
5908 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5909 else
5910 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5912 if (unsignedp)
5914 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5915 INTVAL (operands[0]));
5916 return pa_output_millicode_call (insn,
5917 gen_rtx_SYMBOL_REF (SImode, buf));
5919 else
5921 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5922 INTVAL (operands[0]));
5923 return pa_output_millicode_call (insn,
5924 gen_rtx_SYMBOL_REF (SImode, buf));
5927 /* Divisor isn't a special constant. */
5928 else
5930 if (unsignedp)
5932 import_milli (divU);
5933 return pa_output_millicode_call (insn,
5934 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5936 else
5938 import_milli (divI);
5939 return pa_output_millicode_call (insn,
5940 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5945 /* Output a $$rem millicode to do mod. */
5947 const char *
5948 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5950 if (unsignedp)
5952 import_milli (remU);
5953 return pa_output_millicode_call (insn,
5954 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5956 else
5958 import_milli (remI);
5959 return pa_output_millicode_call (insn,
5960 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5964 void
5965 pa_output_arg_descriptor (rtx_insn *call_insn)
5967 const char *arg_regs[4];
5968 machine_mode arg_mode;
5969 rtx link;
5970 int i, output_flag = 0;
5971 int regno;
5973 /* We neither need nor want argument location descriptors for the
5974 64-bit runtime environment or the ELF32 environment. */
5975 if (TARGET_64BIT || TARGET_ELF32)
5976 return;
5978 for (i = 0; i < 4; i++)
5979 arg_regs[i] = 0;
5981 /* Specify explicitly that no argument relocations should take place
5982 if using the portable runtime calling conventions. */
5983 if (TARGET_PORTABLE_RUNTIME)
5985 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5986 asm_out_file);
5987 return;
5990 gcc_assert (CALL_P (call_insn));
5991 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5992 link; link = XEXP (link, 1))
5994 rtx use = XEXP (link, 0);
5996 if (! (GET_CODE (use) == USE
5997 && GET_CODE (XEXP (use, 0)) == REG
5998 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5999 continue;
6001 arg_mode = GET_MODE (XEXP (use, 0));
6002 regno = REGNO (XEXP (use, 0));
6003 if (regno >= 23 && regno <= 26)
6005 arg_regs[26 - regno] = "GR";
6006 if (arg_mode == DImode)
6007 arg_regs[25 - regno] = "GR";
6009 else if (regno >= 32 && regno <= 39)
6011 if (arg_mode == SFmode)
6012 arg_regs[(regno - 32) / 2] = "FR";
6013 else
6015 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6016 arg_regs[(regno - 34) / 2] = "FR";
6017 arg_regs[(regno - 34) / 2 + 1] = "FU";
6018 #else
6019 arg_regs[(regno - 34) / 2] = "FU";
6020 arg_regs[(regno - 34) / 2 + 1] = "FR";
6021 #endif
6025 fputs ("\t.CALL ", asm_out_file);
6026 for (i = 0; i < 4; i++)
6028 if (arg_regs[i])
6030 if (output_flag++)
6031 fputc (',', asm_out_file);
6032 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6035 fputc ('\n', asm_out_file);
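
/* Example (illustrative): for a 32-bit call to f (int, double) in which
   the int occupies argument word 0 (%r26) and the double the aligned
   pair of words 2 and 3, the loop above would emit

       .CALL ARGW0=GR,ARGW2=FR,ARGW3=FU

   telling the linker which argument words may need relocation between
   general and floating-point registers (the FR/FU order depends on
   HP_FP_ARG_DESCRIPTOR_REVERSED).  */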
6038 /* Inform reload about cases where moving X with a mode MODE to or from
6039 a register in RCLASS requires an extra scratch or immediate register.
6040 Return the class needed for the immediate register. */
6042 static reg_class_t
6043 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6044 machine_mode mode, secondary_reload_info *sri)
6046 int regno;
6047 enum reg_class rclass = (enum reg_class) rclass_i;
6049 /* Handle the easy stuff first. */
6050 if (rclass == R1_REGS)
6051 return NO_REGS;
6053 if (REG_P (x))
6055 regno = REGNO (x);
6056 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6057 return NO_REGS;
6059 else
6060 regno = -1;
6062 /* If we have something like (mem (mem (...))), we can safely assume the
6063 inner MEM will end up in a general register after reloading, so there's
6064 no need for a secondary reload. */
6065 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6066 return NO_REGS;
6068 /* Trying to load a constant into a FP register during PIC code
6069 generation requires %r1 as a scratch register. For float modes,
6070 the only legitimate constant is CONST0_RTX. However, there are
6071 a few patterns that accept constant double operands. */
6072 if (flag_pic
6073 && FP_REG_CLASS_P (rclass)
6074 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6076 switch (mode)
6078 case SImode:
6079 sri->icode = CODE_FOR_reload_insi_r1;
6080 break;
6082 case DImode:
6083 sri->icode = CODE_FOR_reload_indi_r1;
6084 break;
6086 case SFmode:
6087 sri->icode = CODE_FOR_reload_insf_r1;
6088 break;
6090 case DFmode:
6091 sri->icode = CODE_FOR_reload_indf_r1;
6092 break;
6094 default:
6095 gcc_unreachable ();
6097 return NO_REGS;
6100 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6101 register when we're generating PIC code or when the operand isn't
6102 readonly. */
6103 if (pa_symbolic_expression_p (x))
6105 if (GET_CODE (x) == HIGH)
6106 x = XEXP (x, 0);
6108 if (flag_pic || !read_only_operand (x, VOIDmode))
6110 switch (mode)
6112 case SImode:
6113 sri->icode = CODE_FOR_reload_insi_r1;
6114 break;
6116 case DImode:
6117 sri->icode = CODE_FOR_reload_indi_r1;
6118 break;
6120 default:
6121 gcc_unreachable ();
6123 return NO_REGS;
6127 /* Profiling showed the PA port spends about 1.3% of its compilation
6128 time in true_regnum from calls inside pa_secondary_reload_class. */
6129 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6130 regno = true_regnum (x);
6132 /* Handle reloads for floating point loads and stores. */
6133 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6134 && FP_REG_CLASS_P (rclass))
6136 if (MEM_P (x))
6138 x = XEXP (x, 0);
6140 /* We don't need a secondary reload for indexed memory addresses.
6142 When INT14_OK_STRICT is true, it might appear that we could
6143 directly allow register indirect memory addresses. However,
6144 this doesn't work because we don't support SUBREGs in
6145 floating-point register copies and reload doesn't tell us
6146 when it's going to use a SUBREG. */
6147 if (IS_INDEX_ADDR_P (x))
6148 return NO_REGS;
6151 /* Request a secondary reload with a general scratch register
6152 for everything else. ??? Could symbolic operands be handled
6153 directly when generating non-pic PA 2.0 code? */
6154 sri->icode = (in_p
6155 ? direct_optab_handler (reload_in_optab, mode)
6156 : direct_optab_handler (reload_out_optab, mode));
6157 return NO_REGS;
6160 /* A SAR<->FP register copy requires an intermediate general register
6161 and secondary memory. We need a secondary reload with a general
6162 scratch register for spills. */
6163 if (rclass == SHIFT_REGS)
6165 /* Handle spill. */
6166 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6168 sri->icode = (in_p
6169 ? direct_optab_handler (reload_in_optab, mode)
6170 : direct_optab_handler (reload_out_optab, mode));
6171 return NO_REGS;
6174 /* Handle FP copy. */
6175 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6176 return GENERAL_REGS;
6179 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6180 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6181 && FP_REG_CLASS_P (rclass))
6182 return GENERAL_REGS;
6184 return NO_REGS;
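
/* Example (illustrative): under PIC, loading a symbolic address whose
   destination class is FP_REGS is caught by the symbolic case above and
   routed through CODE_FOR_reload_insi_r1, a pattern that scratches %r1.
   An FP load from an indexed address (base plus index) needs no
   secondary reload at all, while other FP memory reloads are given a
   general scratch register through reload_in_optab/reload_out_optab.  */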
6187 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6188 is only marked as live on entry by df-scan when it is a fixed
6189 register. It isn't a fixed register in the 64-bit runtime,
6190 so we need to mark it here. */
6192 static void
6193 pa_extra_live_on_entry (bitmap regs)
6195 if (TARGET_64BIT)
6196 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6199 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6200 to prevent it from being deleted. */
6202 rtx
6203 pa_eh_return_handler_rtx (void)
6205 rtx tmp;
6207 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6208 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6209 tmp = gen_rtx_MEM (word_mode, tmp);
6210 tmp->volatil = 1;
6211 return tmp;
6214 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6215 by invisible reference. As a GCC extension, we also pass anything
6216 with a zero or variable size by reference.
6218 The 64-bit runtime does not describe passing any types by invisible
6219 reference. The internals of GCC can't currently handle passing
6220 empty structures, and zero or variable length arrays when they are
6221 not passed entirely on the stack or by reference. Thus, as a GCC
6222 extension, we pass these types by reference. The HP compiler doesn't
6223 support these types, so hopefully there shouldn't be any compatibility
6224 issues. This may have to be revisited when HP releases a C99 compiler
6225 or updates the ABI. */
6227 static bool
6228 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6229 machine_mode mode, const_tree type,
6230 bool named ATTRIBUTE_UNUSED)
6232 HOST_WIDE_INT size;
6234 if (type)
6235 size = int_size_in_bytes (type);
6236 else
6237 size = GET_MODE_SIZE (mode);
6239 if (TARGET_64BIT)
6240 return size <= 0;
6241 else
6242 return size <= 0 || size > 8;
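
/* Illustrative consequence of the rule above: */

struct s12 { int a, b, c; };       /* 12 bytes, fixed size */

extern void callee (struct s12);
/* 32-bit runtime: 12 > 8, so the caller passes the address of a copy
   (invisible reference).  64-bit runtime: passed by value.  A zero-sized
   or variable-sized object would be passed by reference on both.  */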
6245 enum direction
6246 pa_function_arg_padding (machine_mode mode, const_tree type)
6248 if (mode == BLKmode
6249 || (TARGET_64BIT
6250 && type
6251 && (AGGREGATE_TYPE_P (type)
6252 || TREE_CODE (type) == COMPLEX_TYPE
6253 || TREE_CODE (type) == VECTOR_TYPE)))
6255 /* Return none if justification is not required. */
6256 if (type
6257 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6258 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6259 return none;
6261 /* The directions set here are ignored when a BLKmode argument larger
6262 than a word is placed in a register. Different code is used for
6263 the stack and registers. This makes it difficult to have a
6264 consistent data representation for both the stack and registers.
6265 For both runtimes, the justification and padding for arguments on
6266 the stack and in registers should be identical. */
6267 if (TARGET_64BIT)
6268 /* The 64-bit runtime specifies left justification for aggregates. */
6269 return upward;
6270 else
6271 /* The 32-bit runtime architecture specifies right justification.
6272 When the argument is passed on the stack, the argument is padded
6273 with garbage on the left. The HP compiler pads with zeros. */
6274 return downward;
6277 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6278 return downward;
6279 else
6280 return none;
6284 /* Do what is necessary for `va_start'. We look at the current function
6285 to determine if stdargs or varargs is used and fill in an initial
6286 va_list. A pointer to this constructor is returned. */
6288 static rtx
6289 hppa_builtin_saveregs (void)
6291 rtx offset, dest;
6292 tree fntype = TREE_TYPE (current_function_decl);
6293 int argadj = ((!stdarg_p (fntype))
6294 ? UNITS_PER_WORD : 0);
6296 if (argadj)
6297 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6298 else
6299 offset = crtl->args.arg_offset_rtx;
6301 if (TARGET_64BIT)
6303 int i, off;
6305 /* Adjust for varargs/stdarg differences. */
6306 if (argadj)
6307 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6308 else
6309 offset = crtl->args.arg_offset_rtx;
6311 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6312 from the incoming arg pointer and growing to larger addresses. */
6313 for (i = 26, off = -64; i >= 19; i--, off += 8)
6314 emit_move_insn (gen_rtx_MEM (word_mode,
6315 plus_constant (Pmode,
6316 arg_pointer_rtx, off)),
6317 gen_rtx_REG (word_mode, i));
6319 /* The incoming args pointer points just beyond the flushback area;
6320 normally this is not a serious concern. However, when we are doing
6321 varargs/stdargs we want to make the arg pointer point to the start
6322 of the incoming argument area. */
6323 emit_move_insn (virtual_incoming_args_rtx,
6324 plus_constant (Pmode, arg_pointer_rtx, -64));
6326 /* Now return a pointer to the first anonymous argument. */
6327 return copy_to_reg (expand_binop (Pmode, add_optab,
6328 virtual_incoming_args_rtx,
6329 offset, 0, 0, OPTAB_LIB_WIDEN));
6332 /* Store general registers on the stack. */
6333 dest = gen_rtx_MEM (BLKmode,
6334 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6335 -16));
6336 set_mem_alias_set (dest, get_varargs_alias_set ());
6337 set_mem_align (dest, BITS_PER_WORD);
6338 move_block_from_reg (23, dest, 4);
6340 /* move_block_from_reg will emit code to store the argument registers
6341 individually as scalar stores.
6343 However, other insns may later load from the same addresses for
6344 a structure load (passing a struct to a varargs routine).
6346 The alias code assumes that such aliasing can never happen, so we
6347 have to keep memory referencing insns from moving up beyond the
6348 last argument register store. So we emit a blockage insn here. */
6349 emit_insn (gen_blockage ());
6351 return copy_to_reg (expand_binop (Pmode, add_optab,
6352 crtl->args.internal_arg_pointer,
6353 offset, 0, 0, OPTAB_LIB_WIDEN));
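
/* So, for the 64-bit path above, the eight argument registers land at
   %r26 -> ap-64, %r25 -> ap-56, ..., %r19 -> ap-8: argument order at
   increasing addresses, which is the layout the generic va_arg code
   expects once the arg pointer has been backed up to ap-64.  */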
6356 static void
6357 hppa_va_start (tree valist, rtx nextarg)
6359 nextarg = expand_builtin_saveregs ();
6360 std_expand_builtin_va_start (valist, nextarg);
6363 static tree
6364 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6365 gimple_seq *post_p)
6367 if (TARGET_64BIT)
6369 /* Args grow upward. We can use the generic routines. */
6370 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6372 else /* !TARGET_64BIT */
6374 tree ptr = build_pointer_type (type);
6375 tree valist_type;
6376 tree t, u;
6377 unsigned int size, ofs;
6378 bool indirect;
6380 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6381 if (indirect)
6383 type = ptr;
6384 ptr = build_pointer_type (type);
6386 size = int_size_in_bytes (type);
6387 valist_type = TREE_TYPE (valist);
6389 /* Args grow down. Not handled by generic routines. */
6391 u = fold_convert (sizetype, size_in_bytes (type));
6392 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6393 t = fold_build_pointer_plus (valist, u);
6395 /* Align to 4 or 8 byte boundary depending on argument size. */
6397 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6398 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6399 t = fold_convert (valist_type, t);
6401 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6403 ofs = (8 - size) % 4;
6404 if (ofs != 0)
6405 t = fold_build_pointer_plus_hwi (t, ofs);
6407 t = fold_convert (ptr, t);
6408 t = build_va_arg_indirect_ref (t);
6410 if (indirect)
6411 t = build_va_arg_indirect_ref (t);
6413 return t;
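
/* Worked example (illustrative) of the 32-bit arithmetic above, with
   the va_list cursor at address 0x100 (args grow down, values
   right-justified):

     char   (size 1): t = (0x100 - 1) & -4 = 0xfc; ofs = (8-1)%4 = 3,
                      so the byte is fetched from 0xff;
     int    (size 4): t = (0x100 - 4) & -4 = 0xfc; ofs = 0;
     double (size 8): t = (0x100 - 8) & -8 = 0xf8; ofs = 0.

   The updated valist is the aligned t in each case, so successive
   va_arg calls walk toward lower addresses.  */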
6417 /* True if MODE is valid for the target. By "valid", we mean able to
6418 be manipulated in non-trivial ways. In particular, this means all
6419 the arithmetic is supported.
6421 Currently, TImode is not valid as the HP 64-bit runtime documentation
6422 doesn't document the alignment and calling conventions for this type.
6423 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6424 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6426 static bool
6427 pa_scalar_mode_supported_p (machine_mode mode)
6429 int precision = GET_MODE_PRECISION (mode);
6431 switch (GET_MODE_CLASS (mode))
6433 case MODE_PARTIAL_INT:
6434 case MODE_INT:
6435 if (precision == CHAR_TYPE_SIZE)
6436 return true;
6437 if (precision == SHORT_TYPE_SIZE)
6438 return true;
6439 if (precision == INT_TYPE_SIZE)
6440 return true;
6441 if (precision == LONG_TYPE_SIZE)
6442 return true;
6443 if (precision == LONG_LONG_TYPE_SIZE)
6444 return true;
6445 return false;
6447 case MODE_FLOAT:
6448 if (precision == FLOAT_TYPE_SIZE)
6449 return true;
6450 if (precision == DOUBLE_TYPE_SIZE)
6451 return true;
6452 if (precision == LONG_DOUBLE_TYPE_SIZE)
6453 return true;
6454 return false;
6456 case MODE_DECIMAL_FLOAT:
6457 return false;
6459 default:
6460 gcc_unreachable ();
6464 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6465 it branches into the delay slot. Otherwise, return FALSE. */
6467 static bool
6468 branch_to_delay_slot_p (rtx_insn *insn)
6470 rtx_insn *jump_insn;
6472 if (dbr_sequence_length ())
6473 return FALSE;
6475 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6476 while (insn)
6478 insn = next_active_insn (insn);
6479 if (jump_insn == insn)
6480 return TRUE;
6482 /* We can't rely on the length of asms. So, we return FALSE when
6483 the branch is followed by an asm. */
6484 if (!insn
6485 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6486 || asm_noperands (PATTERN (insn)) >= 0
6487 || get_attr_length (insn) > 0)
6488 break;
6491 return FALSE;
6494 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6496 This occurs when INSN has an unfilled delay slot and is followed
6497 by an asm. Disaster can occur if the asm is empty and the jump
6498 branches into the delay slot. So, we add a nop in the delay slot
6499 when this occurs. */
6501 static bool
6502 branch_needs_nop_p (rtx_insn *insn)
6504 rtx_insn *jump_insn;
6506 if (dbr_sequence_length ())
6507 return FALSE;
6509 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6510 while (insn)
6512 insn = next_active_insn (insn);
6513 if (!insn || jump_insn == insn)
6514 return TRUE;
6516 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6517 || asm_noperands (PATTERN (insn)) >= 0)
6518 && get_attr_length (insn) > 0)
6519 break;
6522 return FALSE;
6525 /* Return TRUE if INSN, a forward jump insn, can use nullification
6526 to skip the following instruction. This avoids an extra cycle due
6527 to a mis-predicted branch when we fall through. */
6529 static bool
6530 use_skip_p (rtx_insn *insn)
6532 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6534 while (insn)
6536 insn = next_active_insn (insn);
6538 /* We can't rely on the length of asms, so we can't skip asms. */
6539 if (!insn
6540 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6541 || asm_noperands (PATTERN (insn)) >= 0)
6542 break;
6543 if (get_attr_length (insn) == 4
6544 && jump_insn == next_active_insn (insn))
6545 return TRUE;
6546 if (get_attr_length (insn) > 0)
6547 break;
6550 return FALSE;
6553 /* This routine handles all the normal conditional branch sequences we
6554 might need to generate. It handles compare immediate vs compare
6555 register, nullification of delay slots, varying length branches,
6556 negated branches, and all combinations of the above. It returns the
6557 output appropriate to emit the branch corresponding to all given
6558 parameters. */
6560 const char *
6561 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6563 static char buf[100];
6564 bool useskip;
6565 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6566 int length = get_attr_length (insn);
6567 int xdelay;
6569 /* A conditional branch to the following instruction (e.g. the delay slot)
6570 is asking for a disaster. This can happen when not optimizing and
6571 when jump optimization fails.
6573 While it is usually safe to emit nothing, this can fail if the
6574 preceding instruction is a nullified branch with an empty delay
6575 slot and the same branch target as this branch. We could check
6576 for this but jump optimization should eliminate nop jumps. It
6577 is always safe to emit a nop. */
6578 if (branch_to_delay_slot_p (insn))
6579 return "nop";
6581 /* The doubleword form of the cmpib instruction doesn't have the LEU
6582 and GTU conditions while the cmpb instruction does. Since we accept
6583 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6584 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6585 operands[2] = gen_rtx_REG (DImode, 0);
6586 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6587 operands[1] = gen_rtx_REG (DImode, 0);
6589 /* If this is a long branch with its delay slot unfilled, set `nullify'
6590 as it can nullify the delay slot and save a nop. */
6591 if (length == 8 && dbr_sequence_length () == 0)
6592 nullify = 1;
6594 /* If this is a short forward conditional branch which did not get
6595 its delay slot filled, the delay slot can still be nullified. */
6596 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6597 nullify = forward_branch_p (insn);
6599 /* A forward branch over a single nullified insn can be done with a
6600 comclr instruction. This avoids a single cycle penalty due to
6601 mis-predicted branch if we fall through (branch not taken). */
6602 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6604 switch (length)
6606 /* All short conditional branches except backwards with an unfilled
6607 delay slot. */
6608 case 4:
6609 if (useskip)
6610 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6611 else
6612 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6613 if (GET_MODE (operands[1]) == DImode)
6614 strcat (buf, "*");
6615 if (negated)
6616 strcat (buf, "%B3");
6617 else
6618 strcat (buf, "%S3");
6619 if (useskip)
6620 strcat (buf, " %2,%r1,%%r0");
6621 else if (nullify)
6623 if (branch_needs_nop_p (insn))
6624 strcat (buf, ",n %2,%r1,%0%#");
6625 else
6626 strcat (buf, ",n %2,%r1,%0");
6628 else
6629 strcat (buf, " %2,%r1,%0");
6630 break;
6632 /* All long conditionals. Note a short backward branch with an
6633 unfilled delay slot is treated just like a long backward branch
6634 with an unfilled delay slot. */
6635 case 8:
6636 /* Handle weird backwards branch with a filled delay slot
6637 which is nullified. */
6638 if (dbr_sequence_length () != 0
6639 && ! forward_branch_p (insn)
6640 && nullify)
6642 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6643 if (GET_MODE (operands[1]) == DImode)
6644 strcat (buf, "*");
6645 if (negated)
6646 strcat (buf, "%S3");
6647 else
6648 strcat (buf, "%B3");
6649 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6651 /* Handle short backwards branch with an unfilled delay slot.
6652 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6653 taken and untaken branches. */
6654 else if (dbr_sequence_length () == 0
6655 && ! forward_branch_p (insn)
6656 && INSN_ADDRESSES_SET_P ()
6657 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6658 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6660 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6661 if (GET_MODE (operands[1]) == DImode)
6662 strcat (buf, "*");
6663 if (negated)
6664 strcat (buf, "%B3 %2,%r1,%0%#");
6665 else
6666 strcat (buf, "%S3 %2,%r1,%0%#");
6668 else
6670 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6671 if (GET_MODE (operands[1]) == DImode)
6672 strcat (buf, "*");
6673 if (negated)
6674 strcat (buf, "%S3");
6675 else
6676 strcat (buf, "%B3");
6677 if (nullify)
6678 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6679 else
6680 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6682 break;
6684 default:
6685 /* The reversed conditional branch must branch over one additional
6686 instruction if the delay slot is filled and needs to be extracted
6687 by pa_output_lbranch. If the delay slot is empty or this is a
6688 nullified forward branch, the instruction after the reversed
6689 condition branch must be nullified. */
6690 if (dbr_sequence_length () == 0
6691 || (nullify && forward_branch_p (insn)))
6693 nullify = 1;
6694 xdelay = 0;
6695 operands[4] = GEN_INT (length);
6697 else
6699 xdelay = 1;
6700 operands[4] = GEN_INT (length + 4);
6703 /* Create a reversed conditional branch which branches around
6704 the following insns. */
6705 if (GET_MODE (operands[1]) != DImode)
6707 if (nullify)
6709 if (negated)
6710 strcpy (buf,
6711 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6712 else
6713 strcpy (buf,
6714 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6716 else
6718 if (negated)
6719 strcpy (buf,
6720 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6721 else
6722 strcpy (buf,
6723 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6726 else
6728 if (nullify)
6730 if (negated)
6731 strcpy (buf,
6732 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6733 else
6734 strcpy (buf,
6735 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6737 else
6739 if (negated)
6740 strcpy (buf,
6741 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6742 else
6743 strcpy (buf,
6744 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6748 output_asm_insn (buf, operands);
6749 return pa_output_lbranch (operands[0], insn, xdelay);
6751 return buf;
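
/* Example output (illustrative registers and label): the common 4-byte
   case with a nullified delay slot comes out as

       cmpb,=,n %r25,%r26,L$0012

   while the out-of-range default case emits the reversed, nullifying
   comparison followed by the pa_output_lbranch sequence, e.g.

       cmpb,<>,n %r25,%r26,.+20
       ...long branch to the original target...                       */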
6754 /* Output a PIC pc-relative instruction sequence to load the address of
6755 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6756 or a code label. OPERANDS[1] specifies the register to use to load
6757 the program counter. OPERANDS[3] may be used for label generation.
6758 The sequence is always three instructions in length. The program
6759 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6760 Register %r1 is clobbered. */
6762 static void
6763 pa_output_pic_pcrel_sequence (rtx *operands)
6765 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6766 if (TARGET_PA_20)
6768 /* We can use mfia to determine the current program counter. */
6769 if (TARGET_SOM || !TARGET_GAS)
6771 operands[3] = gen_label_rtx ();
6772 targetm.asm_out.internal_label (asm_out_file, "L",
6773 CODE_LABEL_NUMBER (operands[3]));
6774 output_asm_insn ("mfia %1", operands);
6775 output_asm_insn ("addil L'%0-%l3,%1", operands);
6776 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6778 else
6780 output_asm_insn ("mfia %1", operands);
6781 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6782 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6785 else
6787 /* We need to use a branch to determine the current program counter. */
6788 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6789 if (TARGET_SOM || !TARGET_GAS)
6791 operands[3] = gen_label_rtx ();
6792 output_asm_insn ("addil L'%0-%l3,%1", operands);
6793 targetm.asm_out.internal_label (asm_out_file, "L",
6794 CODE_LABEL_NUMBER (operands[3]));
6795 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6797 else
6799 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6800 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
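
/* E.g., on PA 2.0 with GAS, the mfia branch of the sequence above comes
   out as (registers illustrative)

       mfia %r1
       addil L'sym-$PIC_pcrel$0+12,%r1
       ldo R'sym-$PIC_pcrel$0+16(%r1),%r1

   where the +12/+16 constants fold in the distance between the point
   at which the program counter was sampled and the instructions that
   use it.  */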
6805 /* This routine handles output of long unconditional branches that
6806 exceed the maximum range of a simple branch instruction. Since
6807 we don't have a register available for the branch, we save register
6808 %r1 in the frame marker, load the branch destination DEST into %r1,
6809 execute the branch, and restore %r1 in the delay slot of the branch.
6811 Since long branches may have an insn in the delay slot and the
6812 delay slot is used to restore %r1, we in general need to extract
6813 this insn and execute it before the branch. However, to facilitate
6814 use of this function by conditional branches, we also provide an
6815 option to not extract the delay insn so that it will be emitted
6816 after the long branch. So, if there is an insn in the delay slot,
6817 it is extracted if XDELAY is nonzero.
6819 The lengths of the various long-branch sequences are 20, 16 and 24
6820 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6822 const char *
6823 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6825 rtx xoperands[4];
6827 xoperands[0] = dest;
6829 /* First, free up the delay slot. */
6830 if (xdelay && dbr_sequence_length () != 0)
6832 /* We can't handle a jump in the delay slot. */
6833 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6835 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6836 optimize, 0, NULL);
6838 /* Now delete the delay insn. */
6839 SET_INSN_DELETED (NEXT_INSN (insn));
6842 /* Output an insn to save %r1. The runtime documentation doesn't
6843 specify whether the "Clean Up" slot in the caller's frame can
6844 be clobbered by the callee. It isn't copied by HP's builtin
6845 alloca, so this suggests that it can be clobbered if necessary.
6846 The "Static Link" location is copied by HP builtin alloca, so
6847 we avoid using it. Using the cleanup slot might be a problem
6848 if we have to interoperate with languages that pass cleanup
6849 information. However, it should be possible to handle these
6850 situations with GCC's asm feature.
6852 The "Current RP" slot is reserved for the called procedure, so
6853 we try to use it when we don't have a frame of our own. It's
6854 rather unlikely that we won't have a frame when we need to emit
6855 a very long branch.
6857 Really the way to go long term is a register scavenger; goto
6858 the target of the jump and find a register which we can use
6859 as a scratch to hold the value in %r1. Then, we wouldn't have
6860 to free up the delay slot or clobber a slot that may be needed
6861 for other purposes. */
6862 if (TARGET_64BIT)
6864 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6865 /* Use the return pointer slot in the frame marker. */
6866 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6867 else
6868 /* Use the slot at -40 in the frame marker since HP builtin
6869 alloca doesn't copy it. */
6870 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6872 else
6874 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6875 /* Use the return pointer slot in the frame marker. */
6876 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6877 else
6878 /* Use the "Clean Up" slot in the frame marker. In GCC,
6879 the only other use of this location is for copying a
6880 floating point double argument from a floating-point
6881 register to two general registers. The copy is done
6882 as an "atomic" operation when outputting a call, so it
6883 won't interfere with our using the location here. */
6884 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6887 if (TARGET_PORTABLE_RUNTIME)
6889 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6890 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6891 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6893 else if (flag_pic)
6895 xoperands[1] = gen_rtx_REG (Pmode, 1);
6896 xoperands[2] = xoperands[1];
6897 pa_output_pic_pcrel_sequence (xoperands);
6898 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6900 else
6901 /* Now output a very long branch to the original target. */
6902 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6904 /* Now restore the value of %r1 in the delay slot. */
6905 if (TARGET_64BIT)
6907 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6908 return "ldd -16(%%r30),%%r1";
6909 else
6910 return "ldd -40(%%r30),%%r1";
6912 else
6914 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6915 return "ldw -20(%%r30),%%r1";
6916 else
6917 return "ldw -12(%%r30),%%r1";
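
/* Putting the pieces together, the 16-byte 32-bit non-PIC case above
   is, for a function with no frame of its own (sequence illustrative):

       stw %r1,-20(%r30)          ; free %r1, using the unused RP slot
       ldil L'target,%r1
       be R'target(%sr4,%r1)      ; inter-space long branch
       ldw -20(%r30),%r1          ; delay slot: restore %r1
*/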
6921 /* This routine handles all the branch-on-bit conditional branch sequences we
6922 might need to generate. It handles nullification of delay slots,
6923 varying length branches, negated branches and all combinations of the
6924 above. It returns the appropriate output template to emit the branch. */
6926 const char *
6927 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6929 static char buf[100];
6930 bool useskip;
6931 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6932 int length = get_attr_length (insn);
6933 int xdelay;
6935 /* A conditional branch to the following instruction (e.g. the delay slot) is
6936 asking for a disaster. I do not think this can happen as this pattern
6937 is only used when optimizing; jump optimization should eliminate the
6938 jump. But be prepared just in case. */
6940 if (branch_to_delay_slot_p (insn))
6941 return "nop";
6943 /* If this is a long branch with its delay slot unfilled, set `nullify'
6944 as it can nullify the delay slot and save a nop. */
6945 if (length == 8 && dbr_sequence_length () == 0)
6946 nullify = 1;
6948 /* If this is a short forward conditional branch which did not get
6949 its delay slot filled, the delay slot can still be nullified. */
6950 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6951 nullify = forward_branch_p (insn);
6953 /* A forward branch over a single nullified insn can be done with an
6954 extrs instruction. This avoids a single cycle penalty due to
6955 mis-predicted branch if we fall through (branch not taken). */
6956 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6958 switch (length)
6961 /* All short conditional branches except backwards with an unfilled
6962 delay slot. */
6963 case 4:
6964 if (useskip)
6965 strcpy (buf, "{extrs,|extrw,s,}");
6966 else
6967 strcpy (buf, "bb,");
6968 if (useskip && GET_MODE (operands[0]) == DImode)
6969 strcpy (buf, "extrd,s,*");
6970 else if (GET_MODE (operands[0]) == DImode)
6971 strcpy (buf, "bb,*");
6972 if ((which == 0 && negated)
6973 || (which == 1 && ! negated))
6974 strcat (buf, ">=");
6975 else
6976 strcat (buf, "<");
6977 if (useskip)
6978 strcat (buf, " %0,%1,1,%%r0");
6979 else if (nullify && negated)
6981 if (branch_needs_nop_p (insn))
6982 strcat (buf, ",n %0,%1,%3%#");
6983 else
6984 strcat (buf, ",n %0,%1,%3");
6986 else if (nullify && ! negated)
6988 if (branch_needs_nop_p (insn))
6989 strcat (buf, ",n %0,%1,%2%#");
6990 else
6991 strcat (buf, ",n %0,%1,%2");
6993 else if (! nullify && negated)
6994 strcat (buf, " %0,%1,%3");
6995 else if (! nullify && ! negated)
6996 strcat (buf, " %0,%1,%2");
6997 break;
6999 /* All long conditionals. Note a short backward branch with an
7000 unfilled delay slot is treated just like a long backward branch
7001 with an unfilled delay slot. */
7002 case 8:
7003 /* Handle weird backwards branch with a filled delay slot
7004 which is nullified. */
7005 if (dbr_sequence_length () != 0
7006 && ! forward_branch_p (insn)
7007 && nullify)
7009 strcpy (buf, "bb,");
7010 if (GET_MODE (operands[0]) == DImode)
7011 strcat (buf, "*");
7012 if ((which == 0 && negated)
7013 || (which == 1 && ! negated))
7014 strcat (buf, "<");
7015 else
7016 strcat (buf, ">=");
7017 if (negated)
7018 strcat (buf, ",n %0,%1,.+12\n\tb %3");
7019 else
7020 strcat (buf, ",n %0,%1,.+12\n\tb %2");
7022 /* Handle short backwards branch with an unfilled delay slot.
7023 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7024 taken and untaken branches. */
7025 else if (dbr_sequence_length () == 0
7026 && ! forward_branch_p (insn)
7027 && INSN_ADDRESSES_SET_P ()
7028 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7029 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7031 strcpy (buf, "bb,");
7032 if (GET_MODE (operands[0]) == DImode)
7033 strcat (buf, "*");
7034 if ((which == 0 && negated)
7035 || (which == 1 && ! negated))
7036 strcat (buf, ">=");
7037 else
7038 strcat (buf, "<");
7039 if (negated)
7040 strcat (buf, " %0,%1,%3%#");
7041 else
7042 strcat (buf, " %0,%1,%2%#");
7044 else
7046 if (GET_MODE (operands[0]) == DImode)
7047 strcpy (buf, "extrd,s,*");
7048 else
7049 strcpy (buf, "{extrs,|extrw,s,}");
7050 if ((which == 0 && negated)
7051 || (which == 1 && ! negated))
7052 strcat (buf, "<");
7053 else
7054 strcat (buf, ">=");
7055 if (nullify && negated)
7056 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7057 else if (nullify && ! negated)
7058 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7059 else if (negated)
7060 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7061 else
7062 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7064 break;
7066 default:
7067 /* The reversed conditional branch must branch over one additional
7068 instruction if the delay slot is filled and needs to be extracted
7069 by pa_output_lbranch. If the delay slot is empty or this is a
7070 nullified forward branch, the instruction after the reversed
7071 condition branch must be nullified. */
7072 if (dbr_sequence_length () == 0
7073 || (nullify && forward_branch_p (insn)))
7075 nullify = 1;
7076 xdelay = 0;
7077 operands[4] = GEN_INT (length);
7079 else
7081 xdelay = 1;
7082 operands[4] = GEN_INT (length + 4);
7085 if (GET_MODE (operands[0]) == DImode)
7086 strcpy (buf, "bb,*");
7087 else
7088 strcpy (buf, "bb,");
7089 if ((which == 0 && negated)
7090 || (which == 1 && !negated))
7091 strcat (buf, "<");
7092 else
7093 strcat (buf, ">=");
7094 if (nullify)
7095 strcat (buf, ",n %0,%1,.+%4");
7096 else
7097 strcat (buf, " %0,%1,.+%4");
7098 output_asm_insn (buf, operands);
7099 return pa_output_lbranch (negated ? operands[3] : operands[2],
7100 insn, xdelay);
7102 return buf;
7105 /* This routine handles all the branch-on-variable-bit conditional branch
7106 sequences we might need to generate. It handles nullification of delay
7107 slots, varying length branches, negated branches and all combinations
7108 of the above. It returns the appropriate output template to emit the
7109 branch. */
7111 const char *
7112 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7113 int which)
7115 static char buf[100];
7116 bool useskip;
7117 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7118 int length = get_attr_length (insn);
7119 int xdelay;
7121 /* A conditional branch to the following instruction (e.g. the delay slot) is
7122 asking for a disaster. I do not think this can happen as this pattern
7123 is only used when optimizing; jump optimization should eliminate the
7124 jump. But be prepared just in case. */
7126 if (branch_to_delay_slot_p (insn))
7127 return "nop";
7129 /* If this is a long branch with its delay slot unfilled, set `nullify'
7130 as it can nullify the delay slot and save a nop. */
7131 if (length == 8 && dbr_sequence_length () == 0)
7132 nullify = 1;
7134 /* If this is a short forward conditional branch which did not get
7135 its delay slot filled, the delay slot can still be nullified. */
7136 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7137 nullify = forward_branch_p (insn);
7139 /* A forward branch over a single nullified insn can be done with an
7140 extrs instruction. This avoids a single cycle penalty due to
7141 mis-predicted branch if we fall through (branch not taken). */
7142 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7144 switch (length)
7147 /* All short conditional branches except backwards with an unfilled
7148 delay slot. */
7149 case 4:
7150 if (useskip)
7151 strcpy (buf, "{vextrs,|extrw,s,}");
7152 else
7153 strcpy (buf, "{bvb,|bb,}");
7154 if (useskip && GET_MODE (operands[0]) == DImode)
7155 strcpy (buf, "extrd,s,*");
7156 else if (GET_MODE (operands[0]) == DImode)
7157 strcpy (buf, "bb,*");
7158 if ((which == 0 && negated)
7159 || (which == 1 && ! negated))
7160 strcat (buf, ">=");
7161 else
7162 strcat (buf, "<");
7163 if (useskip)
7164 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7165 else if (nullify && negated)
7167 if (branch_needs_nop_p (insn))
7168 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7169 else
7170 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7172 else if (nullify && ! negated)
7174 if (branch_needs_nop_p (insn))
7175 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7176 else
7177 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7179 else if (! nullify && negated)
7180 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7181 else if (! nullify && ! negated)
7182 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7183 break;
7185 /* All long conditionals. Note a short backward branch with an
7186 unfilled delay slot is treated just like a long backward branch
7187 with an unfilled delay slot. */
7188 case 8:
7189 /* Handle weird backwards branch with a filled delay slot
7190 which is nullified. */
7191 if (dbr_sequence_length () != 0
7192 && ! forward_branch_p (insn)
7193 && nullify)
7195 strcpy (buf, "{bvb,|bb,}");
7196 if (GET_MODE (operands[0]) == DImode)
7197 strcat (buf, "*");
7198 if ((which == 0 && negated)
7199 || (which == 1 && ! negated))
7200 strcat (buf, "<");
7201 else
7202 strcat (buf, ">=");
7203 if (negated)
7204 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7205 else
7206 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7208 /* Handle short backwards branch with an unfilled delay slot.
7209 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7210 taken and untaken branches. */
7211 else if (dbr_sequence_length () == 0
7212 && ! forward_branch_p (insn)
7213 && INSN_ADDRESSES_SET_P ()
7214 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7215 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7217 strcpy (buf, "{bvb,|bb,}");
7218 if (GET_MODE (operands[0]) == DImode)
7219 strcat (buf, "*");
7220 if ((which == 0 && negated)
7221 || (which == 1 && ! negated))
7222 strcat (buf, ">=");
7223 else
7224 strcat (buf, "<");
7225 if (negated)
7226 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7227 else
7228 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7230 else
7232 strcpy (buf, "{vextrs,|extrw,s,}");
7233 if (GET_MODE (operands[0]) == DImode)
7234 strcpy (buf, "extrd,s,*");
7235 if ((which == 0 && negated)
7236 || (which == 1 && ! negated))
7237 strcat (buf, "<");
7238 else
7239 strcat (buf, ">=");
7240 if (nullify && negated)
7241 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7242 else if (nullify && ! negated)
7243 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7244 else if (negated)
7245 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7246 else
7247 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7249 break;
7251 default:
7252 /* The reversed conditional branch must branch over one additional
7253 instruction if the delay slot is filled and needs to be extracted
7254 by pa_output_lbranch. If the delay slot is empty or this is a
7255 nullified forward branch, the instruction after the reversed
7256 condition branch must be nullified. */
7257 if (dbr_sequence_length () == 0
7258 || (nullify && forward_branch_p (insn)))
7260 nullify = 1;
7261 xdelay = 0;
7262 operands[4] = GEN_INT (length);
7264 else
7266 xdelay = 1;
7267 operands[4] = GEN_INT (length + 4);
7270 if (GET_MODE (operands[0]) == DImode)
7271 strcpy (buf, "bb,*");
7272 else
7273 strcpy (buf, "{bvb,|bb,}");
7274 if ((which == 0 && negated)
7275 || (which == 1 && !negated))
7276 strcat (buf, "<");
7277 else
7278 strcat (buf, ">=");
7279 if (nullify)
7280 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7281 else
7282 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7283 output_asm_insn (buf, operands);
7284 return pa_output_lbranch (negated ? operands[3] : operands[2],
7285 insn, xdelay);
7287 return buf;
7290 /* Return the output template for emitting a dbra type insn.
7292 Note it may perform some output operations on its own before
7293 returning the final output string. */
7294 const char *
7295 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7297 int length = get_attr_length (insn);
7299 /* A conditional branch to the following instruction (e.g. the delay slot) is
7300 asking for a disaster. Be prepared! */
7302 if (branch_to_delay_slot_p (insn))
7304 if (which_alternative == 0)
7305 return "ldo %1(%0),%0";
7306 else if (which_alternative == 1)
7308 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7309 output_asm_insn ("ldw -16(%%r30),%4", operands);
7310 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7311 return "{fldws|fldw} -16(%%r30),%0";
7313 else
7315 output_asm_insn ("ldw %0,%4", operands);
7316 return "ldo %1(%4),%4\n\tstw %4,%0";
7320 if (which_alternative == 0)
7322 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7323 int xdelay;
7325 /* If this is a long branch with its delay slot unfilled, set `nullify'
7326 as it can nullify the delay slot and save a nop. */
7327 if (length == 8 && dbr_sequence_length () == 0)
7328 nullify = 1;
7330 /* If this is a short forward conditional branch which did not get
7331 its delay slot filled, the delay slot can still be nullified. */
7332 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7333 nullify = forward_branch_p (insn);
7335 switch (length)
7337 case 4:
7338 if (nullify)
7340 if (branch_needs_nop_p (insn))
7341 return "addib,%C2,n %1,%0,%3%#";
7342 else
7343 return "addib,%C2,n %1,%0,%3";
7345 else
7346 return "addib,%C2 %1,%0,%3";
7348 case 8:
7349 /* Handle weird backwards branch with a filled delay slot
7350 which is nullified. */
7351 if (dbr_sequence_length () != 0
7352 && ! forward_branch_p (insn)
7353 && nullify)
7354 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7355 /* Handle short backwards branch with an unfilled delay slot.
7356 Using a addb;nop rather than addi;bl saves 1 cycle for both
7357 taken and untaken branches. */
7358 else if (dbr_sequence_length () == 0
7359 && ! forward_branch_p (insn)
7360 && INSN_ADDRESSES_SET_P ()
7361 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7362 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7363 return "addib,%C2 %1,%0,%3%#";
7365 /* Handle normal cases. */
7366 if (nullify)
7367 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7368 else
7369 return "addi,%N2 %1,%0,%0\n\tb %3";
7371 default:
7372 /* The reversed conditional branch must branch over one additional
7373 instruction if the delay slot is filled and needs to be extracted
7374 by pa_output_lbranch. If the delay slot is empty or this is a
7375 nullified forward branch, the instruction after the reversed
7376 condition branch must be nullified. */
7377 if (dbr_sequence_length () == 0
7378 || (nullify && forward_branch_p (insn)))
7380 nullify = 1;
7381 xdelay = 0;
7382 operands[4] = GEN_INT (length);
7384 else
7386 xdelay = 1;
7387 operands[4] = GEN_INT (length + 4);
7390 if (nullify)
7391 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7392 else
7393 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7395 return pa_output_lbranch (operands[3], insn, xdelay);
7399 /* Deal with gross reload from FP register case. */
7400 else if (which_alternative == 1)
7402 /* Move loop counter from FP register to MEM then into a GR,
7403 increment the GR, store the GR into MEM, and finally reload
7404 the FP register from MEM from within the branch's delay slot. */
7405 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7406 operands);
7407 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7408 if (length == 24)
7409 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7410 else if (length == 28)
7411 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7412 else
7414 operands[5] = GEN_INT (length - 16);
7415 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7416 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7417 return pa_output_lbranch (operands[3], insn, 0);
7420 /* Deal with gross reload from memory case. */
7421 else
7423 /* Reload loop counter from memory, the store back to memory
7424 happens in the branch's delay slot. */
7425 output_asm_insn ("ldw %0,%4", operands);
7426 if (length == 12)
7427 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7428 else if (length == 16)
7429 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7430 else
7432 operands[5] = GEN_INT (length - 4);
7433 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7434 return pa_output_lbranch (operands[3], insn, 0);
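
/* E.g., the common 4-byte case above yields a single decrement-and-
   branch such as (operands illustrative)

       addib,> -1,%r26,L$loop

   which adds the step to the counter and branches on the condition in
   one instruction.  */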
7439 /* Return the output template for emitting a movb type insn.
7441 Note it may perform some output operations on its own before
7442 returning the final output string. */
7443 const char *
7444 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7445 int reverse_comparison)
7447 int length = get_attr_length (insn);
7449 /* A conditional branch to the following instruction (e.g. the delay slot) is
7450 asking for a disaster. Be prepared! */
7452 if (branch_to_delay_slot_p (insn))
7454 if (which_alternative == 0)
7455 return "copy %1,%0";
7456 else if (which_alternative == 1)
7458 output_asm_insn ("stw %1,-16(%%r30)", operands);
7459 return "{fldws|fldw} -16(%%r30),%0";
7461 else if (which_alternative == 2)
7462 return "stw %1,%0";
7463 else
7464 return "mtsar %r1";
7467 /* Support the second variant. */
7468 if (reverse_comparison)
7469 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7471 if (which_alternative == 0)
7473 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7474 int xdelay;
7476 /* If this is a long branch with its delay slot unfilled, set `nullify'
7477 as it can nullify the delay slot and save a nop. */
7478 if (length == 8 && dbr_sequence_length () == 0)
7479 nullify = 1;
7481 /* If this is a short forward conditional branch which did not get
7482 its delay slot filled, the delay slot can still be nullified. */
7483 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7484 nullify = forward_branch_p (insn);
7486 switch (length)
7488 case 4:
7489 if (nullify)
7491 if (branch_needs_nop_p (insn))
7492 return "movb,%C2,n %1,%0,%3%#";
7493 else
7494 return "movb,%C2,n %1,%0,%3";
7496 else
7497 return "movb,%C2 %1,%0,%3";
7499 case 8:
7500 /* Handle weird backwards branch with a filled delay slot
7501 which is nullified. */
7502 if (dbr_sequence_length () != 0
7503 && ! forward_branch_p (insn)
7504 && nullify)
7505 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7507 /* Handle short backwards branch with an unfilled delay slot.
7508 Using a movb;nop rather than or;bl saves 1 cycle for both
7509 taken and untaken branches. */
7510 else if (dbr_sequence_length () == 0
7511 && ! forward_branch_p (insn)
7512 && INSN_ADDRESSES_SET_P ()
7513 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7514 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7515 return "movb,%C2 %1,%0,%3%#";
7516 /* Handle normal cases. */
7517 if (nullify)
7518 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7519 else
7520 return "or,%N2 %1,%%r0,%0\n\tb %3";
7522 default:
7523 /* The reversed conditional branch must branch over one additional
7524 instruction if the delay slot is filled and needs to be extracted
7525 by pa_output_lbranch. If the delay slot is empty or this is a
7526 nullified forward branch, the instruction after the reversed
7527 condition branch must be nullified. */
7528 if (dbr_sequence_length () == 0
7529 || (nullify && forward_branch_p (insn)))
7531 nullify = 1;
7532 xdelay = 0;
7533 operands[4] = GEN_INT (length);
7535 else
7537 xdelay = 1;
7538 operands[4] = GEN_INT (length + 4);
7541 if (nullify)
7542 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7543 else
7544 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7546 return pa_output_lbranch (operands[3], insn, xdelay);
7549 /* Deal with gross reload for FP destination register case. */
7550 else if (which_alternative == 1)
7552 /* Move source register to MEM, perform the branch test, then
7553 finally load the FP register from MEM from within the branch's
7554 delay slot. */
7555 output_asm_insn ("stw %1,-16(%%r30)", operands);
7556 if (length == 12)
7557 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7558 else if (length == 16)
7559 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7560 else
7562 operands[4] = GEN_INT (length - 4);
7563 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7564 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7565 return pa_output_lbranch (operands[3], insn, 0);
7568 /* Deal with gross reload from memory case. */
7569 else if (which_alternative == 2)
7571 /* Reload loop counter from memory, the store back to memory
7572 happens in the branch's delay slot. */
7573 if (length == 8)
7574 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7575 else if (length == 12)
7576 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7577 else
7579 operands[4] = GEN_INT (length);
7580 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7581 operands);
7582 return pa_output_lbranch (operands[3], insn, 0);
7585 /* Handle SAR as a destination. */
7586 else
7588 if (length == 8)
7589 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7590 else if (length == 12)
7591 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7592 else
7594 operands[4] = GEN_INT (length);
7595 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7596 operands);
7597 return pa_output_lbranch (operands[3], insn, 0);
7602 /* Copy any FP arguments in INSN into integer registers. */
7603 static void
7604 copy_fp_args (rtx_insn *insn)
7606 rtx link;
7607 rtx xoperands[2];
7609 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7611 int arg_mode, regno;
7612 rtx use = XEXP (link, 0);
7614 if (! (GET_CODE (use) == USE
7615 && GET_CODE (XEXP (use, 0)) == REG
7616 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7617 continue;
7619 arg_mode = GET_MODE (XEXP (use, 0));
7620 regno = REGNO (XEXP (use, 0));
7622 /* Is it a floating point register? */
7623 if (regno >= 32 && regno <= 39)
7625 /* Copy the FP register into an integer register via memory. */
7626 if (arg_mode == SFmode)
7628 xoperands[0] = XEXP (use, 0);
7629 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7630 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7631 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7633 else
7635 xoperands[0] = XEXP (use, 0);
7636 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7637 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7638 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7639 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7645 /* Compute length of the FP argument copy sequence for INSN. */
7646 static int
7647 length_fp_args (rtx_insn *insn)
7649 int length = 0;
7650 rtx link;
7652 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7654 int arg_mode, regno;
7655 rtx use = XEXP (link, 0);
7657 if (! (GET_CODE (use) == USE
7658 && GET_CODE (XEXP (use, 0)) == REG
7659 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7660 continue;
7662 arg_mode = GET_MODE (XEXP (use, 0));
7663 regno = REGNO (XEXP (use, 0));
7665 /* Is it a floating point register? */
7666 if (regno >= 32 && regno <= 39)
7668 if (arg_mode == SFmode)
7669 length += 8;
7670 else
7671 length += 12;
7675 return length;
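/* For illustration: the byte counts above mirror the sequences emitted
   by copy_fp_args: an SFmode argument takes two insns (fstw plus ldw,
   8 bytes) and a DFmode argument three (fstd plus two ldw, 12 bytes).  */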
7678 /* Return the attribute length for the millicode call instruction INSN.
7679 The length must match the code generated by pa_output_millicode_call.
7680 We include the delay slot in the returned length as it is better to
7681 overestimate the length than to underestimate it. */
7683 int
7684 pa_attr_length_millicode_call (rtx_insn *insn)
7685 {
7686 unsigned long distance = -1;
7687 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7689 if (INSN_ADDRESSES_SET_P ())
7691 distance = (total + insn_current_reference_address (insn));
7692 if (distance < total)
7693 distance = -1;
7696 if (TARGET_64BIT)
7698 if (!TARGET_LONG_CALLS && distance < 7600000)
7699 return 8;
7701 return 20;
7703 else if (TARGET_PORTABLE_RUNTIME)
7704 return 24;
7705 else
7707 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7708 return 8;
7710 if (!flag_pic)
7711 return 12;
7713 return 24;
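/* For illustration, the returned lengths are instruction counts times
   four with the delay slot included: 8 bytes covers the short two-insn
   branch-and-link, 12 the non-PIC ldil/ble pair, and 20 or 24 the long
   pc-relative and portable runtime sequences that
   pa_output_millicode_call emits below.  */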
7717 /* INSN is a function call.
7719 CALL_DEST is the routine we are calling. */
7721 const char *
7722 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7724 int attr_length = get_attr_length (insn);
7725 int seq_length = dbr_sequence_length ();
7726 rtx xoperands[4];
7728 xoperands[0] = call_dest;
7730 /* Handle the common case where we are sure that the branch will
7731 reach the beginning of the $CODE$ subspace. The within reach
7732 form of the $$sh_func_adrs call has a length of 28. Because it
7733 has an attribute type of sh_func_adrs, it never has a nonzero
7734 sequence length (i.e., the delay slot is never filled). */
7735 if (!TARGET_LONG_CALLS
7736 && (attr_length == 8
7737 || (attr_length == 28
7738 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7740 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7741 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7743 else
7745 if (TARGET_64BIT)
7747 /* It might seem that one insn could be saved by accessing
7748 the millicode function using the linkage table. However,
7749 this doesn't work in shared libraries and other dynamically
7750 loaded objects. Using a pc-relative sequence also avoids
7751 problems related to the implicit use of the gp register. */
7752 xoperands[1] = gen_rtx_REG (Pmode, 1);
7753 xoperands[2] = xoperands[1];
7754 pa_output_pic_pcrel_sequence (xoperands);
7755 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7757 else if (TARGET_PORTABLE_RUNTIME)
7759 /* Pure portable runtime doesn't allow be/ble; we also don't
7760 have PIC support in the assembler/linker, so this sequence
7761 is needed. */
7763 /* Get the address of our target into %r1. */
7764 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7765 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7767 /* Get our return address into %r31. */
7768 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7769 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7771 /* Jump to our target address in %r1. */
7772 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7774 else if (!flag_pic)
7776 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7777 if (TARGET_PA_20)
7778 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7779 else
7780 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7782 else
7784 xoperands[1] = gen_rtx_REG (Pmode, 31);
7785 xoperands[2] = gen_rtx_REG (Pmode, 1);
7786 pa_output_pic_pcrel_sequence (xoperands);
7788 /* Adjust return address. */
7789 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7791 /* Jump to our target address in %r1. */
7792 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7796 if (seq_length == 0)
7797 output_asm_insn ("nop", xoperands);
7799 return "";
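/* A sketch of the common short case on a 32-bit target, assuming an
   empty delay slot and a call to the $$mulI millicode routine:

	bl $$mulI,%r31
	nop

   which is the 8-byte form counted by pa_attr_length_millicode_call.  */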
7802 /* Return the attribute length of the call instruction INSN. The SIBCALL
7803 flag indicates whether INSN is a regular call or a sibling call. The
7804 length returned must be longer than the code actually generated by
7805 pa_output_call. Since branch shortening is done before delay branch
7806 sequencing, there is no way to determine whether or not the delay
7807 slot will be filled during branch shortening. Even when the delay
7808 slot is filled, we may have to add a nop if the delay slot contains
7809 a branch that can't reach its target. Thus, we always have to include
7810 the delay slot in the length estimate. This used to be done in
7811 pa_adjust_insn_length but we do it here now as some sequences always
7812 fill the delay slot and we can save four bytes in the estimate for
7813 these sequences. */
7815 int
7816 pa_attr_length_call (rtx_insn *insn, int sibcall)
7817 {
7818 int local_call;
7819 rtx call, call_dest;
7820 tree call_decl;
7821 int length = 0;
7822 rtx pat = PATTERN (insn);
7823 unsigned long distance = -1;
7825 gcc_assert (CALL_P (insn));
7827 if (INSN_ADDRESSES_SET_P ())
7829 unsigned long total;
7831 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7832 distance = (total + insn_current_reference_address (insn));
7833 if (distance < total)
7834 distance = -1;
7837 gcc_assert (GET_CODE (pat) == PARALLEL);
7839 /* Get the call rtx. */
7840 call = XVECEXP (pat, 0, 0);
7841 if (GET_CODE (call) == SET)
7842 call = SET_SRC (call);
7844 gcc_assert (GET_CODE (call) == CALL);
7846 /* Determine if this is a local call. */
7847 call_dest = XEXP (XEXP (call, 0), 0);
7848 call_decl = SYMBOL_REF_DECL (call_dest);
7849 local_call = call_decl && targetm.binds_local_p (call_decl);
7851 /* pc-relative branch. */
7852 if (!TARGET_LONG_CALLS
7853 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7854 || distance < MAX_PCREL17F_OFFSET))
7855 length += 8;
7857 /* 64-bit plabel sequence. */
7858 else if (TARGET_64BIT && !local_call)
7859 length += sibcall ? 28 : 24;
7861 /* non-pic long absolute branch sequence. */
7862 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7863 length += 12;
7865 /* long pc-relative branch sequence. */
7866 else if (TARGET_LONG_PIC_SDIFF_CALL
7867 || (TARGET_GAS && !TARGET_SOM && local_call))
7869 length += 20;
7871 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7872 length += 8;
7875 /* 32-bit plabel sequence. */
7876 else
7878 length += 32;
7880 if (TARGET_SOM)
7881 length += length_fp_args (insn);
7883 if (flag_pic)
7884 length += 4;
7886 if (!TARGET_PA_20)
7888 if (!sibcall)
7889 length += 8;
7891 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7892 length += 8;
7896 return length;
7899 /* INSN is a function call.
7901 CALL_DEST is the routine we are calling. */
7903 const char *
7904 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7906 int seq_length = dbr_sequence_length ();
7907 tree call_decl = SYMBOL_REF_DECL (call_dest);
7908 int local_call = call_decl && targetm.binds_local_p (call_decl);
7909 rtx xoperands[4];
7911 xoperands[0] = call_dest;
7913 /* Handle the common case where we're sure that the branch will reach
7914 the beginning of the "$CODE$" subspace. This is the beginning of
7915 the current function if we are in a named section. */
7916 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7918 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7919 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7921 else
7923 if (TARGET_64BIT && !local_call)
7925 /* ??? As far as I can tell, the HP linker doesn't support the
7926 long pc-relative sequence described in the 64-bit runtime
7927 architecture. So, we use a slightly longer indirect call. */
7928 xoperands[0] = pa_get_deferred_plabel (call_dest);
7929 xoperands[1] = gen_label_rtx ();
7931 /* If this isn't a sibcall, we put the load of %r27 into the
7932 delay slot. We can't do this in a sibcall as we don't
7933 have a second call-clobbered scratch register available.
7934 We don't need to do anything when generating fast indirect
7935 calls. */
7936 if (seq_length != 0 && !sibcall)
7938 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7939 optimize, 0, NULL);
7941 /* Now delete the delay insn. */
7942 SET_INSN_DELETED (NEXT_INSN (insn));
7943 seq_length = 0;
7946 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7947 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7948 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7950 if (sibcall)
7952 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7953 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7954 output_asm_insn ("bve (%%r1)", xoperands);
7956 else
7958 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7959 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7960 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7961 seq_length = 1;
7964 else
7966 int indirect_call = 0;
7968 /* Emit a long call. There are several different sequences
7969 of increasing length and complexity. In most cases,
7970 they don't allow an instruction in the delay slot. */
7971 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7972 && !TARGET_LONG_PIC_SDIFF_CALL
7973 && !(TARGET_GAS && !TARGET_SOM && local_call)
7974 && !TARGET_64BIT)
7975 indirect_call = 1;
7977 if (seq_length != 0
7978 && !sibcall
7979 && (!TARGET_PA_20
7980 || indirect_call
7981 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7983 /* A non-jump insn in the delay slot. By definition we can
7984 emit this insn before the call (and in fact before argument
7985 relocation). */
7986 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7987 NULL);
7989 /* Now delete the delay insn. */
7990 SET_INSN_DELETED (NEXT_INSN (insn));
7991 seq_length = 0;
7994 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7996 /* This is the best sequence for making long calls in
7997 non-pic code. Unfortunately, GNU ld doesn't provide
7998 the stub needed for external calls, and GAS's support
7999 for this with the SOM linker is buggy. It is safe
8000 to use this for local calls. */
8001 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8002 if (sibcall)
8003 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8004 else
8006 if (TARGET_PA_20)
8007 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8008 xoperands);
8009 else
8010 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8012 output_asm_insn ("copy %%r31,%%r2", xoperands);
8013 seq_length = 1;
8016 else
8018 /* The HP assembler and linker can handle relocations for
8019 the difference of two symbols. The HP assembler
8020 recognizes the sequence as a pc-relative call and
8021 the linker provides stubs when needed. */
8023 /* GAS currently can't generate the relocations that
8024 are needed for the SOM linker under HP-UX using this
8025 sequence. The GNU linker doesn't generate the stubs
8026 that are needed for external calls on TARGET_ELF32
8027 with this sequence. For now, we have to use a longer
8028 plabel sequence when using GAS for non local calls. */
8029 if (TARGET_LONG_PIC_SDIFF_CALL
8030 || (TARGET_GAS && !TARGET_SOM && local_call))
8032 xoperands[1] = gen_rtx_REG (Pmode, 1);
8033 xoperands[2] = xoperands[1];
8034 pa_output_pic_pcrel_sequence (xoperands);
8036 else
8038 /* Emit a long plabel-based call sequence. This is
8039 essentially an inline implementation of $$dyncall.
8040 We don't actually try to call $$dyncall as this is
8041 as difficult as calling the function itself. */
8042 xoperands[0] = pa_get_deferred_plabel (call_dest);
8043 xoperands[1] = gen_label_rtx ();
8045 /* Since the call is indirect, FP arguments in registers
8046 need to be copied to the general registers. Then, the
8047 argument relocation stub will copy them back. */
8048 if (TARGET_SOM)
8049 copy_fp_args (insn);
8051 if (flag_pic)
8053 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8054 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8055 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
8057 else
8059 output_asm_insn ("addil LR'%0-$global$,%%r27",
8060 xoperands);
8061 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8062 xoperands);
8065 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
8066 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8067 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8068 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
8070 if (!sibcall && !TARGET_PA_20)
8072 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8073 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8074 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8075 else
8076 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8080 if (TARGET_PA_20)
8082 if (sibcall)
8083 output_asm_insn ("bve (%%r1)", xoperands);
8084 else
8086 if (indirect_call)
8088 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8089 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8090 seq_length = 1;
8092 else
8093 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8096 else
8098 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8099 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8100 xoperands);
8102 if (sibcall)
8104 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8105 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8106 else
8107 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8109 else
8111 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8112 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8113 else
8114 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8116 if (indirect_call)
8117 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8118 else
8119 output_asm_insn ("copy %%r31,%%r2", xoperands);
8120 seq_length = 1;
8127 if (seq_length == 0)
8128 output_asm_insn ("nop", xoperands);
8130 return "";
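/* The loads above reflect the plabel/function descriptor layout: in the
   32-bit sequence, word 0 of the plabel holds the entry point and word 4
   the gp value loaded into %r19, while the 64-bit descriptor keeps the
   entry point at offset 16 and the gp value for %r27 at offset 24.  */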
8133 /* Return the attribute length of the indirect call instruction INSN.
8134 The length must match the code generated by pa_output_indirect_call.
8135 The returned length includes the delay slot. Currently, the delay
8136 slot of an indirect call sequence is not exposed and it is used by
8137 the sequence itself. */
8139 int
8140 pa_attr_length_indirect_call (rtx_insn *insn)
8141 {
8142 unsigned long distance = -1;
8143 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8145 if (INSN_ADDRESSES_SET_P ())
8147 distance = (total + insn_current_reference_address (insn));
8148 if (distance < total)
8149 distance = -1;
8152 if (TARGET_64BIT)
8153 return 12;
8155 if (TARGET_FAST_INDIRECT_CALLS)
8156 return 8;
8158 if (TARGET_PORTABLE_RUNTIME)
8159 return 16;
8161 /* Inline version of $$dyncall. */
8162 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8163 return 20;
8165 if (!TARGET_LONG_CALLS
8166 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8167 || distance < MAX_PCREL17F_OFFSET))
8168 return 8;
8170 /* Out of reach, can use ble. */
8171 if (!flag_pic)
8172 return 12;
8174 /* Inline version of $$dyncall. */
8175 if (TARGET_NO_SPACE_REGS || TARGET_PA_20)
8176 return 20;
8178 if (!optimize_size)
8179 return 36;
8181 /* Long PIC pc-relative call. */
8182 return 20;
8185 const char *
8186 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8188 rtx xoperands[4];
8189 int length;
8191 if (TARGET_64BIT)
8193 xoperands[0] = call_dest;
8194 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8195 "bve,l (%%r2),%%r2\n\t"
8196 "ldd 24(%0),%%r27", xoperands);
8197 return "";
8200 /* First the special case for kernels, level 0 systems, etc. */
8201 if (TARGET_FAST_INDIRECT_CALLS)
8203 pa_output_arg_descriptor (insn);
8204 if (TARGET_PA_20)
8205 return "bve,l,n (%%r22),%%r2\n\tnop";
8206 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8209 if (TARGET_PORTABLE_RUNTIME)
8211 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8212 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8213 pa_output_arg_descriptor (insn);
8214 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8217 /* Maybe emit a fast inline version of $$dyncall. */
8218 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8220 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8221 "ldw 2(%%r22),%%r19\n\t"
8222 "ldw -2(%%r22),%%r22", xoperands);
8223 pa_output_arg_descriptor (insn);
8224 if (TARGET_NO_SPACE_REGS)
8226 if (TARGET_PA_20)
8227 return "bve,l,n (%%r22),%%r2\n\tnop";
8228 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8230 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8233 /* Now the normal case -- we can reach $$dyncall directly or
8234 we're sure that we can get there via a long-branch stub.
8236 No need to check target flags as the length uniquely identifies
8237 the remaining cases. */
8238 length = pa_attr_length_indirect_call (insn);
8239 if (length == 8)
8241 pa_output_arg_descriptor (insn);
8243 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8244 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8245 variant of the B,L instruction can't be used on the SOM target. */
8246 if (TARGET_PA_20 && !TARGET_SOM)
8247 return "b,l,n $$dyncall,%%r2\n\tnop";
8248 else
8249 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8252 /* Long millicode call, but we are not generating PIC or portable runtime
8253 code. */
8254 if (length == 12)
8256 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8257 pa_output_arg_descriptor (insn);
8258 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8261 /* Maybe emit a fast inline version of $$dyncall. The long PIC
8262 pc-relative call sequence is five instructions. The inline PA 2.0
8263 version of $$dyncall is also five instructions. The PA 1.X versions
8264 are longer but still an overall win. */
8265 if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size)
8267 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8268 "ldw 2(%%r22),%%r19\n\t"
8269 "ldw -2(%%r22),%%r22", xoperands);
8270 if (TARGET_NO_SPACE_REGS)
8272 pa_output_arg_descriptor (insn);
8273 if (TARGET_PA_20)
8274 return "bve,l,n (%%r22),%%r2\n\tnop";
8275 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8277 if (TARGET_PA_20)
8279 pa_output_arg_descriptor (insn);
8280 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8282 output_asm_insn ("bl .+8,%%r2\n\t"
8283 "ldo 16(%%r2),%%r2\n\t"
8284 "ldsid (%%r22),%%r1\n\t"
8285 "mtsp %%r1,%%sr0", xoperands);
8286 pa_output_arg_descriptor (insn);
8287 return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)";
8290 /* We need a long PIC call to $$dyncall. */
8291 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8292 xoperands[1] = gen_rtx_REG (Pmode, 2);
8293 xoperands[2] = gen_rtx_REG (Pmode, 1);
8294 pa_output_pic_pcrel_sequence (xoperands);
8295 pa_output_arg_descriptor (insn);
8296 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
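/* In the inline $$dyncall expansions above, "bb,>=,n %r22,30,..." tests
   the plabel bit in the function pointer and skips the two loads when
   the bit is clear.  When it is set, the pointer points two bytes past
   the start of a plabel, so "ldw -2(%r22)" fetches the real entry point
   and "ldw 2(%r22)" the gp value for %r19.  */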
8299 /* In HPUX 8.0's shared library scheme, special relocations are needed
8300 for function labels if they might be passed to a function
8301 in a shared library (because shared libraries don't live in code
8302 space), and special magic is needed to construct their address. */
8304 void
8305 pa_encode_label (rtx sym)
8307 const char *str = XSTR (sym, 0);
8308 int len = strlen (str) + 1;
8309 char *newstr, *p;
8311 p = newstr = XALLOCAVEC (char, len + 1);
8312 *p++ = '@';
8313 strcpy (p, str);
8315 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8318 static void
8319 pa_encode_section_info (tree decl, rtx rtl, int first)
8321 int old_referenced = 0;
8323 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8324 old_referenced
8325 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8327 default_encode_section_info (decl, rtl, first);
8329 if (first && TEXT_SPACE_P (decl))
8331 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8332 if (TREE_CODE (decl) == FUNCTION_DECL)
8333 pa_encode_label (XEXP (rtl, 0));
8335 else if (old_referenced)
8336 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8339 /* This is sort of the inverse of pa_encode_section_info. */
8341 static const char *
8342 pa_strip_name_encoding (const char *str)
8344 str += (*str == '@');
8345 str += (*str == '*');
8346 return str;
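/* For example, "@foo" (encoded by pa_encode_label) and "*foo" (an
   assembler name that bypasses the user-label prefix) both map back
   to plain "foo".  */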
8349 /* Returns 1 if OP is a function label involved in a simple addition
8350 with a constant. Used to keep certain patterns from matching
8351 during instruction combination. */
8352 int
8353 pa_is_function_label_plus_const (rtx op)
8354 {
8355 /* Strip off any CONST. */
8356 if (GET_CODE (op) == CONST)
8357 op = XEXP (op, 0);
8359 return (GET_CODE (op) == PLUS
8360 && function_label_operand (XEXP (op, 0), VOIDmode)
8361 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8364 /* Output assembly code for a thunk to FUNCTION. */
8366 static void
8367 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8368 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8369 tree function)
8371 static unsigned int current_thunk_number;
8372 int val_14 = VAL_14_BITS_P (delta);
8373 unsigned int old_last_address = last_address, nbytes = 0;
8374 char label[17];
8375 rtx xoperands[4];
8377 xoperands[0] = XEXP (DECL_RTL (function), 0);
8378 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8379 xoperands[2] = GEN_INT (delta);
8381 final_start_function (emit_barrier (), file, 1);
8383 /* Output the thunk. We know that the function is in the same
8384 translation unit (i.e., the same space) as the thunk, and that
8385 thunks are output after their method. Thus, we don't need an
8386 external branch to reach the function. With SOM and GAS,
8387 functions and thunks are effectively in different sections.
8388 Thus, we can always use an IA-relative branch and the linker
8389 will add a long branch stub if necessary.
8391 However, we have to be careful when generating PIC code on the
8392 SOM port to ensure that the sequence does not transfer to an
8393 import stub for the target function as this could clobber the
8394 return value saved at SP-24. This would also apply to the
8395 32-bit linux port if the multi-space model is implemented. */
8396 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8397 && !(flag_pic && TREE_PUBLIC (function))
8398 && (TARGET_GAS || last_address < 262132))
8399 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8400 && ((targetm_common.have_named_sections
8401 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8402 /* The GNU 64-bit linker has rather poor stub management.
8403 So, we use a long branch from thunks that aren't in
8404 the same section as the target function. */
8405 && ((!TARGET_64BIT
8406 && (DECL_SECTION_NAME (thunk_fndecl)
8407 != DECL_SECTION_NAME (function)))
8408 || ((DECL_SECTION_NAME (thunk_fndecl)
8409 == DECL_SECTION_NAME (function))
8410 && last_address < 262132)))
8411 /* In this case, we need to be able to reach the start of
8412 the stub table even though the function is likely closer
8413 and can be jumped to directly. */
8414 || (targetm_common.have_named_sections
8415 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8416 && DECL_SECTION_NAME (function) == NULL
8417 && total_code_bytes < MAX_PCREL17F_OFFSET)
8418 /* Likewise. */
8419 || (!targetm_common.have_named_sections
8420 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8422 if (!val_14)
8423 output_asm_insn ("addil L'%2,%%r26", xoperands);
8425 output_asm_insn ("b %0", xoperands);
8427 if (val_14)
8429 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8430 nbytes += 8;
8432 else
8434 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8435 nbytes += 12;
8438 else if (TARGET_64BIT)
8440 rtx xop[4];
8442 /* We only have one call-clobbered scratch register, so we can't
8443 make use of the delay slot if delta doesn't fit in 14 bits. */
8444 if (!val_14)
8446 output_asm_insn ("addil L'%2,%%r26", xoperands);
8447 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8450 /* Load function address into %r1. */
8451 xop[0] = xoperands[0];
8452 xop[1] = gen_rtx_REG (Pmode, 1);
8453 xop[2] = xop[1];
8454 pa_output_pic_pcrel_sequence (xop);
8456 if (val_14)
8458 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8459 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8460 nbytes += 20;
8462 else
8464 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8465 nbytes += 24;
8468 else if (TARGET_PORTABLE_RUNTIME)
8470 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8471 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8473 if (!val_14)
8474 output_asm_insn ("ldil L'%2,%%r26", xoperands);
8476 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8478 if (val_14)
8480 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8481 nbytes += 16;
8483 else
8485 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8486 nbytes += 20;
8489 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8491 /* The function is accessible from outside this module. The only
8492 way to avoid an import stub between the thunk and function is to
8493 call the function directly with an indirect sequence similar to
8494 that used by $$dyncall. This is possible because $$dyncall acts
8495 as the import stub in an indirect call. */
8496 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8497 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8498 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8499 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8500 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8501 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8502 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8503 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8504 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8506 if (!val_14)
8508 output_asm_insn ("addil L'%2,%%r26", xoperands);
8509 nbytes += 4;
8512 if (TARGET_PA_20)
8514 output_asm_insn ("bve (%%r22)", xoperands);
8515 nbytes += 36;
8517 else if (TARGET_NO_SPACE_REGS)
8519 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8520 nbytes += 36;
8522 else
8524 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8525 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8526 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8527 nbytes += 44;
8530 if (val_14)
8531 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8532 else
8533 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8535 else if (flag_pic)
8537 rtx xop[4];
8539 /* Load function address into %r22. */
8540 xop[0] = xoperands[0];
8541 xop[1] = gen_rtx_REG (Pmode, 1);
8542 xop[2] = gen_rtx_REG (Pmode, 22);
8543 pa_output_pic_pcrel_sequence (xop);
8545 if (!val_14)
8546 output_asm_insn ("addil L'%2,%%r26", xoperands);
8548 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8550 if (val_14)
8552 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8553 nbytes += 20;
8555 else
8557 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8558 nbytes += 24;
8561 else
8563 if (!val_14)
8564 output_asm_insn ("addil L'%2,%%r26", xoperands);
8566 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8567 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8569 if (val_14)
8571 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8572 nbytes += 12;
8574 else
8576 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8577 nbytes += 16;
8581 final_end_function ();
8583 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8585 switch_to_section (data_section);
8586 output_asm_insn (".align 4", xoperands);
8587 ASM_OUTPUT_LABEL (file, label);
8588 output_asm_insn (".word P'%0", xoperands);
8591 current_thunk_number++;
8592 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8593 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8594 last_address += nbytes;
8595 if (old_last_address > last_address)
8596 last_address = UINT_MAX;
8597 update_total_code_bytes (nbytes);
8600 /* Only direct calls to static functions are allowed to be sibling (tail)
8601 call optimized.
8603 This restriction is necessary because some linker generated stubs will
8604 store return pointers into rp' in some cases which might clobber a
8605 live value already in rp'.
8607 In a sibcall the current function and the target function share stack
8608 space. Thus if the path to the current function and the path to the
8609 target function save a value in rp', they save the value into the
8610 same stack slot, which has undesirable consequences.
8612 Because of the deferred binding nature of shared libraries any function
8613 with external scope could be in a different load module and thus require
8614 rp' to be saved when calling that function. So sibcall optimizations
8615 can only be safe for static functions.
8617 Note that GCC never needs return value relocations, so we don't have to
8618 worry about static calls with return value relocations (which require
8619 saving rp').
8621 It is safe to perform a sibcall optimization when the target function
8622 will never return. */
8623 static bool
8624 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8626 if (TARGET_PORTABLE_RUNTIME)
8627 return false;
8629 /* Sibcalls are not ok because the arg pointer register is not a fixed
8630 register. This prevents the sibcall optimization from occurring. In
8631 addition, there are problems with stub placement using GNU ld. This
8632 is because a normal sibcall branch uses a 17-bit relocation while
8633 a regular call branch uses a 22-bit relocation. As a result, more
8634 care needs to be taken in the placement of long-branch stubs. */
8635 if (TARGET_64BIT)
8636 return false;
8638 /* Sibcalls are only ok within a translation unit. */
8639 return (decl && !TREE_PUBLIC (decl));
8642 /* ??? Addition is not commutative on the PA due to the weird implicit
8643 space register selection rules for memory addresses. Therefore, we
8644 don't consider a + b == b + a, as this might be inside a MEM. */
8645 static bool
8646 pa_commutative_p (const_rtx x, int outer_code)
8648 return (COMMUTATIVE_P (x)
8649 && (TARGET_NO_SPACE_REGS
8650 || (outer_code != UNKNOWN && outer_code != MEM)
8651 || GET_CODE (x) != PLUS));
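/* For example, in (mem (plus (reg) (reg))) the implicit space register
   is selected from the base term, so swapping the operands can address
   a different space.  Outside a MEM, or with TARGET_NO_SPACE_REGS, the
   PLUS is treated as commutative as usual.  */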
8654 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8655 use in fmpyadd instructions. */
8656 int
8657 pa_fmpyaddoperands (rtx *operands)
8658 {
8659 machine_mode mode = GET_MODE (operands[0]);
8661 /* Must be a floating point mode. */
8662 if (mode != SFmode && mode != DFmode)
8663 return 0;
8665 /* All modes must be the same. */
8666 if (! (mode == GET_MODE (operands[1])
8667 && mode == GET_MODE (operands[2])
8668 && mode == GET_MODE (operands[3])
8669 && mode == GET_MODE (operands[4])
8670 && mode == GET_MODE (operands[5])))
8671 return 0;
8673 /* All operands must be registers. */
8674 if (! (GET_CODE (operands[1]) == REG
8675 && GET_CODE (operands[2]) == REG
8676 && GET_CODE (operands[3]) == REG
8677 && GET_CODE (operands[4]) == REG
8678 && GET_CODE (operands[5]) == REG))
8679 return 0;
8681 /* Only 2 real operands to the addition. One of the input operands must
8682 be the same as the output operand. */
8683 if (! rtx_equal_p (operands[3], operands[4])
8684 && ! rtx_equal_p (operands[3], operands[5]))
8685 return 0;
8687 /* Inout operand of add cannot conflict with any operands from multiply. */
8688 if (rtx_equal_p (operands[3], operands[0])
8689 || rtx_equal_p (operands[3], operands[1])
8690 || rtx_equal_p (operands[3], operands[2]))
8691 return 0;
8693 /* multiply cannot feed into addition operands. */
8694 if (rtx_equal_p (operands[4], operands[0])
8695 || rtx_equal_p (operands[5], operands[0]))
8696 return 0;
8698 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8699 if (mode == SFmode
8700 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8701 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8702 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8703 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8704 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8705 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8706 return 0;
8708 /* Passed. Operands are suitable for fmpyadd. */
8709 return 1;
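/* A sketch of a pair that passes these checks: "fmpy,dbl fr9,fr10,fr8"
   together with "fadd,dbl fr11,fr12,fr11".  The add reuses operand 3
   (fr11) as its destination and shares no register with the multiply,
   so the two can be fused into a single fmpyadd.  */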
8712 #if !defined(USE_COLLECT2)
8713 static void
8714 pa_asm_out_constructor (rtx symbol, int priority)
8716 if (!function_label_operand (symbol, VOIDmode))
8717 pa_encode_label (symbol);
8719 #ifdef CTORS_SECTION_ASM_OP
8720 default_ctor_section_asm_out_constructor (symbol, priority);
8721 #else
8722 # ifdef TARGET_ASM_NAMED_SECTION
8723 default_named_section_asm_out_constructor (symbol, priority);
8724 # else
8725 default_stabs_asm_out_constructor (symbol, priority);
8726 # endif
8727 #endif
8730 static void
8731 pa_asm_out_destructor (rtx symbol, int priority)
8733 if (!function_label_operand (symbol, VOIDmode))
8734 pa_encode_label (symbol);
8736 #ifdef DTORS_SECTION_ASM_OP
8737 default_dtor_section_asm_out_destructor (symbol, priority);
8738 #else
8739 # ifdef TARGET_ASM_NAMED_SECTION
8740 default_named_section_asm_out_destructor (symbol, priority);
8741 # else
8742 default_stabs_asm_out_destructor (symbol, priority);
8743 # endif
8744 #endif
8746 #endif
8748 /* This function places uninitialized global data in the bss section.
8749 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8750 function on the SOM port to prevent uninitialized global data from
8751 being placed in the data section. */
8753 void
8754 pa_asm_output_aligned_bss (FILE *stream,
8755 const char *name,
8756 unsigned HOST_WIDE_INT size,
8757 unsigned int align)
8759 switch_to_section (bss_section);
8760 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8762 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8763 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8764 #endif
8766 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8767 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8768 #endif
8770 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8771 ASM_OUTPUT_LABEL (stream, name);
8772 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8775 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8776 that doesn't allow the alignment of global common storage to be directly
8777 specified. The SOM linker aligns common storage based on the rounded
8778 value of the NUM_BYTES parameter in the .comm directive. It's not
8779 possible to use the .align directive as it doesn't affect the alignment
8780 of the label associated with a .comm directive. */
8782 void
8783 pa_asm_output_aligned_common (FILE *stream,
8784 const char *name,
8785 unsigned HOST_WIDE_INT size,
8786 unsigned int align)
8788 unsigned int max_common_align;
8790 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8791 if (align > max_common_align)
8793 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8794 "for global common data. Using %u",
8795 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8796 align = max_common_align;
8799 switch_to_section (bss_section);
8801 assemble_name (stream, name);
8802 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8803 MAX (size, align / BITS_PER_UNIT));
8806 /* We can't use .comm for local common storage as the SOM linker effectively
8807 treats the symbol as universal and uses the same storage for local symbols
8808 with the same name in different object files. The .block directive
8809 reserves an uninitialized block of storage. However, it's not common
8810 storage. Fortunately, GCC never requests common storage with the same
8811 name in any given translation unit. */
8813 void
8814 pa_asm_output_aligned_local (FILE *stream,
8815 const char *name,
8816 unsigned HOST_WIDE_INT size,
8817 unsigned int align)
8819 switch_to_section (bss_section);
8820 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8822 #ifdef LOCAL_ASM_OP
8823 fprintf (stream, "%s", LOCAL_ASM_OP);
8824 assemble_name (stream, name);
8825 fprintf (stream, "\n");
8826 #endif
8828 ASM_OUTPUT_LABEL (stream, name);
8829 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8832 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8833 use in fmpysub instructions. */
8834 int
8835 pa_fmpysuboperands (rtx *operands)
8836 {
8837 machine_mode mode = GET_MODE (operands[0]);
8839 /* Must be a floating point mode. */
8840 if (mode != SFmode && mode != DFmode)
8841 return 0;
8843 /* All modes must be the same. */
8844 if (! (mode == GET_MODE (operands[1])
8845 && mode == GET_MODE (operands[2])
8846 && mode == GET_MODE (operands[3])
8847 && mode == GET_MODE (operands[4])
8848 && mode == GET_MODE (operands[5])))
8849 return 0;
8851 /* All operands must be registers. */
8852 if (! (GET_CODE (operands[1]) == REG
8853 && GET_CODE (operands[2]) == REG
8854 && GET_CODE (operands[3]) == REG
8855 && GET_CODE (operands[4]) == REG
8856 && GET_CODE (operands[5]) == REG))
8857 return 0;
8859 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8860 operation, so operands[4] must be the same as operands[3]. */
8861 if (! rtx_equal_p (operands[3], operands[4]))
8862 return 0;
8864 /* multiply cannot feed into subtraction. */
8865 if (rtx_equal_p (operands[5], operands[0]))
8866 return 0;
8868 /* Inout operand of sub cannot conflict with any operands from multiply. */
8869 if (rtx_equal_p (operands[3], operands[0])
8870 || rtx_equal_p (operands[3], operands[1])
8871 || rtx_equal_p (operands[3], operands[2]))
8872 return 0;
8874 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8875 if (mode == SFmode
8876 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8877 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8878 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8879 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8880 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8881 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8882 return 0;
8884 /* Passed. Operands are suitable for fmpysub. */
8885 return 1;
8888 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8889 constants for a MULT embedded inside a memory address. */
8890 int
8891 pa_mem_shadd_constant_p (int val)
8892 {
8893 if (val == 2 || val == 4 || val == 8)
8894 return 1;
8895 else
8896 return 0;
8899 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8900 constants for shadd instructions. */
8901 int
8902 pa_shadd_constant_p (int val)
8903 {
8904 if (val == 1 || val == 2 || val == 3)
8905 return 1;
8906 else
8907 return 0;
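/* These two predicates are two views of the same thing: a MULT by 2, 4
   or 8 inside an address corresponds to a shadd shift of 1, 2 or 3, as
   in "sh2add %r25,%r26,%r28", which computes (%r25 << 2) + %r26.  */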
8910 /* Return TRUE if INSN branches forward. */
8912 static bool
8913 forward_branch_p (rtx_insn *insn)
8915 rtx lab = JUMP_LABEL (insn);
8917 /* The INSN must have a jump label. */
8918 gcc_assert (lab != NULL_RTX);
8920 if (INSN_ADDRESSES_SET_P ())
8921 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8923 while (insn)
8925 if (insn == lab)
8926 return true;
8927 else
8928 insn = NEXT_INSN (insn);
8931 return false;
8934 /* Output an unconditional move and branch insn. */
8936 const char *
8937 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8939 int length = get_attr_length (insn);
8941 /* These are the cases in which we win. */
8942 if (length == 4)
8943 return "mov%I1b,tr %1,%0,%2";
8945 /* None of the following cases win, but they don't lose either. */
8946 if (length == 8)
8948 if (dbr_sequence_length () == 0)
8950 /* Nothing in the delay slot, fake it by putting the combined
8951 insn (the copy or add) in the delay slot of a bl. */
8952 if (GET_CODE (operands[1]) == CONST_INT)
8953 return "b %2\n\tldi %1,%0";
8954 else
8955 return "b %2\n\tcopy %1,%0";
8957 else
8959 /* Something in the delay slot, but we've got a long branch. */
8960 if (GET_CODE (operands[1]) == CONST_INT)
8961 return "ldi %1,%0\n\tb %2";
8962 else
8963 return "copy %1,%0\n\tb %2";
8967 if (GET_CODE (operands[1]) == CONST_INT)
8968 output_asm_insn ("ldi %1,%0", operands);
8969 else
8970 output_asm_insn ("copy %1,%0", operands);
8971 return pa_output_lbranch (operands[2], insn, 1);
8974 /* Output an unconditional add and branch insn. */
8976 const char *
8977 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8979 int length = get_attr_length (insn);
8981 /* To make life easy we want operand0 to be the shared input/output
8982 operand and operand1 to be the readonly operand. */
8983 if (operands[0] == operands[1])
8984 operands[1] = operands[2];
8986 /* These are the cases in which we win. */
8987 if (length == 4)
8988 return "add%I1b,tr %1,%0,%3";
8990 /* None of the following cases win, but they don't lose either. */
8991 if (length == 8)
8993 if (dbr_sequence_length () == 0)
8994 /* Nothing in the delay slot, fake it by putting the combined
8995 insn (the copy or add) in the delay slot of a bl. */
8996 return "b %3\n\tadd%I1 %1,%0,%0";
8997 else
8998 /* Something in the delay slot, but we've got a long branch. */
8999 return "add%I1 %1,%0,%0\n\tb %3";
9002 output_asm_insn ("add%I1 %1,%0,%0", operands);
9003 return pa_output_lbranch (operands[3], insn, 1);
9006 /* We use this hook to perform a PA specific optimization which is difficult
9007 to do in earlier passes. */
9009 static void
9010 pa_reorg (void)
9012 remove_useless_addtr_insns (1);
9014 if (pa_cpu < PROCESSOR_8000)
9015 pa_combine_instructions ();
9018 /* The PA has a number of odd instructions which can perform multiple
9019 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9020 it may be profitable to combine two instructions into one instruction
9021 with two outputs. It's not profitable on PA2.0 machines because the
9022 two outputs would take two slots in the reorder buffers.
9024 This routine finds instructions which can be combined and combines
9025 them. We only support some of the potential combinations, and we
9026 only try common ways to find suitable instructions.
9028 * addb can add two registers or a register and a small integer
9029 and jump to a nearby (+-8k) location. Normally the jump to the
9030 nearby location is conditional on the result of the add, but by
9031 using the "true" condition we can make the jump unconditional.
9032 Thus addb can perform two independent operations in one insn.
9034 * movb is similar to addb in that it can perform a reg->reg
9035 or small immediate->reg copy and jump to a nearby (+-8k) location.
9037 * fmpyadd and fmpysub can perform a FP multiply and either an
9038 FP add or FP sub if the operands of the multiply and add/sub are
9039 independent (there are other minor restrictions). Note both
9040 the fmpy and fadd/fsub can in theory move to better spots according
9041 to data dependencies, but for now we require the fmpy stay at a
9042 fixed location.
9044 * Many of the memory operations can perform pre & post updates
9045 of index registers. GCC's pre/post increment/decrement addressing
9046 is far too simple to take advantage of all the possibilities. This
9047 pass may not be suitable since those insns may not be independent.
9049 * comclr can compare two ints or an int and a register, nullify
9050 the following instruction and zero some other register. This
9051 is more difficult to use as it's harder to find an insn which
9052 will generate a comclr than finding something like an unconditional
9053 branch. (conditional moves & long branches create comclr insns).
9055 * Most arithmetic operations can conditionally skip the next
9056 instruction. They can be viewed as "perform this operation
9057 and conditionally jump to this nearby location" (where nearby
9058 is one insn away). These are difficult to use due to the
9059 branch length restrictions. */
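/* As a concrete example of the addb/movb cases, a register copy
   followed by an unconditional branch to L can be rewritten as the
   single insn "movb,tr %r26,%r28,L", using the always-true condition
   to make the branch unconditional (see pa_output_parallel_movb).  */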
9061 static void
9062 pa_combine_instructions (void)
9064 rtx_insn *anchor;
9066 /* This can get expensive since the basic algorithm is on the
9067 order of O(n^2) (or worse). Only do it for -O2 or higher
9068 levels of optimization. */
9069 if (optimize < 2)
9070 return;
9072 /* Walk down the list of insns looking for "anchor" insns which
9073 may be combined with "floating" insns. As the name implies,
9074 "anchor" instructions don't move, while "floating" insns may
9075 move around. */
9076 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9077 rtx_insn *new_rtx = make_insn_raw (par);
9079 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9081 enum attr_pa_combine_type anchor_attr;
9082 enum attr_pa_combine_type floater_attr;
9084 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9085 Also ignore any special USE insns. */
9086 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9087 || GET_CODE (PATTERN (anchor)) == USE
9088 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9089 continue;
9091 anchor_attr = get_attr_pa_combine_type (anchor);
9092 /* See if anchor is an insn suitable for combination. */
9093 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9094 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9095 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9096 && ! forward_branch_p (anchor)))
9098 rtx_insn *floater;
9100 for (floater = PREV_INSN (anchor);
9101 floater;
9102 floater = PREV_INSN (floater))
9104 if (NOTE_P (floater)
9105 || (NONJUMP_INSN_P (floater)
9106 && (GET_CODE (PATTERN (floater)) == USE
9107 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9108 continue;
9110 /* Anything except a regular INSN will stop our search. */
9111 if (! NONJUMP_INSN_P (floater))
9113 floater = NULL;
9114 break;
9117 /* See if FLOATER is suitable for combination with the
9118 anchor. */
9119 floater_attr = get_attr_pa_combine_type (floater);
9120 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9121 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9122 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9123 && floater_attr == PA_COMBINE_TYPE_FMPY))
9125 /* If ANCHOR and FLOATER can be combined, then we're
9126 done with this pass. */
9127 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9128 SET_DEST (PATTERN (floater)),
9129 XEXP (SET_SRC (PATTERN (floater)), 0),
9130 XEXP (SET_SRC (PATTERN (floater)), 1)))
9131 break;
9134 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9135 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9137 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9139 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9140 SET_DEST (PATTERN (floater)),
9141 XEXP (SET_SRC (PATTERN (floater)), 0),
9142 XEXP (SET_SRC (PATTERN (floater)), 1)))
9143 break;
9145 else
9147 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9148 SET_DEST (PATTERN (floater)),
9149 SET_SRC (PATTERN (floater)),
9150 SET_SRC (PATTERN (floater))))
9151 break;
9156 /* If we didn't find anything on the backwards scan try forwards. */
9157 if (!floater
9158 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9159 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9161 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9163 if (NOTE_P (floater)
9164 || (NONJUMP_INSN_P (floater)
9165 && (GET_CODE (PATTERN (floater)) == USE
9166 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9168 continue;
9170 /* Anything except a regular INSN will stop our search. */
9171 if (! NONJUMP_INSN_P (floater))
9173 floater = NULL;
9174 break;
9177 /* See if FLOATER is suitable for combination with the
9178 anchor. */
9179 floater_attr = get_attr_pa_combine_type (floater);
9180 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9181 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9182 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9183 && floater_attr == PA_COMBINE_TYPE_FMPY))
9185 /* If ANCHOR and FLOATER can be combined, then we're
9186 done with this pass. */
9187 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9188 SET_DEST (PATTERN (floater)),
9189 XEXP (SET_SRC (PATTERN (floater)),
9190 0),
9191 XEXP (SET_SRC (PATTERN (floater)),
9192 1)))
9193 break;
9198 /* FLOATER will be nonzero if we found a suitable floating
9199 insn for combination with ANCHOR. */
9200 if (floater
9201 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9202 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9204 /* Emit the new instruction and delete the old anchor. */
9205 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9206 copy_rtx (PATTERN (floater)));
9207 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9208 emit_insn_before (temp, anchor);
9210 SET_INSN_DELETED (anchor);
9212 /* Emit a special USE insn for FLOATER, then delete
9213 the floating insn. */
9214 temp = copy_rtx (PATTERN (floater));
9215 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9216 delete_insn (floater);
9218 continue;
9220 else if (floater
9221 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9223 /* Emit the new jump insn and delete the old anchor. */
9224 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9225 copy_rtx (PATTERN (floater)));
9226 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9227 temp = emit_jump_insn_before (temp, anchor);
9229 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9230 SET_INSN_DELETED (anchor);
9232 /* Emit a special USE insn for FLOATER, then delete
9233 the floating insn. */
9234 temp = copy_rtx (PATTERN (floater));
9235 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9236 delete_insn (floater);
9237 continue;
9243 static int
9244 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9245 int reversed, rtx dest,
9246 rtx src1, rtx src2)
9247 {
9248 int insn_code_number;
9249 rtx_insn *start, *end;
9251 /* Create a PARALLEL with the patterns of ANCHOR and
9252 FLOATER, try to recognize it, then test constraints
9253 for the resulting pattern.
9255 If the pattern doesn't match or the constraints
9256 aren't met keep searching for a suitable floater
9257 insn. */
9258 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9259 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9260 INSN_CODE (new_rtx) = -1;
9261 insn_code_number = recog_memoized (new_rtx);
9262 basic_block bb = BLOCK_FOR_INSN (anchor);
9263 if (insn_code_number < 0
9264 || (extract_insn (new_rtx),
9265 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9266 return 0;
9268 if (reversed)
9269 {
9270 start = anchor;
9271 end = floater;
9272 }
9273 else
9274 {
9275 start = floater;
9276 end = anchor;
9277 }
9279 /* There are up to three operands to consider: one
9280 output and two inputs.
9282 The output must not be used between FLOATER & ANCHOR
9283 exclusive. The inputs must not be set between
9284 FLOATER and ANCHOR exclusive. */
9286 if (reg_used_between_p (dest, start, end))
9287 return 0;
9289 if (reg_set_between_p (src1, start, end))
9290 return 0;
9292 if (reg_set_between_p (src2, start, end))
9293 return 0;
9295 /* If we get here, then everything is good. */
9296 return 1;
9299 /* Return nonzero if references for INSN are delayed.
9301 Millicode insns are actually function calls with some special
9302 constraints on arguments and register usage.
9304 Millicode calls always expect their arguments in the integer argument
9305 registers, and always return their result in %r29 (ret1). They
9306 are expected to clobber their arguments, %r1, %r29, and the return
9307 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9309 This function tells reorg that the references to arguments and
9310 millicode calls do not appear to happen until after the millicode call.
9311 This allows reorg to put insns which set the argument registers into the
9312 delay slot of the millicode call -- thus they act more like traditional
9313 CALL_INSNs.
9315 Note we cannot consider side effects of the insn to be delayed because
9316 the branch and link insn will clobber the return pointer. If we happened
9317 to use the return pointer in the delay slot of the call, then we lose.
9319 get_attr_type will try to recognize the given insn, so make sure to
9320 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9321 in particular. */
9322 int
9323 pa_insn_refs_are_delayed (rtx_insn *insn)
9324 {
9325 return ((NONJUMP_INSN_P (insn)
9326 && GET_CODE (PATTERN (insn)) != SEQUENCE
9327 && GET_CODE (PATTERN (insn)) != USE
9328 && GET_CODE (PATTERN (insn)) != CLOBBER
9329 && get_attr_type (insn) == TYPE_MILLI));
9332 /* Promote the return value, but not the arguments. */
9334 static machine_mode
9335 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9336 machine_mode mode,
9337 int *punsignedp ATTRIBUTE_UNUSED,
9338 const_tree fntype ATTRIBUTE_UNUSED,
9339 int for_return)
9341 if (for_return == 0)
9342 return mode;
9343 return promote_mode (type, mode, punsignedp);
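/* So a function returning a 16-bit short has its value widened to
   word_mode (SImode on the 32-bit port), while an incoming short
   argument stays in HImode; only the return value is promoted.  */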
9346 /* On the HP-PA the value is found in register(s) 28(-29), unless
9347 the mode is SF or DF. Then the value is returned in fr4 (32).
9349 This must perform the same promotions as PROMOTE_MODE, else promoting
9350 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9352 Small structures must be returned in a PARALLEL on PA64 in order
9353 to match the HP Compiler ABI. */
9355 static rtx
9356 pa_function_value (const_tree valtype,
9357 const_tree func ATTRIBUTE_UNUSED,
9358 bool outgoing ATTRIBUTE_UNUSED)
9360 machine_mode valmode;
9362 if (AGGREGATE_TYPE_P (valtype)
9363 || TREE_CODE (valtype) == COMPLEX_TYPE
9364 || TREE_CODE (valtype) == VECTOR_TYPE)
9366 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9368 /* Handle aggregates that fit exactly in a word or double word. */
9369 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9370 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9372 if (TARGET_64BIT)
9374 /* Aggregates with a size less than or equal to 128 bits are
9375 returned in GR 28(-29). They are left justified. The pad
9376 bits are undefined. Larger aggregates are returned in
9377 memory. */
9378 rtx loc[2];
9379 int i, offset = 0;
9380 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9382 for (i = 0; i < ub; i++)
9384 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9385 gen_rtx_REG (DImode, 28 + i),
9386 GEN_INT (offset));
9387 offset += 8;
9390 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9392 else if (valsize > UNITS_PER_WORD)
9394 /* Aggregates 5 to 8 bytes in size are returned in general
9395 registers r28-r29 in the same manner as other non
9396 floating-point objects. The data is right-justified and
9397 zero-extended to 64 bits. This is opposite to the normal
9398 justification used on big endian targets and requires
9399 special treatment. */
9400 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9401 gen_rtx_REG (DImode, 28), const0_rtx);
9402 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9406 if ((INTEGRAL_TYPE_P (valtype)
9407 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9408 || POINTER_TYPE_P (valtype))
9409 valmode = word_mode;
9410 else
9411 valmode = TYPE_MODE (valtype);
9413 if (TREE_CODE (valtype) == REAL_TYPE
9414 && !AGGREGATE_TYPE_P (valtype)
9415 && TYPE_MODE (valtype) != TFmode
9416 && !TARGET_SOFT_FLOAT)
9417 return gen_rtx_REG (valmode, 32);
9419 return gen_rtx_REG (valmode, 28);
9422 /* Implement the TARGET_LIBCALL_VALUE hook. */
9424 static rtx
9425 pa_libcall_value (machine_mode mode,
9426 const_rtx fun ATTRIBUTE_UNUSED)
9428 if (! TARGET_SOFT_FLOAT
9429 && (mode == SFmode || mode == DFmode))
9430 return gen_rtx_REG (mode, 32);
9431 else
9432 return gen_rtx_REG (mode, 28);
9435 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9437 static bool
9438 pa_function_value_regno_p (const unsigned int regno)
9440 if (regno == 28
9441 || (! TARGET_SOFT_FLOAT && regno == 32))
9442 return true;
9444 return false;
9447 /* Update the data in CUM to advance over an argument
9448 of mode MODE and data type TYPE.
9449 (TYPE is null for libcalls where that information may not be available.) */
9451 static void
9452 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9453 const_tree type, bool named ATTRIBUTE_UNUSED)
9455 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9456 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9458 cum->nargs_prototype--;
9459 cum->words += (arg_size
9460 + ((cum->words & 01)
9461 && type != NULL_TREE
9462 && arg_size > 1));
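/* Worked example for the 32-bit ABI: after one int argument
   (cum->words == 1), a double (arg_size == 2) advances cum->words by
   three: one padding word (a multiword argument cannot start in an odd
   slot) plus the two data words, leaving cum->words == 4.  */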
9465 /* Return the location of a parameter that is passed in a register or NULL
9466 if the parameter has any component that is passed in memory.
9468 This is new code and will be pushed into the net sources after
9469 further testing.
9471 ??? We might want to restructure this so that it looks more like other
9472 ports. */
9473 static rtx
9474 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9475 const_tree type, bool named ATTRIBUTE_UNUSED)
9477 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9478 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9479 int alignment = 0;
9480 int arg_size;
9481 int fpr_reg_base;
9482 int gpr_reg_base;
9483 rtx retval;
9485 if (mode == VOIDmode)
9486 return NULL_RTX;
9488 arg_size = FUNCTION_ARG_SIZE (mode, type);
9490 /* If this arg would be passed partially or totally on the stack, then
9491 this routine should return zero. pa_arg_partial_bytes will
9492 handle arguments which are split between regs and stack slots if
9493 the ABI mandates split arguments. */
9494 if (!TARGET_64BIT)
9496 /* The 32-bit ABI does not split arguments. */
9497 if (cum->words + arg_size > max_arg_words)
9498 return NULL_RTX;
9500 else
9502 if (arg_size > 1)
9503 alignment = cum->words & 1;
9504 if (cum->words + alignment >= max_arg_words)
9505 return NULL_RTX;
9508 /* The 32bit ABIs and the 64bit ABIs are rather different,
9509 particularly in their handling of FP registers. We might
9510 be able to cleverly share code between them, but I'm not
9511 going to bother in the hope that splitting them up results
9512 in code that is more easily understood. */
9514 if (TARGET_64BIT)
9516 /* Advance the base registers to their current locations.
9518 Remember, gprs grow towards smaller register numbers while
9519 fprs grow to higher register numbers. Also remember that
9520 although FP regs are 32-bit addressable, we pretend that
9521 the registers are 64-bits wide. */
9522 gpr_reg_base = 26 - cum->words;
9523 fpr_reg_base = 32 + cum->words;
9525 /* Arguments wider than one word and small aggregates need special
9526 treatment. */
9527 if (arg_size > 1
9528 || mode == BLKmode
9529 || (type && (AGGREGATE_TYPE_P (type)
9530 || TREE_CODE (type) == COMPLEX_TYPE
9531 || TREE_CODE (type) == VECTOR_TYPE)))
9533 /* Double-extended precision (80-bit), quad-precision (128-bit)
9534 and aggregates including complex numbers are aligned on
9535 128-bit boundaries. The first eight 64-bit argument slots
9536 are associated one-to-one, with general registers r26
9537 through r19, and also with floating-point registers fr4
9538 through fr11. Arguments larger than one word are always
9539 passed in general registers.
9541 Using a PARALLEL with a word mode register results in left
9542 justified data on a big-endian target. */
9544 rtx loc[8];
9545 int i, offset = 0, ub = arg_size;
9547 /* Align the base register. */
9548 gpr_reg_base -= alignment;
9550 ub = MIN (ub, max_arg_words - cum->words - alignment);
9551 for (i = 0; i < ub; i++)
9553 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9554 gen_rtx_REG (DImode, gpr_reg_base),
9555 GEN_INT (offset));
9556 gpr_reg_base -= 1;
9557 offset += 8;
9560 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9563 else
9565 /* If the argument is larger than a word, then we know precisely
9566 which registers we must use. */
9567 if (arg_size > 1)
9569 if (cum->words)
9571 gpr_reg_base = 23;
9572 fpr_reg_base = 38;
9574 else
9576 gpr_reg_base = 25;
9577 fpr_reg_base = 34;
9580 /* Structures 5 to 8 bytes in size are passed in the general
9581 registers in the same manner as other non floating-point
9582 objects. The data is right-justified and zero-extended
9583 to 64 bits. This is opposite to the normal justification
9584 used on big endian targets and requires special treatment.
9585 We now define BLOCK_REG_PADDING to pad these objects.
9586 Aggregates, complex and vector types are passed in the same
9587 manner as structures. */
9588 if (mode == BLKmode
9589 || (type && (AGGREGATE_TYPE_P (type)
9590 || TREE_CODE (type) == COMPLEX_TYPE
9591 || TREE_CODE (type) == VECTOR_TYPE)))
9593 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9594 gen_rtx_REG (DImode, gpr_reg_base),
9595 const0_rtx);
9596 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9599 else
9601 /* We have a single word (32 bits). A simple computation
9602 will get us the register #s we need. */
9603 gpr_reg_base = 26 - cum->words;
9604 fpr_reg_base = 32 + 2 * cum->words;
9608 /* Determine if the argument needs to be passed in both general and
9609 floating point registers. */
9610 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9611 /* If we are doing soft-float with portable runtime, then there
9612 is no need to worry about FP regs. */
9613 && !TARGET_SOFT_FLOAT
9614 /* The parameter must be some kind of scalar float, else we just
9615 pass it in integer registers. */
9616 && GET_MODE_CLASS (mode) == MODE_FLOAT
9617 /* The target function must not have a prototype. */
9618 && cum->nargs_prototype <= 0
9619 /* libcalls do not need to pass items in both FP and general
9620 registers. */
9621 && type != NULL_TREE
9622 /* All this hair applies to "outgoing" args only. This includes
9623 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
9624 && !cum->incoming)
9625 /* Also pass outgoing floating arguments in both registers in indirect
9626 calls with the 32 bit ABI and the HP assembler since there is no
9627 way to specify argument locations in static functions. */
9628 || (!TARGET_64BIT
9629 && !TARGET_GAS
9630 && !cum->incoming
9631 && cum->indirect
9632 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9634 retval
9635 = gen_rtx_PARALLEL
9636 (mode,
9637 gen_rtvec (2,
9638 gen_rtx_EXPR_LIST (VOIDmode,
9639 gen_rtx_REG (mode, fpr_reg_base),
9640 const0_rtx),
9641 gen_rtx_EXPR_LIST (VOIDmode,
9642 gen_rtx_REG (mode, gpr_reg_base),
9643 const0_rtx)));
9645 else
9647 /* See if we should pass this parameter in a general register. */
9648 if (TARGET_SOFT_FLOAT
9649 /* Indirect calls in the normal 32bit ABI require all arguments
9650 to be passed in general registers. */
9651 || (!TARGET_PORTABLE_RUNTIME
9652 && !TARGET_64BIT
9653 && !TARGET_ELF32
9654 && cum->indirect)
9655 /* If the parameter is not a scalar floating-point parameter,
9656 then it belongs in GPRs. */
9657 || GET_MODE_CLASS (mode) != MODE_FLOAT
9658 /* Structure with single SFmode field belongs in GPR. */
9659 || (type && AGGREGATE_TYPE_P (type)))
9660 retval = gen_rtx_REG (mode, gpr_reg_base);
9661 else
9662 retval = gen_rtx_REG (mode, fpr_reg_base);
9664 return retval;
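/* A worked example (illustrative): on the 64-bit runtime, a 16-byte
   struct seen when cum->words == 1 needs 16-byte alignment, so one
   pad slot is skipped (alignment == 1).  It then occupies argument
   slots 2 and 3 and is described as a PARALLEL of (reg:DI 24) at
   offset 0 and (reg:DI 23) at offset 8 -- i.e., it is passed in
   r24/r23, left justified.  */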
9667 /* Arguments larger than one word are double word aligned. */
9669 static unsigned int
9670 pa_function_arg_boundary (machine_mode mode, const_tree type)
9672 bool singleword = (type
9673 ? (integer_zerop (TYPE_SIZE (type))
9674 || !TREE_CONSTANT (TYPE_SIZE (type))
9675 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9676 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9678 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
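/* For example (illustrative): on the 32-bit runtime, a DImode or
   8-byte struct argument exceeds UNITS_PER_WORD and is aligned to
   MAX_PARM_BOUNDARY (a double word), while an int argument, an empty
   struct, or a variable-sized type gets the single-word
   PARM_BOUNDARY.  */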
9681 /* If this arg would be passed totally in registers or totally on the stack,
9682 then this routine should return zero. */
9684 static int
9685 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9686 tree type, bool named ATTRIBUTE_UNUSED)
9688 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9689 unsigned int max_arg_words = 8;
9690 unsigned int offset = 0;
9692 if (!TARGET_64BIT)
9693 return 0;
9695 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9696 offset = 1;
9698 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9699 /* Arg fits fully into registers. */
9700 return 0;
9701 else if (cum->words + offset >= max_arg_words)
9702 /* Arg fully on the stack. */
9703 return 0;
9704 else
9705 /* Arg is split. */
9706 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
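/* A worked example (illustrative): on the 64-bit runtime, with
   cum->words == 6 and a 32-byte aggregate (four 8-byte words, even
   offset so no pad), 6 + 0 + 4 > 8 but 6 + 0 < 8, so the argument is
   split: (8 - 6 - 0) * UNITS_PER_WORD == 16 bytes are passed in
   registers and the remaining 16 bytes on the stack.  */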
9710 /* A get_unnamed_section callback for switching to the text section.
9712 This function is only used with SOM. Because we don't support
9713 named subspaces, we can only create a new subspace or switch back
9714 to the default text subspace. */
9716 static void
9717 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9719 gcc_assert (TARGET_SOM);
9720 if (TARGET_GAS)
9722 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9724 /* We only want to emit a .nsubspa directive once at the
9725 start of the function. */
9726 cfun->machine->in_nsubspa = 1;
9728 /* Create a new subspace for the text. This provides
9729 better stub placement and one-only functions. */
9730 if (cfun->decl
9731 && DECL_ONE_ONLY (cfun->decl)
9732 && !DECL_WEAK (cfun->decl))
9734 output_section_asm_op ("\t.SPACE $TEXT$\n"
9735 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9736 "ACCESS=44,SORT=24,COMDAT");
9737 return;
9740 else
9742 /* There isn't a current function or the body of the current
9743 function has been completed. So, we are changing to the
9744 text section to output debugging information. Thus, we
9745 need to forget that we are in the text section so that
9746 varasm.c will call us when text_section is selected again. */
9747 gcc_assert (!cfun || !cfun->machine
9748 || cfun->machine->in_nsubspa == 2);
9749 in_section = NULL;
9751 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9752 return;
9754 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9757 /* A get_unnamed_section callback for switching to comdat data
9758 sections. This function is only used with SOM. */
9760 static void
9761 som_output_comdat_data_section_asm_op (const void *data)
9763 in_section = NULL;
9764 output_section_asm_op (data);
9767 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9769 static void
9770 pa_som_asm_init_sections (void)
9772 text_section
9773 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9775 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9776 is not being generated. */
9777 som_readonly_data_section
9778 = get_unnamed_section (0, output_section_asm_op,
9779 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9781 /* When secondary definitions are not supported, SOM makes readonly
9782 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9783 the comdat flag. */
9784 som_one_only_readonly_data_section
9785 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9786 "\t.SPACE $TEXT$\n"
9787 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9788 "ACCESS=0x2c,SORT=16,COMDAT");
9791 /* When secondary definitions are not supported, SOM makes data one-only
9792 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9793 som_one_only_data_section
9794 = get_unnamed_section (SECTION_WRITE,
9795 som_output_comdat_data_section_asm_op,
9796 "\t.SPACE $PRIVATE$\n"
9797 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9798 "ACCESS=31,SORT=24,COMDAT");
9800 if (flag_tm)
9801 som_tm_clone_table_section
9802 = get_unnamed_section (0, output_section_asm_op,
9803 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9805 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9806 which reference data within the $TEXT$ space (for example constant
9807 strings in the $LIT$ subspace).
9809 The assemblers (GAS and HP as) both have problems with handling
9810 the difference of two symbols which is the other correct way to
9811 reference constant data during PIC code generation.
9813 So, there's no way to reference constant data which is in the
9814 $TEXT$ space during PIC generation. Instead place all constant
9815 data into the $PRIVATE$ subspace (this reduces sharing, but it
9816 works correctly). */
9817 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9819 /* We must not have a reference to an external symbol defined in a
9820 shared library in a readonly section, else the SOM linker will
9821 complain.
9823 So, we force exception information into the data section. */
9824 exception_section = data_section;
9827 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9829 static section *
9830 pa_som_tm_clone_table_section (void)
9832 return som_tm_clone_table_section;
9835 /* On hpux10, the linker will give an error if we have a reference
9836 in the read-only data section to a symbol defined in a shared
9837 library. Therefore, expressions that might require a reloc
9838 cannot be placed in the read-only data section. */
9840 static section *
9841 pa_select_section (tree exp, int reloc,
9842 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9844 if (TREE_CODE (exp) == VAR_DECL
9845 && TREE_READONLY (exp)
9846 && !TREE_THIS_VOLATILE (exp)
9847 && DECL_INITIAL (exp)
9848 && (DECL_INITIAL (exp) == error_mark_node
9849 || TREE_CONSTANT (DECL_INITIAL (exp)))
9850 && !reloc)
9852 if (TARGET_SOM
9853 && DECL_ONE_ONLY (exp)
9854 && !DECL_WEAK (exp))
9855 return som_one_only_readonly_data_section;
9856 else
9857 return readonly_data_section;
9859 else if (CONSTANT_CLASS_P (exp) && !reloc)
9860 return readonly_data_section;
9861 else if (TARGET_SOM
9862 && TREE_CODE (exp) == VAR_DECL
9863 && DECL_ONE_ONLY (exp)
9864 && !DECL_WEAK (exp))
9865 return som_one_only_data_section;
9866 else
9867 return data_section;
9870 /* Implement pa_reloc_rw_mask. */
9872 static int
9873 pa_reloc_rw_mask (void)
9875 /* We force (const (plus (symbol) (const_int))) to memory when the
9876 const_int doesn't fit in a 14-bit integer. The SOM linker can't
9877 handle this construct in read-only memory and we want to avoid
9878 this for ELF. So, we always force an RTX needing relocation to
9879 the data section. */
9880 return 3;
9883 static void
9884 pa_globalize_label (FILE *stream, const char *name)
9886 /* We only handle DATA objects here, functions are globalized in
9887 ASM_DECLARE_FUNCTION_NAME. */
9888 if (! FUNCTION_NAME_P (name))
9890 fputs ("\t.EXPORT ", stream);
9891 assemble_name (stream, name);
9892 fputs (",DATA\n", stream);
9896 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9898 static rtx
9899 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9900 int incoming ATTRIBUTE_UNUSED)
9902 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9905 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9907 bool
9908 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9910 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9911 PA64 ABI says that objects larger than 128 bits are returned in memory.
9912 Note, int_size_in_bytes can return -1 if the size of the object is
9913 variable or larger than the maximum value that can be expressed as
9914 a HOST_WIDE_INT. It can also return zero for an empty type. The
9915 simplest way to handle variable and empty types is to pass them in
9916 memory. This avoids problems in defining the boundaries of argument
9917 slots, allocating registers, etc. */
9918 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9919 || int_size_in_bytes (type) <= 0);
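/* Examples (illustrative): a 12-byte struct is returned in memory on
   the 32-bit runtime (12 > 8) but in registers on the 64-bit runtime
   (12 <= 16); a variable-sized type (int_size_in_bytes returns -1)
   and an empty struct (size 0) are both returned in memory.  */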
9922 /* Structure to hold declaration and name of external symbols that are
9923 emitted by GCC. We generate a vector of these symbols and output them
9924 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9925 This avoids putting out names that are never really used. */
9927 typedef struct GTY(()) extern_symbol
9929 tree decl;
9930 const char *name;
9931 } extern_symbol;
9933 /* Define gc'd vector type for extern_symbol. */
9935 /* Vector of extern_symbol pointers. */
9936 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9938 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9939 /* Mark DECL (name NAME) as an external reference (assembler output
9940 file FILE). This saves the names to output at the end of the file
9941 if actually referenced. */
9943 void
9944 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9946 gcc_assert (file == asm_out_file);
9947 extern_symbol p = {decl, name};
9948 vec_safe_push (extern_symbols, p);
9951 /* Output text required at the end of an assembler file.
9952 This includes deferred plabels and .import directives for
9953 all external symbols that were actually referenced. */
9955 static void
9956 pa_hpux_file_end (void)
9958 unsigned int i;
9959 extern_symbol *p;
9961 if (!NO_DEFERRED_PROFILE_COUNTERS)
9962 output_deferred_profile_counters ();
9964 output_deferred_plabels ();
9966 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9968 tree decl = p->decl;
9970 if (!TREE_ASM_WRITTEN (decl)
9971 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9972 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9975 vec_free (extern_symbols);
9977 #endif
9979 /* Return true if a change from mode FROM to mode TO for a register
9980 in register class RCLASS is invalid. */
9982 bool
9983 pa_cannot_change_mode_class (machine_mode from, machine_mode to,
9984 enum reg_class rclass)
9986 if (from == to)
9987 return false;
9989 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9990 return false;
9992 /* Reject changes to/from modes with zero size. */
9993 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
9994 return true;
9996 /* Reject changes to/from complex and vector modes. */
9997 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9998 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9999 return true;
10001 /* There is no way to load QImode or HImode values directly from memory
10002 to a FP register. SImode loads to the FP registers are not zero
10003 extended. On the 64-bit target, this conflicts with the definition
10004 of LOAD_EXTEND_OP. Thus, we can't allow changing between modes with
10005 different sizes in the floating-point registers. */
10006 if (MAYBE_FP_REG_CLASS_P (rclass))
10007 return true;
10009 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
10010 in specific sets of registers. Thus, we cannot allow changing
10011 to a larger mode when it's larger than a word. */
10012 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10013 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10014 return true;
10016 return false;
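/* Examples (illustrative): SFmode <-> SImode is always allowed (equal
   sizes); QImode -> SImode is rejected for any class that may include
   FP registers; and on the 32-bit runtime SImode -> DImode is
   rejected because the new mode is wider than a word.  */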
10019 /* Returns TRUE if it is a good idea to tie two pseudo registers
10020 when one has mode MODE1 and one has mode MODE2.
10021 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
10022 for any hard reg, then this must be FALSE for correct output.
10024 We should return FALSE for QImode and HImode because these modes
10025 are not ok in the floating-point registers. However, this prevents
10026 tying these modes to SImode and DImode in the general registers.
10027 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
10028 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
10029 in the floating-point registers. */
10031 bool
10032 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10034 /* Don't tie modes in different classes. */
10035 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10036 return false;
10038 return true;
10042 /* Length in units of the trampoline instruction code. */
10044 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10047 /* Output assembler code for a block containing the constant parts
10048 of a trampoline, leaving space for the variable parts.
10050 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10051 and then branches to the specified routine.
10053 This code template is copied from text segment to stack location
10054 and then patched with pa_trampoline_init to contain valid values,
10055 and then entered as a subroutine.
10057 It is best to keep this as small as possible to avoid having to
10058 flush multiple lines in the cache. */
10060 static void
10061 pa_asm_trampoline_template (FILE *f)
10063 if (!TARGET_64BIT)
10065 fputs ("\tldw 36(%r22),%r21\n", f);
10066 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
10067 if (ASSEMBLER_DIALECT == 0)
10068 fputs ("\tdepi 0,31,2,%r21\n", f);
10069 else
10070 fputs ("\tdepwi 0,31,2,%r21\n", f);
10071 fputs ("\tldw 4(%r21),%r19\n", f);
10072 fputs ("\tldw 0(%r21),%r21\n", f);
10073 if (TARGET_PA_20)
10075 fputs ("\tbve (%r21)\n", f);
10076 fputs ("\tldw 40(%r22),%r29\n", f);
10077 fputs ("\t.word 0\n", f);
10078 fputs ("\t.word 0\n", f);
10080 else
10082 fputs ("\tldsid (%r21),%r1\n", f);
10083 fputs ("\tmtsp %r1,%sr0\n", f);
10084 fputs ("\tbe 0(%sr0,%r21)\n", f);
10085 fputs ("\tldw 40(%r22),%r29\n", f);
10087 fputs ("\t.word 0\n", f);
10088 fputs ("\t.word 0\n", f);
10089 fputs ("\t.word 0\n", f);
10090 fputs ("\t.word 0\n", f);
10092 else
10094 fputs ("\t.dword 0\n", f);
10095 fputs ("\t.dword 0\n", f);
10096 fputs ("\t.dword 0\n", f);
10097 fputs ("\t.dword 0\n", f);
10098 fputs ("\tmfia %r31\n", f);
10099 fputs ("\tldd 24(%r31),%r1\n", f);
10100 fputs ("\tldd 24(%r1),%r27\n", f);
10101 fputs ("\tldd 16(%r1),%r1\n", f);
10102 fputs ("\tbve (%r1)\n", f);
10103 fputs ("\tldd 32(%r31),%r31\n", f);
10104 fputs ("\t.dword 0 ; fptr\n", f);
10105 fputs ("\t.dword 0 ; static link\n", f);
10109 /* Emit RTL insns to initialize the variable parts of a trampoline.
10110 FNADDR is an RTX for the address of the function's pure code.
10111 CXT is an RTX for the static chain value for the function.
10113 Move the function address to the trampoline template at offset 36.
10114 Move the static chain value to trampoline template at offset 40.
10115 Move the trampoline address to trampoline template at offset 44.
10116 Move r19 to trampoline template at offset 48. The latter two
10117 words create a plabel for the indirect call to the trampoline.
10119 A similar sequence is used for the 64-bit port but the plabel is
10120 at the beginning of the trampoline.
10122 Finally, the cache entries for the trampoline code are flushed.
10123 This is necessary to ensure that the trampoline instruction sequence
10124 is written to memory prior to any attempts at prefetching the code
10125 sequence. */
10127 static void
10128 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10130 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10131 rtx start_addr = gen_reg_rtx (Pmode);
10132 rtx end_addr = gen_reg_rtx (Pmode);
10133 rtx line_length = gen_reg_rtx (Pmode);
10134 rtx r_tramp, tmp;
10136 emit_block_move (m_tramp, assemble_trampoline_template (),
10137 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10138 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10140 if (!TARGET_64BIT)
10142 tmp = adjust_address (m_tramp, Pmode, 36);
10143 emit_move_insn (tmp, fnaddr);
10144 tmp = adjust_address (m_tramp, Pmode, 40);
10145 emit_move_insn (tmp, chain_value);
10147 /* Create a fat pointer for the trampoline. */
10148 tmp = adjust_address (m_tramp, Pmode, 44);
10149 emit_move_insn (tmp, r_tramp);
10150 tmp = adjust_address (m_tramp, Pmode, 48);
10151 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10153 /* fdc and fic only use registers for the address to flush,
10154 they do not accept integer displacements. We align the
10155 start and end addresses to the beginning of their respective
10156 cache lines to minimize the number of lines flushed. */
10157 emit_insn (gen_andsi3 (start_addr, r_tramp,
10158 GEN_INT (-MIN_CACHELINE_SIZE)));
10159 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10160 TRAMPOLINE_CODE_SIZE-1));
10161 emit_insn (gen_andsi3 (end_addr, tmp,
10162 GEN_INT (-MIN_CACHELINE_SIZE)));
10163 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10164 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10165 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10166 gen_reg_rtx (Pmode),
10167 gen_reg_rtx (Pmode)));
10169 else
10171 tmp = adjust_address (m_tramp, Pmode, 56);
10172 emit_move_insn (tmp, fnaddr);
10173 tmp = adjust_address (m_tramp, Pmode, 64);
10174 emit_move_insn (tmp, chain_value);
10176 /* Create a fat pointer for the trampoline. */
10177 tmp = adjust_address (m_tramp, Pmode, 16);
10178 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10179 r_tramp, 32)));
10180 tmp = adjust_address (m_tramp, Pmode, 24);
10181 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10183 /* fdc and fic only use registers for the address to flush,
10184 they do not accept integer displacements. We align the
10185 start and end addresses to the beginning of their respective
10186 cache lines to minimize the number of lines flushed. */
10187 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10188 emit_insn (gen_anddi3 (start_addr, tmp,
10189 GEN_INT (-MIN_CACHELINE_SIZE)));
10190 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10191 TRAMPOLINE_CODE_SIZE - 1));
10192 emit_insn (gen_anddi3 (end_addr, tmp,
10193 GEN_INT (-MIN_CACHELINE_SIZE)));
10194 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10195 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10196 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10197 gen_reg_rtx (Pmode),
10198 gen_reg_rtx (Pmode)));
10201 #ifdef HAVE_ENABLE_EXECUTE_STACK
10202 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10203 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
10204 #endif
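/* A worked example of the flush range (illustrative, assuming
   MIN_CACHELINE_SIZE == 32): for a 32-bit PA 1.x trampoline at
   address 0x1004, TRAMPOLINE_CODE_SIZE is 40, so start_addr ==
   (0x1004 & -32) == 0x1000 and end_addr == ((0x1004 + 39) & -32) ==
   0x1020; the fdc/fic loops then touch just the two cache lines the
   code spans.  */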
10207 /* Perform any machine-specific adjustment in the address of the trampoline.
10208 ADDR contains the address that was passed to pa_trampoline_init.
10209 Adjust the trampoline address to point to the plabel at offset 44. */
10211 static rtx
10212 pa_trampoline_adjust_address (rtx addr)
10214 if (!TARGET_64BIT)
10215 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10216 return addr;
10219 static rtx
10220 pa_delegitimize_address (rtx orig_x)
10222 rtx x = delegitimize_mem_from_attrs (orig_x);
10224 if (GET_CODE (x) == LO_SUM
10225 && GET_CODE (XEXP (x, 1)) == UNSPEC
10226 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10227 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10228 return x;
10231 static rtx
10232 pa_internal_arg_pointer (void)
10234 /* The argument pointer and the hard frame pointer are the same in
10235 the 32-bit runtime, so we don't need a copy. */
10236 if (TARGET_64BIT)
10237 return copy_to_reg (virtual_incoming_args_rtx);
10238 else
10239 return virtual_incoming_args_rtx;
10242 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10243 Frame pointer elimination is automatically handled. */
10245 static bool
10246 pa_can_eliminate (const int from, const int to)
10248 /* The argument cannot be eliminated in the 64-bit runtime. */
10249 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10250 return false;
10252 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10253 ? ! frame_pointer_needed
10254 : true);
10257 /* Define the offset between two registers, FROM to be eliminated and its
10258 replacement TO, at the start of a routine. */
10259 HOST_WIDE_INT
10260 pa_initial_elimination_offset (int from, int to)
10262 HOST_WIDE_INT offset;
10264 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10265 && to == STACK_POINTER_REGNUM)
10266 offset = -pa_compute_frame_size (get_frame_size (), 0);
10267 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10268 offset = 0;
10269 else
10270 gcc_unreachable ();
10272 return offset;
10275 static void
10276 pa_conditional_register_usage (void)
10278 int i;
10280 if (!TARGET_64BIT && !TARGET_PA_11)
10282 for (i = 56; i <= FP_REG_LAST; i++)
10283 fixed_regs[i] = call_used_regs[i] = 1;
10284 for (i = 33; i < 56; i += 2)
10285 fixed_regs[i] = call_used_regs[i] = 1;
10287 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10289 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10290 fixed_regs[i] = call_used_regs[i] = 1;
10292 if (flag_pic)
10293 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10296 /* Target hook for c_mode_for_suffix. */
10298 static machine_mode
10299 pa_c_mode_for_suffix (char suffix)
10301 if (HPUX_LONG_DOUBLE_LIBRARY)
10303 if (suffix == 'q')
10304 return TFmode;
10307 return VOIDmode;
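/* For example (illustrative): on HP-UX targets with the long double
   library, a constant such as 1.5q is given TFmode (128-bit);
   elsewhere the 'q' suffix is not accepted and VOIDmode is
   returned.  */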
10310 /* Target hook for function_section. */
10312 static section *
10313 pa_function_section (tree decl, enum node_frequency freq,
10314 bool startup, bool exit)
10316 /* Put functions in text section if target doesn't have named sections. */
10317 if (!targetm_common.have_named_sections)
10318 return text_section;
10320 /* Force nested functions into the same section as the containing
10321 function. */
10322 if (decl
10323 && DECL_SECTION_NAME (decl) == NULL
10324 && DECL_CONTEXT (decl) != NULL_TREE
10325 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10326 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10327 return function_section (DECL_CONTEXT (decl));
10329 /* Otherwise, use the default function section. */
10330 return default_function_section (decl, freq, startup, exit);
10333 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10335 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10336 that need more than three instructions to load prior to reload. This
10337 limit is somewhat arbitrary. It takes three instructions to load a
10338 CONST_INT from memory but two are memory accesses. It may be better
10339 to increase the allowed range for CONST_INTS. We may also be able
10340 to handle CONST_DOUBLES. */
10342 static bool
10343 pa_legitimate_constant_p (machine_mode mode, rtx x)
10345 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10346 return false;
10348 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10349 return false;
10351 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10352 legitimate constants. The other variants can't be handled by
10353 the move patterns after reload starts. */
10354 if (tls_referenced_p (x))
10355 return false;
10357 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10358 return false;
10360 if (TARGET_64BIT
10361 && HOST_BITS_PER_WIDE_INT > 32
10362 && GET_CODE (x) == CONST_INT
10363 && !reload_in_progress
10364 && !reload_completed
10365 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10366 && !pa_cint_ok_for_move (UINTVAL (x)))
10367 return false;
10369 if (function_label_operand (x, mode))
10370 return false;
10372 return true;
10375 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10377 static unsigned int
10378 pa_section_type_flags (tree decl, const char *name, int reloc)
10380 unsigned int flags;
10382 flags = default_section_type_flags (decl, name, reloc);
10384 /* Function labels are placed in the constant pool. This can
10385 cause a section conflict if decls are put in ".data.rel.ro"
10386 or ".data.rel.ro.local" using the __attribute__ construct. */
10387 if (strcmp (name, ".data.rel.ro") == 0
10388 || strcmp (name, ".data.rel.ro.local") == 0)
10389 flags |= SECTION_WRITE | SECTION_RELRO;
10391 return flags;
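/* An illustrative (hypothetical) trigger for the conflict described
   above:

     static void (*fp) (void)
       __attribute__((section (".data.rel.ro"))) = some_func;

   The function label is placed in the constant pool, so forcing
   SECTION_WRITE | SECTION_RELRO keeps the section flags consistent
   and avoids the conflict.  */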
10394 /* pa_legitimate_address_p recognizes an RTL expression that is a
10395 valid memory address for an instruction. The MODE argument is the
10396 machine mode for the MEM expression that wants to use this address.
10398 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10399 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10400 available with floating point loads and stores, and integer loads.
10401 We get better code by allowing indexed addresses in the initial
10402 RTL generation.
10404 The acceptance of indexed addresses as legitimate implies that we
10405 must provide patterns for doing indexed integer stores, or the move
10406 expanders must force the address of an indexed store to a register.
10407 We have adopted the latter approach.
10409 Another function of pa_legitimate_address_p is to ensure that
10410 the base register is a valid pointer for indexed instructions.
10411 On targets that have non-equivalent space registers, we have to
10412 know at the time of assembler output which register in a REG+REG
10413 pair is the base register. The REG_POINTER flag is sometimes lost
10414 in reload and the following passes, so it can't be relied on during
10415 code generation. Thus, we either have to canonicalize the order
10416 of the registers in REG+REG indexed addresses, or treat REG+REG
10417 addresses separately and provide patterns for both permutations.
10419 The latter approach requires several hundred additional lines of
10420 code in pa.md. The downside to canonicalizing is that a PLUS
10421 in the wrong order can't combine to form a scaled indexed
10422 memory operand. As we won't need to canonicalize the operands if
10423 the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10425 We initially break out scaled indexed addresses in canonical order
10426 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10427 scaled indexed addresses during RTL generation. However, fold_rtx
10428 has its own opinion on how the operands of a PLUS should be ordered.
10429 If one of the operands is equivalent to a constant, it will make
10430 that operand the second operand. As the base register is likely to
10431 be equivalent to a SYMBOL_REF, we have made it the second operand.
10433 pa_legitimate_address_p accepts REG+REG as legitimate when the
10434 operands are in the order INDEX+BASE on targets with non-equivalent
10435 space registers, and in any order on targets with equivalent space
10436 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10438 We treat a SYMBOL_REF as legitimate if it is part of the current
10439 function's constant-pool, because such addresses can actually be
10440 output as REG+SMALLINT. */
10442 static bool
10443 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10445 if ((REG_P (x)
10446 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10447 : REG_OK_FOR_BASE_P (x)))
10448 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10449 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10450 && REG_P (XEXP (x, 0))
10451 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10452 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10453 return true;
10455 if (GET_CODE (x) == PLUS)
10457 rtx base, index;
10459 /* For REG+REG, the base register should be in XEXP (x, 1),
10460 so check it first. */
10461 if (REG_P (XEXP (x, 1))
10462 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10463 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10464 base = XEXP (x, 1), index = XEXP (x, 0);
10465 else if (REG_P (XEXP (x, 0))
10466 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10467 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10468 base = XEXP (x, 0), index = XEXP (x, 1);
10469 else
10470 return false;
10472 if (GET_CODE (index) == CONST_INT)
10474 if (INT_5_BITS (index))
10475 return true;
10477 /* When INT14_OK_STRICT is false, a secondary reload is needed
10478 to adjust the displacement of SImode and DImode floating point
10479 instructions but this may fail when the register also needs
10480 reloading. So, we return false when STRICT is true. We
10481 also reject long displacements for float mode addresses since
10482 the majority of accesses will use floating point instructions
10483 that don't support 14-bit offsets. */
10484 if (!INT14_OK_STRICT
10485 && (strict || !(reload_in_progress || reload_completed))
10486 && mode != QImode
10487 && mode != HImode)
10488 return false;
10490 return base14_operand (index, mode);
10493 if (!TARGET_DISABLE_INDEXING
10494 /* Only accept the "canonical" INDEX+BASE operand order
10495 on targets with non-equivalent space registers. */
10496 && (TARGET_NO_SPACE_REGS
10497 ? REG_P (index)
10498 : (base == XEXP (x, 1) && REG_P (index)
10499 && (reload_completed
10500 || (reload_in_progress && HARD_REGISTER_P (base))
10501 || REG_POINTER (base))
10502 && (reload_completed
10503 || (reload_in_progress && HARD_REGISTER_P (index))
10504 || !REG_POINTER (index))))
10505 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10506 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10507 : REG_OK_FOR_INDEX_P (index))
10508 && borx_reg_operand (base, Pmode)
10509 && borx_reg_operand (index, Pmode))
10510 return true;
10512 if (!TARGET_DISABLE_INDEXING
10513 && GET_CODE (index) == MULT
10514 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10515 && REG_P (XEXP (index, 0))
10516 && GET_MODE (XEXP (index, 0)) == Pmode
10517 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10518 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10519 && GET_CODE (XEXP (index, 1)) == CONST_INT
10520 && INTVAL (XEXP (index, 1))
10521 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10522 && borx_reg_operand (base, Pmode))
10523 return true;
10525 return false;
10528 if (GET_CODE (x) == LO_SUM)
10530 rtx y = XEXP (x, 0);
10532 if (GET_CODE (y) == SUBREG)
10533 y = SUBREG_REG (y);
10535 if (REG_P (y)
10536 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10537 : REG_OK_FOR_BASE_P (y)))
10539 /* Needed for -fPIC */
10540 if (mode == Pmode
10541 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10542 return true;
10544 if (!INT14_OK_STRICT
10545 && (strict || !(reload_in_progress || reload_completed))
10546 && mode != QImode
10547 && mode != HImode)
10548 return false;
10550 if (CONSTANT_P (XEXP (x, 1)))
10551 return true;
10553 return false;
10556 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10557 return true;
10559 return false;
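/* Some address forms accepted above (illustrative):

     (reg %r4)                                        base register
     (plus (reg %r4) (const_int 12))                  short displacement
     (plus (reg %r5) (reg %r4))                       unscaled index + base
     (plus (mult (reg %r5) (const_int 4)) (reg %r4))  scaled index, SImode
     (lo_sum (reg %r4) (symbol_ref "x"))              low part of an address

   In the REG+REG forms, the base is expected in XEXP (x, 1) on
   targets with non-equivalent space registers, per the comment
   above.  */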
10562 /* Look for machine dependent ways to make the invalid address AD a
10563 valid address.
10565 For the PA, transform:
10567 memory(X + <large int>)
10569 into:
10571 if (<large int> & mask) >= 16
10572 Y = (<large int> & ~mask) + mask + 1 Round up.
10573 else
10574 Y = (<large int> & ~mask) Round down.
10575 Z = X + Y
10576 memory (Z + (<large int> - Y));
10578 This makes reload inheritance and reload_cse work better since Z
10579 can be reused.
10581 There may be more opportunities to improve code with this hook. */
10583 rtx
10584 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10585 int opnum, int type,
10586 int ind_levels ATTRIBUTE_UNUSED)
10588 long offset, newoffset, mask;
10589 rtx new_rtx, temp = NULL_RTX;
10591 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10592 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10594 if (optimize && GET_CODE (ad) == PLUS)
10595 temp = simplify_binary_operation (PLUS, Pmode,
10596 XEXP (ad, 0), XEXP (ad, 1));
10598 new_rtx = temp ? temp : ad;
10600 if (optimize
10601 && GET_CODE (new_rtx) == PLUS
10602 && GET_CODE (XEXP (new_rtx, 0)) == REG
10603 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10605 offset = INTVAL (XEXP ((new_rtx), 1));
10607 /* Choose rounding direction. Round up if we are >= halfway. */
10608 if ((offset & mask) >= ((mask + 1) / 2))
10609 newoffset = (offset & ~mask) + mask + 1;
10610 else
10611 newoffset = offset & ~mask;
10613 /* Ensure that long displacements are aligned. */
10614 if (mask == 0x3fff
10615 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10616 || (TARGET_64BIT && (mode) == DImode)))
10617 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10619 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10621 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10622 GEN_INT (newoffset));
10623 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10624 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10625 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10626 opnum, (enum reload_type) type);
10627 return ad;
10631 return NULL_RTX;
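/* A worked example (illustrative): for a DFmode access, MASK is 0x1f
   when INT14_OK_STRICT is false.  Given memory (X + 68), 68 & 0x1f ==
   4, which is below the halfway point of 16, so we round down:
   Y == 64.  X + 64 is reloaded into a base register and the final
   address is (base + 4), which fits the 5-bit displacement of the FP
   loads and stores.  */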
10634 /* Output address vector. */
10636 void
10637 pa_output_addr_vec (rtx lab, rtx body)
10639 int idx, vlen = XVECLEN (body, 0);
10641 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10642 if (TARGET_GAS)
10643 fputs ("\t.begin_brtab\n", asm_out_file);
10644 for (idx = 0; idx < vlen; idx++)
10646 ASM_OUTPUT_ADDR_VEC_ELT
10647 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10649 if (TARGET_GAS)
10650 fputs ("\t.end_brtab\n", asm_out_file);
10653 /* Output address difference vector. */
10655 void
10656 pa_output_addr_diff_vec (rtx lab, rtx body)
10658 rtx base = XEXP (XEXP (body, 0), 0);
10659 int idx, vlen = XVECLEN (body, 1);
10661 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10662 if (TARGET_GAS)
10663 fputs ("\t.begin_brtab\n", asm_out_file);
10664 for (idx = 0; idx < vlen; idx++)
10666 ASM_OUTPUT_ADDR_DIFF_ELT
10667 (asm_out_file,
10668 body,
10669 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10670 CODE_LABEL_NUMBER (base));
10672 if (TARGET_GAS)
10673 fputs ("\t.end_brtab\n", asm_out_file);
10676 /* This is a helper function for the other atomic operations. This function
10677 emits a loop that contains SEQ that iterates until a compare-and-swap
10678 operation at the end succeeds. MEM is the memory to be modified. SEQ is
10679 a set of instructions that takes a value from OLD_REG as an input and
10680 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be
10681 set to the current contents of MEM. After SEQ, a compare-and-swap will
10682 attempt to update MEM with NEW_REG. The function returns true when the
10683 loop was generated successfully. */
10685 static bool
10686 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10688 machine_mode mode = GET_MODE (mem);
10689 rtx_code_label *label;
10690 rtx cmp_reg, success, oldval;
10692 /* The loop we want to generate looks like
10694 cmp_reg = mem;
10695 label:
10696 old_reg = cmp_reg;
10697 seq;
10698 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10699 if (success)
10700 goto label;
10702 Note that we only do the plain load from memory once. Subsequent
10703 iterations use the value loaded by the compare-and-swap pattern. */
10705 label = gen_label_rtx ();
10706 cmp_reg = gen_reg_rtx (mode);
10708 emit_move_insn (cmp_reg, mem);
10709 emit_label (label);
10710 emit_move_insn (old_reg, cmp_reg);
10711 if (seq)
10712 emit_insn (seq);
10714 success = NULL_RTX;
10715 oldval = cmp_reg;
10716 if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10717 new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10718 MEMMODEL_RELAXED))
10719 return false;
10721 if (oldval != cmp_reg)
10722 emit_move_insn (cmp_reg, oldval);
10724 /* Mark this jump predicted not taken. */
10725 emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
10726 GET_MODE (success), 1, label, 0);
10727 return true;
10730 /* This function tries to implement an atomic exchange operation using a
10731 compare_and_swap loop. VAL is written to *MEM. The previous contents of
10732 *MEM are returned, using TARGET if possible. No memory model is required
10733 since a compare_and_swap loop is seq-cst. */
10735 rtx
10736 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10738 machine_mode mode = GET_MODE (mem);
10740 if (can_compare_and_swap_p (mode, true))
10742 if (!target || !register_operand (target, mode))
10743 target = gen_reg_rtx (mode);
10744 if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10745 return target;
10748 return NULL_RTX;
10751 /* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying
10752 arguments passed by hidden reference in the 32-bit HP runtime. Users
10753 can override this behavior for better compatibility with OpenMP at the
10754 risk of library incompatibilities. Arguments are always passed by value
10755 in the 64-bit HP runtime. */
10757 static bool
10758 pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED,
10759 machine_mode mode ATTRIBUTE_UNUSED,
10760 const_tree type ATTRIBUTE_UNUSED,
10761 bool named ATTRIBUTE_UNUSED)
10763 return !TARGET_CALLER_COPIES;
10766 #include "gt-pa.h"