/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "dbxout.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "function.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "df.h"
#include "opts.h"
/* Return nonzero if there is a bypass for the output of 
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
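
/* Note: this predicate appears to serve as the guard for the FP store
   bypasses declared with define_bypass in the pipeline descriptions in
   pa.md; it lets a dependent FP store get the shorter bypass latency
   when the producer and the store access the data in the same width.  */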

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, enum machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (enum machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, enum machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, enum machine_mode,
				 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, enum machine_mode,
				     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, enum machine_mode,
			    const_tree, bool);
static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					enum machine_mode,
					secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
						   enum machine_mode, int *,
						   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static enum machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (enum machine_mode, rtx);
static bool pa_legitimate_constant_p (enum machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
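
/* Usage example (from the comment above): -mfixed-range=fr4-fr31 marks
   fr4 through fr31 as fixed and call-used; if that range happens to
   cover everything in FP_REG_FIRST..FP_REG_LAST, the final check above
   also turns on MASK_DISABLE_FPREGS.  */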

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (targetm_common.except_unwind_info (&global_options) == UI_DWARF2
	  && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
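
/* Illustration: ival = 0x12345000 is accepted (its low 11 bits are zero
   and bit 31 is clear, so the masked value X above is 0), while
   ival = 0x00001400 is rejected because bit 10 falls inside the low
   11 bits.  */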

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
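
/* Worked example: x = 0x7f0, a run of seven 1s starting at bit 4.
   Then lsb_mask = 0x10 and t = ((0x7f0 >> 4) + 0x10) & ~0xf
   = 0x8f & ~0xf = 0x80, a power of two, so the function returns
   true: zdepi can deposit the sign-extended 5-bit value -1 as a
   7-bit field of 1s at that position.  */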

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
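
/* Example: for a mask that is all 1s except one contiguous run of 0s,
   say mask = ~0x3f0, we get ~mask = 0x3f0; adding its lowest set bit
   (0x10) gives 0x400, a power of two, so the mask is accepted.  */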

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
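
/* Example: mask = 0xff0000, a single contiguous run of 1s.  Adding the
   lowest set bit (0x10000) gives 0x1000000, a power of two, so a single
   depi can set those bits.  */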

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X, enum machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = factor * factor * COSTS_N_INSNS (8);
      else
	*total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A size N times larger than UNITS_PER_WORD needs N times as
	 many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
      if (factor == 0)
	factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return pa_tls_referenced_p (x);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp, true);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
				 XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p ((GET_MODE_SIZE (mode) == 4
				      ? SFmode : DFmode),
				     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
	   && fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& !memory_address_p ((GET_MODE_SIZE (mode) == 4
				       ? SFmode : DFmode),
				      XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
					  ? SFmode : DFmode),
					 XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
								       0)),
						       Pmode,
						       XEXP (XEXP (operand0, 0),
							     1),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
			      replace_equiv_address (operand0, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.   Fix this for 2.5.  */
  else if (scratch_reg
	   && CONSTANT_P (operand1)
	   && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      pa_emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory or a constant.  */
  else if (scratch_reg
	   && GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
								       0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0),
							     1),
						       scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg,
			  replace_equiv_address (operand1, scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      /* Legitimize TLS symbol references.  This happens for references
	 that aren't a legitimate constant.  */
      if (PA_SYMBOL_REF_TLS_P (operand1))
	operand1 = legitimize_tls_address (operand1);

      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && pa_cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Various sets are created during RTL generation which don't
	     have the REG_POINTER flag correctly set.  After the CSE pass,
	     instruction recognition can fail if we don't consistently
	     set this flag when performing register copies.  This should
	     also improve the opportunities for creating insns that use
	     unscaled indexing.  */
	  if (REG_P (operand0) && REG_P (operand1))
	    {
	      if (REG_POINTER (operand1)
		  && !REG_POINTER (operand0)
		  && !HARD_REGISTER_P (operand0))
		copy_reg_pointer (operand0, operand1);
	    }

	  /* When MEMs are broken out, the REG_POINTER flag doesn't
	     get set.  In some cases, we can set the REG_POINTER flag
	     from the declaration for the MEM.  */
	  if (REG_P (operand0)
	      && GET_CODE (operand1) == MEM
	      && !REG_POINTER (operand0))
	    {
	      tree decl = MEM_EXPR (operand1);

	      /* Set the register pointer flag and register alignment
		 if the declaration for this memory reference is a
		 pointer type.  */
	      if (decl)
		{
		  tree type;

		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
		     tree operand 1.  */
		  if (TREE_CODE (decl) == COMPONENT_REF)
		    decl = TREE_OPERAND (decl, 1);

		  type = TREE_TYPE (decl);
		  type = strip_array_types (type);

		  if (POINTER_TYPE_P (type))
		    {
		      int align;

		      type = TREE_TYPE (type);
		      /* Using TYPE_ALIGN_OK is rather conservative as
			 only the ada frontend actually sets it.  */
		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
			       : BITS_PER_UNIT);
		      mark_reg_pointer (operand0, align);
		    }
		}
	    }

	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
1908 not consider them legitimate constants. Loop optimizations can
1909 call the emit_move_xxx with one as a source. */
1910 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1911 || function_label_operand (operand1, VOIDmode)
1912 || (GET_CODE (operand1) == HIGH
1913 && symbolic_operand (XEXP (operand1, 0), mode)))
1915 int ishighonly = 0;
1917 if (GET_CODE (operand1) == HIGH)
1919 ishighonly = 1;
1920 operand1 = XEXP (operand1, 0);
1922 if (symbolic_operand (operand1, mode))
1924 /* Argh. The assembler and linker can't handle arithmetic
1925 involving plabels.
1927 So we force the plabel into memory, load operand0 from
1928 the memory location, then add in the constant part. */
1929 if ((GET_CODE (operand1) == CONST
1930 && GET_CODE (XEXP (operand1, 0)) == PLUS
1931 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1932 VOIDmode))
1933 || function_label_operand (operand1, VOIDmode))
1935 rtx temp, const_part;
1937 /* Figure out what (if any) scratch register to use. */
1938 if (reload_in_progress || reload_completed)
1940 scratch_reg = scratch_reg ? scratch_reg : operand0;
1941 /* SCRATCH_REG will hold an address and maybe the actual
1942 data. We want it in WORD_MODE regardless of what mode it
1943 was originally given to us. */
1944 scratch_reg = force_mode (word_mode, scratch_reg);
1946 else if (flag_pic)
1947 scratch_reg = gen_reg_rtx (Pmode);
1949 if (GET_CODE (operand1) == CONST)
1951 /* Save away the constant part of the expression. */
1952 const_part = XEXP (XEXP (operand1, 0), 1);
1953 gcc_assert (GET_CODE (const_part) == CONST_INT);
1955 /* Force the function label into memory. */
1956 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1958 else
1960 /* No constant part. */
1961 const_part = NULL_RTX;
1963 /* Force the function label into memory. */
1964 temp = force_const_mem (mode, operand1);
1968 /* Get the address of the memory location. PIC-ify it if
1969 necessary. */
1970 temp = XEXP (temp, 0);
1971 if (flag_pic)
1972 temp = legitimize_pic_address (temp, mode, scratch_reg);
1974 /* Put the address of the memory location into our destination
1975 register. */
1976 operands[1] = temp;
1977 pa_emit_move_sequence (operands, mode, scratch_reg);
1979 /* Now load from the memory location into our destination
1980 register. */
1981 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1982 pa_emit_move_sequence (operands, mode, scratch_reg);
1984 /* And add back in the constant part. */
1985 if (const_part != NULL_RTX)
1986 expand_inc (operand0, const_part);
1988 return 1;
1991 if (flag_pic)
1993 rtx temp;
1995 if (reload_in_progress || reload_completed)
1997 temp = scratch_reg ? scratch_reg : operand0;
1998 /* TEMP will hold an address and maybe the actual
1999 data. We want it in WORD_MODE regardless of what mode it
2000 was originally given to us. */
2001 temp = force_mode (word_mode, temp);
2003 else
2004 temp = gen_reg_rtx (Pmode);
2006 /* (const (plus (symbol) (const_int))) must be forced to
2007 memory during/after reload if the const_int will not fit
2008 in 14 bits. */
2009 if (GET_CODE (operand1) == CONST
2010 && GET_CODE (XEXP (operand1, 0)) == PLUS
2011 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2012 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
2013 && (reload_completed || reload_in_progress)
2014 && flag_pic)
2016 rtx const_mem = force_const_mem (mode, operand1);
2017 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
2018 mode, temp);
2019 operands[1] = replace_equiv_address (const_mem, operands[1]);
2020 pa_emit_move_sequence (operands, mode, temp);
2022 else
2024 operands[1] = legitimize_pic_address (operand1, mode, temp);
2025 if (REG_P (operand0) && REG_P (operands[1]))
2026 copy_reg_pointer (operand0, operands[1]);
2027 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
2030 /* On the HPPA, references to data space are supposed to use dp,
2031 register 27, but showing it in the RTL inhibits various cse
2032 and loop optimizations. */
2033 else
2035 rtx temp, set;
2037 if (reload_in_progress || reload_completed)
2039 temp = scratch_reg ? scratch_reg : operand0;
2040 /* TEMP will hold an address and maybe the actual
2041 data. We want it in WORD_MODE regardless of what mode it
2042 was originally given to us. */
2043 temp = force_mode (word_mode, temp);
2045 else
2046 temp = gen_reg_rtx (mode);
2048 /* Loading a SYMBOL_REF into a register makes that register
2049 safe to be used as the base in an indexed address.
2051 Don't mark hard registers though. That loses. */
2052 if (GET_CODE (operand0) == REG
2053 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2054 mark_reg_pointer (operand0, BITS_PER_UNIT);
2055 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2056 mark_reg_pointer (temp, BITS_PER_UNIT);
2058 if (ishighonly)
2059 set = gen_rtx_SET (mode, operand0, temp);
2060 else
2061 set = gen_rtx_SET (VOIDmode,
2062 operand0,
2063 gen_rtx_LO_SUM (mode, temp, operand1));
2065 emit_insn (gen_rtx_SET (VOIDmode,
2066 temp,
2067 gen_rtx_HIGH (mode, operand1)));
2068 emit_insn (set);
2071 return 1;
2073 else if (pa_tls_referenced_p (operand1))
2075 rtx tmp = operand1;
2076 rtx addend = NULL;
2078 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2080 addend = XEXP (XEXP (tmp, 0), 1);
2081 tmp = XEXP (XEXP (tmp, 0), 0);
2084 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2085 tmp = legitimize_tls_address (tmp);
2086 if (addend)
2088 tmp = gen_rtx_PLUS (mode, tmp, addend);
2089 tmp = force_operand (tmp, operands[0]);
2091 operands[1] = tmp;
2093 else if (GET_CODE (operand1) != CONST_INT
2094 || !pa_cint_ok_for_move (INTVAL (operand1)))
2096 rtx insn, temp;
2097 rtx op1 = operand1;
2098 HOST_WIDE_INT value = 0;
2099 HOST_WIDE_INT insv = 0;
2100 int insert = 0;
2102 if (GET_CODE (operand1) == CONST_INT)
2103 value = INTVAL (operand1);
2105 if (TARGET_64BIT
2106 && GET_CODE (operand1) == CONST_INT
2107 && HOST_BITS_PER_WIDE_INT > 32
2108 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2110 HOST_WIDE_INT nval;
2112 /* Extract the low order 32 bits of the value and sign extend.
2113 	     If the new value is the same as the original value, we can
2114 	     use the original value as-is.  If the new value is
2115 different, we use it and insert the most-significant 32-bits
2116 of the original value into the final result. */
2117 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2118 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2119 if (value != nval)
2121 #if HOST_BITS_PER_WIDE_INT > 32
2122 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2123 #endif
2124 insert = 1;
2125 value = nval;
2126 operand1 = GEN_INT (nval);
2130 if (reload_in_progress || reload_completed)
2131 temp = scratch_reg ? scratch_reg : operand0;
2132 else
2133 temp = gen_reg_rtx (mode);
2135 /* We don't directly split DImode constants on 32-bit targets
2136 because PLUS uses an 11-bit immediate and the insn sequence
2137 generated is not as efficient as the one using HIGH/LO_SUM. */
2138 if (GET_CODE (operand1) == CONST_INT
2139 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2140 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2141 && !insert)
2143 /* Directly break constant into high and low parts. This
2144 provides better optimization opportunities because various
2145 passes recognize constants split with PLUS but not LO_SUM.
2146 We use a 14-bit signed low part except when the addition
2147 of 0x4000 to the high part might change the sign of the
2148 high part. */
2149 HOST_WIDE_INT low = value & 0x3fff;
2150 HOST_WIDE_INT high = value & ~ 0x3fff;
2152 if (low >= 0x2000)
2154 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2155 high += 0x2000;
2156 else
2157 high += 0x4000;
2160 low = value - high;
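	      /* Worked example of the split above (values illustrative):
		 for value 0x12345678, low = 0x1678 (5752) < 0x2000, so
		 high = 0x12344000 and the pair emitted is roughly

		     ldil L'0x12344000,%tmp
		     ldo 5752(%tmp),%dest

		 For value 0x12347678, low = 0x3678 >= 0x2000, so high is
		 bumped to 0x12348000 and the final low is -0x988 (-2440),
		 which still fits in ldo's signed 14-bit immediate.  */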
2162 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2163 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2165 else
2167 emit_insn (gen_rtx_SET (VOIDmode, temp,
2168 gen_rtx_HIGH (mode, operand1)));
2169 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2172 insn = emit_move_insn (operands[0], operands[1]);
2174 /* Now insert the most significant 32 bits of the value
2175 into the register. When we don't have a second register
2176 available, it could take up to nine instructions to load
2177 a 64-bit integer constant. Prior to reload, we force
2178 constants that would take more than three instructions
2179 to load to the constant pool. During and after reload,
2180 we have to handle all possible values. */
2181 if (insert)
2183 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2184 register and the value to be inserted is outside the
2185 range that can be loaded with three depdi instructions. */
2186 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2188 operand1 = GEN_INT (insv);
2190 emit_insn (gen_rtx_SET (VOIDmode, temp,
2191 gen_rtx_HIGH (mode, operand1)));
2192 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2193 emit_insn (gen_insv (operand0, GEN_INT (32),
2194 const0_rtx, temp));
2196 else
2198 int len = 5, pos = 27;
2200 /* Insert the bits using the depdi instruction. */
2201 while (pos >= 0)
2203 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2204 HOST_WIDE_INT sign = v5 < 0;
2206 /* Left extend the insertion. */
2207 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2208 while (pos > 0 && (insv & 1) == sign)
2210 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2211 len += 1;
2212 pos -= 1;
2215 emit_insn (gen_insv (operand0, GEN_INT (len),
2216 GEN_INT (pos), GEN_INT (v5)));
2218 len = pos > 0 && pos < 5 ? pos : 5;
2219 pos -= len;
2224 set_unique_reg_note (insn, REG_EQUAL, op1);
2226 return 1;
2229 /* Now have insn-emit do whatever it normally does. */
2230 return 0;
2233 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2234 it will need a link/runtime reloc). */
2236 int
2237 pa_reloc_needed (tree exp)
2239 int reloc = 0;
2241 switch (TREE_CODE (exp))
2243 case ADDR_EXPR:
2244 return 1;
2246 case POINTER_PLUS_EXPR:
2247 case PLUS_EXPR:
2248 case MINUS_EXPR:
2249 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2250 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2251 break;
2253 CASE_CONVERT:
2254 case NON_LVALUE_EXPR:
2255 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2256 break;
2258 case CONSTRUCTOR:
2260 tree value;
2261 unsigned HOST_WIDE_INT ix;
2263 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2264 if (value)
2265 reloc |= pa_reloc_needed (value);
2267 break;
2269 case ERROR_MARK:
2270 break;
2272 default:
2273 break;
2275 return reloc;
2279 /* Return the best assembler insn template
2280 for moving operands[1] into operands[0] as a fullword. */
2281 const char *
2282 pa_singlemove_string (rtx *operands)
2284 HOST_WIDE_INT intval;
2286 if (GET_CODE (operands[0]) == MEM)
2287 return "stw %r1,%0";
2288 if (GET_CODE (operands[1]) == MEM)
2289 return "ldw %1,%0";
2290 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2292 long i;
2293 REAL_VALUE_TYPE d;
2295 gcc_assert (GET_MODE (operands[1]) == SFmode);
2297 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2298 bit pattern. */
2299 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2300 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2302 operands[1] = GEN_INT (i);
2303 /* Fall through to CONST_INT case. */
2305 if (GET_CODE (operands[1]) == CONST_INT)
2307 intval = INTVAL (operands[1]);
2309 if (VAL_14_BITS_P (intval))
2310 return "ldi %1,%0";
2311 else if ((intval & 0x7ff) == 0)
2312 return "ldil L'%1,%0";
2313 else if (pa_zdepi_cint_p (intval))
2314 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2315 else
2316 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2318 return "copy %1,%0";
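/* For instance, the CONST_INT cases above select (register and values
   illustrative):

       ldi 42,%r26			42 fits in 14 bits
       ldil L'0x12345800,%r26		low 11 bits are zero
       zdepi -1,11,12,%r26		0xfff00000, a zdepi-able bitstring
       ldil L'0x12345,%r26
       ldo R'0x12345(%r26),%r26		anything else takes two insns  */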
2322 /* Compute position (in OP[1]) and width (in OP[2])
2323 useful for copying IMM to a register using the zdepi
2324 instructions. Store the immediate value to insert in OP[0]. */
2325 static void
2326 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2328 int lsb, len;
2330 /* Find the least significant set bit in IMM. */
2331 for (lsb = 0; lsb < 32; lsb++)
2333 if ((imm & 1) != 0)
2334 break;
2335 imm >>= 1;
2338 /* Choose variants based on *sign* of the 5-bit field. */
2339 if ((imm & 0x10) == 0)
2340 len = (lsb <= 28) ? 4 : 32 - lsb;
2341 else
2343 /* Find the width of the bitstring in IMM. */
2344 for (len = 5; len < 32 - lsb; len++)
2346 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2347 break;
2350 /* Sign extend IMM as a 5-bit value. */
2351 imm = (imm & 0xf) - 0x10;
2354 op[0] = imm;
2355 op[1] = 31 - lsb;
2356 op[2] = len;
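/* Example: imm = 0x00f00000 gives lsb = 20 and a shifted field of 0xf;
   bit 4 of the field is clear, so len = 4 and op = {0xf, 11, 4},
   i.e. "zdepi 15,11,4" deposits the 4-bit string at bits 20-23.  For
   imm = 0xfff00000, the field is 0xfff with bit 4 set, so len becomes
   12, imm sign extends to -1, and op = {-1, 11, 12}.  */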
2359 /* Compute position (in OP[1]) and width (in OP[2])
2360 useful for copying IMM to a register using the depdi,z
2361 instructions. Store the immediate value to insert in OP[0]. */
2363 static void
2364 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2366 int lsb, len, maxlen;
2368 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2370 /* Find the least significant set bit in IMM. */
2371 for (lsb = 0; lsb < maxlen; lsb++)
2373 if ((imm & 1) != 0)
2374 break;
2375 imm >>= 1;
2378 /* Choose variants based on *sign* of the 5-bit field. */
2379 if ((imm & 0x10) == 0)
2380 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2381 else
2383 /* Find the width of the bitstring in IMM. */
2384 for (len = 5; len < maxlen - lsb; len++)
2386 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2387 break;
2390 /* Extend length if host is narrow and IMM is negative. */
2391 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2392 len += 32;
2394 /* Sign extend IMM as a 5-bit value. */
2395 imm = (imm & 0xf) - 0x10;
2398 op[0] = imm;
2399 op[1] = 63 - lsb;
2400 op[2] = len;
2403 /* Output assembler code to perform a doubleword move insn
2404 with operands OPERANDS. */
2406 const char *
2407 pa_output_move_double (rtx *operands)
2409 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2410 rtx latehalf[2];
2411 rtx addreg0 = 0, addreg1 = 0;
2413 /* First classify both operands. */
2415 if (REG_P (operands[0]))
2416 optype0 = REGOP;
2417 else if (offsettable_memref_p (operands[0]))
2418 optype0 = OFFSOP;
2419 else if (GET_CODE (operands[0]) == MEM)
2420 optype0 = MEMOP;
2421 else
2422 optype0 = RNDOP;
2424 if (REG_P (operands[1]))
2425 optype1 = REGOP;
2426 else if (CONSTANT_P (operands[1]))
2427 optype1 = CNSTOP;
2428 else if (offsettable_memref_p (operands[1]))
2429 optype1 = OFFSOP;
2430 else if (GET_CODE (operands[1]) == MEM)
2431 optype1 = MEMOP;
2432 else
2433 optype1 = RNDOP;
2435 /* Check for the cases that the operand constraints are not
2436 supposed to allow to happen. */
2437 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2439 /* Handle copies between general and floating registers. */
2441 if (optype0 == REGOP && optype1 == REGOP
2442 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2444 if (FP_REG_P (operands[0]))
2446 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2447 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2448 return "{fldds|fldd} -16(%%sp),%0";
2450 else
2452 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2453 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2454 return "{ldws|ldw} -12(%%sp),%R0";
2458 /* Handle auto decrementing and incrementing loads and stores
2459 specifically, since the structure of the function doesn't work
2460 	 for them without major modification.  Do it better when we teach
2461 	 this port about the general inc/dec addressing of the PA.
2462 (This was written by tege. Chide him if it doesn't work.) */
2464 if (optype0 == MEMOP)
2466 /* We have to output the address syntax ourselves, since print_operand
2467 doesn't deal with the addresses we want to use. Fix this later. */
2469 rtx addr = XEXP (operands[0], 0);
2470 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2472 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2474 operands[0] = XEXP (addr, 0);
2475 gcc_assert (GET_CODE (operands[1]) == REG
2476 && GET_CODE (operands[0]) == REG);
2478 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2480 /* No overlap between high target register and address
2481 register. (We do this in a non-obvious way to
2482 save a register file writeback) */
2483 if (GET_CODE (addr) == POST_INC)
2484 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2485 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2487 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2489 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2491 operands[0] = XEXP (addr, 0);
2492 gcc_assert (GET_CODE (operands[1]) == REG
2493 && GET_CODE (operands[0]) == REG);
2495 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2496 /* No overlap between high target register and address
2497 register. (We do this in a non-obvious way to save a
2498 register file writeback) */
2499 if (GET_CODE (addr) == PRE_INC)
2500 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2501 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2504 if (optype1 == MEMOP)
2506 /* We have to output the address syntax ourselves, since print_operand
2507 doesn't deal with the addresses we want to use. Fix this later. */
2509 rtx addr = XEXP (operands[1], 0);
2510 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2512 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2514 operands[1] = XEXP (addr, 0);
2515 gcc_assert (GET_CODE (operands[0]) == REG
2516 && GET_CODE (operands[1]) == REG);
2518 if (!reg_overlap_mentioned_p (high_reg, addr))
2520 /* No overlap between high target register and address
2521 register. (We do this in a non-obvious way to
2522 save a register file writeback) */
2523 if (GET_CODE (addr) == POST_INC)
2524 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2525 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2527 else
2529 /* This is an undefined situation. We should load into the
2530 address register *and* update that register. Probably
2531 we don't need to handle this at all. */
2532 if (GET_CODE (addr) == POST_INC)
2533 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2534 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2537 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2539 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2541 operands[1] = XEXP (addr, 0);
2542 gcc_assert (GET_CODE (operands[0]) == REG
2543 && GET_CODE (operands[1]) == REG);
2545 if (!reg_overlap_mentioned_p (high_reg, addr))
2547 /* No overlap between high target register and address
2548 register. (We do this in a non-obvious way to
2549 save a register file writeback) */
2550 if (GET_CODE (addr) == PRE_INC)
2551 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2552 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2554 else
2556 /* This is an undefined situation. We should load into the
2557 address register *and* update that register. Probably
2558 we don't need to handle this at all. */
2559 if (GET_CODE (addr) == PRE_INC)
2560 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2561 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2564 else if (GET_CODE (addr) == PLUS
2565 && GET_CODE (XEXP (addr, 0)) == MULT)
2567 rtx xoperands[4];
2568 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2570 if (!reg_overlap_mentioned_p (high_reg, addr))
2572 xoperands[0] = high_reg;
2573 xoperands[1] = XEXP (addr, 1);
2574 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2575 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2576 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2577 xoperands);
2578 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2580 else
2582 xoperands[0] = high_reg;
2583 xoperands[1] = XEXP (addr, 1);
2584 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2585 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2586 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2587 xoperands);
2588 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2593 /* If an operand is an unoffsettable memory ref, find a register
2594 we can increment temporarily to make it refer to the second word. */
2596 if (optype0 == MEMOP)
2597 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2599 if (optype1 == MEMOP)
2600 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2602 /* Ok, we can do one word at a time.
2603 Normally we do the low-numbered word first.
2605 In either case, set up in LATEHALF the operands to use
2606 for the high-numbered word and in some cases alter the
2607 operands in OPERANDS to be suitable for the low-numbered word. */
2609 if (optype0 == REGOP)
2610 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2611 else if (optype0 == OFFSOP)
2612 latehalf[0] = adjust_address (operands[0], SImode, 4);
2613 else
2614 latehalf[0] = operands[0];
2616 if (optype1 == REGOP)
2617 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2618 else if (optype1 == OFFSOP)
2619 latehalf[1] = adjust_address (operands[1], SImode, 4);
2620 else if (optype1 == CNSTOP)
2621 split_double (operands[1], &operands[1], &latehalf[1]);
2622 else
2623 latehalf[1] = operands[1];
2625 /* If the first move would clobber the source of the second one,
2626 do them in the other order.
2628 This can happen in two cases:
2630 mem -> register where the first half of the destination register
2631 is the same register used in the memory's address. Reload
2632 can create such insns.
2634 mem in this case will be either register indirect or register
2635 indirect plus a valid offset.
2637 register -> register move where REGNO(dst) == REGNO(src + 1)
2638 someone (Tim/Tege?) claimed this can happen for parameter loads.
2640 Handle mem -> register case first. */
2641 if (optype0 == REGOP
2642 && (optype1 == MEMOP || optype1 == OFFSOP)
2643 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2644 operands[1], 0))
2646 /* Do the late half first. */
2647 if (addreg1)
2648 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2649 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2651 /* Then clobber. */
2652 if (addreg1)
2653 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2654 return pa_singlemove_string (operands);
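	  /* Example: a DImode load into %r4 from 0(%r4).  The low word
	     would clobber the address register, so the code above emits
	     the late half first:

		 ldw 4(%r4),%r5
		 ldw 0(%r4),%r4  */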
2657 /* Now handle register -> register case. */
2658 if (optype0 == REGOP && optype1 == REGOP
2659 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2661 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2662 return pa_singlemove_string (operands);
2665 /* Normal case: do the two words, low-numbered first. */
2667 output_asm_insn (pa_singlemove_string (operands), operands);
2669 /* Make any unoffsettable addresses point at high-numbered word. */
2670 if (addreg0)
2671 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2672 if (addreg1)
2673 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2675 /* Do that word. */
2676 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2678 /* Undo the adds we just did. */
2679 if (addreg0)
2680 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2681 if (addreg1)
2682 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2684 return "";
2687 const char *
2688 pa_output_fp_move_double (rtx *operands)
2690 if (FP_REG_P (operands[0]))
2692 if (FP_REG_P (operands[1])
2693 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2694 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2695 else
2696 output_asm_insn ("fldd%F1 %1,%0", operands);
2698 else if (FP_REG_P (operands[1]))
2700 output_asm_insn ("fstd%F0 %1,%0", operands);
2702 else
2704 rtx xoperands[2];
2706 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2708 /* This is a pain. You have to be prepared to deal with an
2709 arbitrary address here including pre/post increment/decrement.
2711 so avoid this in the MD. */
2712 gcc_assert (GET_CODE (operands[0]) == REG);
2714 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2715 xoperands[0] = operands[0];
2716 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2718 return "";
2721 /* Return a REG that occurs in ADDR with coefficient 1.
2722 ADDR can be effectively incremented by incrementing REG. */
2724 static rtx
2725 find_addr_reg (rtx addr)
2727 while (GET_CODE (addr) == PLUS)
2729 if (GET_CODE (XEXP (addr, 0)) == REG)
2730 addr = XEXP (addr, 0);
2731 else if (GET_CODE (XEXP (addr, 1)) == REG)
2732 addr = XEXP (addr, 1);
2733 else if (CONSTANT_P (XEXP (addr, 0)))
2734 addr = XEXP (addr, 1);
2735 else if (CONSTANT_P (XEXP (addr, 1)))
2736 addr = XEXP (addr, 0);
2737 else
2738 gcc_unreachable ();
2740 gcc_assert (GET_CODE (addr) == REG);
2741 return addr;
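/* For example, given (plus (reg %r26) (const_int 8)) this returns %r26;
   incrementing %r26 by 4 then makes the same address refer to the
   second word.  */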
2744 /* Emit code to perform a block move.
2746 OPERANDS[0] is the destination pointer as a REG, clobbered.
2747 OPERANDS[1] is the source pointer as a REG, clobbered.
2748 OPERANDS[2] is a register for temporary storage.
2749 OPERANDS[3] is a register for temporary storage.
2750 OPERANDS[4] is the size as a CONST_INT
2751 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2752 OPERANDS[6] is another temporary register. */
2754 const char *
2755 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2757 int align = INTVAL (operands[5]);
2758 unsigned long n_bytes = INTVAL (operands[4]);
2760 	/* We can't move more than a word at a time because the PA
2761 	   has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2762 if (align > (TARGET_64BIT ? 8 : 4))
2763 align = (TARGET_64BIT ? 8 : 4);
2765 /* Note that we know each loop below will execute at least twice
2766 (else we would have open-coded the copy). */
2767 switch (align)
2769 case 8:
2770 /* Pre-adjust the loop counter. */
2771 operands[4] = GEN_INT (n_bytes - 16);
2772 output_asm_insn ("ldi %4,%2", operands);
2774 /* Copying loop. */
2775 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2776 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2777 output_asm_insn ("std,ma %3,8(%0)", operands);
2778 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2779 output_asm_insn ("std,ma %6,8(%0)", operands);
2781 	  /* Handle the residual.  There could be up to 15 bytes of
2782 	     residual to copy!  */
2783 if (n_bytes % 16 != 0)
2785 operands[4] = GEN_INT (n_bytes % 8);
2786 if (n_bytes % 16 >= 8)
2787 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2788 if (n_bytes % 8 != 0)
2789 output_asm_insn ("ldd 0(%1),%6", operands);
2790 if (n_bytes % 16 >= 8)
2791 output_asm_insn ("std,ma %3,8(%0)", operands);
2792 if (n_bytes % 8 != 0)
2793 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2795 return "";
2797 case 4:
2798 /* Pre-adjust the loop counter. */
2799 operands[4] = GEN_INT (n_bytes - 8);
2800 output_asm_insn ("ldi %4,%2", operands);
2802 /* Copying loop. */
2803 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2804 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2805 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2806 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2807 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2809 /* Handle the residual. There could be up to 7 bytes of
2810 residual to copy! */
2811 if (n_bytes % 8 != 0)
2813 operands[4] = GEN_INT (n_bytes % 4);
2814 if (n_bytes % 8 >= 4)
2815 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2816 if (n_bytes % 4 != 0)
2817 output_asm_insn ("ldw 0(%1),%6", operands);
2818 if (n_bytes % 8 >= 4)
2819 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2820 if (n_bytes % 4 != 0)
2821 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2823 return "";
2825 case 2:
2826 /* Pre-adjust the loop counter. */
2827 operands[4] = GEN_INT (n_bytes - 4);
2828 output_asm_insn ("ldi %4,%2", operands);
2830 /* Copying loop. */
2831 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2832 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2833 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2834 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2835 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2837 /* Handle the residual. */
2838 if (n_bytes % 4 != 0)
2840 if (n_bytes % 4 >= 2)
2841 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2842 if (n_bytes % 2 != 0)
2843 output_asm_insn ("ldb 0(%1),%6", operands);
2844 if (n_bytes % 4 >= 2)
2845 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2846 if (n_bytes % 2 != 0)
2847 output_asm_insn ("stb %6,0(%0)", operands);
2849 return "";
2851 case 1:
2852 /* Pre-adjust the loop counter. */
2853 operands[4] = GEN_INT (n_bytes - 2);
2854 output_asm_insn ("ldi %4,%2", operands);
2856 /* Copying loop. */
2857 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2858 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2859 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2860 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2861 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2863 /* Handle the residual. */
2864 if (n_bytes % 2 != 0)
2866 output_asm_insn ("ldb 0(%1),%3", operands);
2867 output_asm_insn ("stb %3,0(%0)", operands);
2869 return "";
2871 default:
2872 gcc_unreachable ();
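/* As a concrete example (PA1.x mnemonics, operands illustrative),
   align == 4 and n_bytes == 23 emits:

	ldi 15,%2		; loop counter = 23 - 8
	ldws,ma 4(%1),%3
	ldws,ma 4(%1),%6
	stws,ma %3,4(%0)
	addib,>= -8,%2,.-12
	stws,ma %6,4(%0)	; two passes through the loop copy 16 bytes
	ldws,ma 4(%1),%3	; 23 % 8 == 7 >= 4, so copy one more word
	ldw 0(%1),%6		; 23 % 4 == 3 trailing bytes remain
	stws,ma %3,4(%0)
	stbys,e %6,3(%0)	; store 3 bytes, ending at byte offset 3  */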
2876 /* Count the number of insns necessary to handle this block move.
2878 Basic structure is the same as emit_block_move, except that we
2879 count insns rather than emit them. */
2881 static int
2882 compute_movmem_length (rtx insn)
2884 rtx pat = PATTERN (insn);
2885 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2886 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2887 unsigned int n_insns = 0;
2889 	/* We can't move more than a word at a time because the PA
2890 	   has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2891 if (align > (TARGET_64BIT ? 8 : 4))
2892 align = (TARGET_64BIT ? 8 : 4);
2894 /* The basic copying loop. */
2895 n_insns = 6;
2897 /* Residuals. */
2898 if (n_bytes % (2 * align) != 0)
2900 if ((n_bytes % (2 * align)) >= align)
2901 n_insns += 2;
2903 if ((n_bytes % align) != 0)
2904 n_insns += 2;
2907 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2908 return n_insns * 4;
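/* E.g. for align == 4 and n_bytes == 23: 6 loop insns, plus 2 for the
   23 % 8 == 7 >= 4 word residual, plus 2 for the 23 % 4 == 3 byte
   residual = 10 insns, so the length returned is 40 bytes.  */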
2911 /* Emit code to perform a block clear.
2913 OPERANDS[0] is the destination pointer as a REG, clobbered.
2914 OPERANDS[1] is a register for temporary storage.
2915 OPERANDS[2] is the size as a CONST_INT
2916 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2918 const char *
2919 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2921 int align = INTVAL (operands[3]);
2922 unsigned long n_bytes = INTVAL (operands[2]);
2924 	/* We can't clear more than a word at a time because the PA
2925 	   has no integer move insns longer than a word.  */
2926 if (align > (TARGET_64BIT ? 8 : 4))
2927 align = (TARGET_64BIT ? 8 : 4);
2929 	/* Note that we know each loop below will execute at least twice
2930 	   (else we would have open-coded the clear).  */
2931 switch (align)
2933 case 8:
2934 /* Pre-adjust the loop counter. */
2935 operands[2] = GEN_INT (n_bytes - 16);
2936 output_asm_insn ("ldi %2,%1", operands);
2938 /* Loop. */
2939 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2940 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2941 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2943 	  /* Handle the residual.  There could be up to 15 bytes of
2944 	     residual to clear!  */
2945 if (n_bytes % 16 != 0)
2947 operands[2] = GEN_INT (n_bytes % 8);
2948 if (n_bytes % 16 >= 8)
2949 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2950 if (n_bytes % 8 != 0)
2951 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2953 return "";
2955 case 4:
2956 /* Pre-adjust the loop counter. */
2957 operands[2] = GEN_INT (n_bytes - 8);
2958 output_asm_insn ("ldi %2,%1", operands);
2960 /* Loop. */
2961 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2962 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2963 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2965 	  /* Handle the residual.  There could be up to 7 bytes of
2966 	     residual to clear!  */
2967 if (n_bytes % 8 != 0)
2969 operands[2] = GEN_INT (n_bytes % 4);
2970 if (n_bytes % 8 >= 4)
2971 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2972 if (n_bytes % 4 != 0)
2973 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2975 return "";
2977 case 2:
2978 /* Pre-adjust the loop counter. */
2979 operands[2] = GEN_INT (n_bytes - 4);
2980 output_asm_insn ("ldi %2,%1", operands);
2982 /* Loop. */
2983 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2984 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2985 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2987 /* Handle the residual. */
2988 if (n_bytes % 4 != 0)
2990 if (n_bytes % 4 >= 2)
2991 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2992 if (n_bytes % 2 != 0)
2993 output_asm_insn ("stb %%r0,0(%0)", operands);
2995 return "";
2997 case 1:
2998 /* Pre-adjust the loop counter. */
2999 operands[2] = GEN_INT (n_bytes - 2);
3000 output_asm_insn ("ldi %2,%1", operands);
3002 /* Loop. */
3003 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3004 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3005 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3007 /* Handle the residual. */
3008 if (n_bytes % 2 != 0)
3009 output_asm_insn ("stb %%r0,0(%0)", operands);
3011 return "";
3013 default:
3014 gcc_unreachable ();
3018 /* Count the number of insns necessary to handle this block clear.
3020    Basic structure is the same as pa_output_block_clear, except that we
3021    count insns rather than emit them.  */
3023 static int
3024 compute_clrmem_length (rtx insn)
3026 rtx pat = PATTERN (insn);
3027 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3028 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3029 unsigned int n_insns = 0;
3031 	/* We can't clear more than a word at a time because the PA
3032 	   has no integer move insns longer than a word.  */
3033 if (align > (TARGET_64BIT ? 8 : 4))
3034 align = (TARGET_64BIT ? 8 : 4);
3036 /* The basic loop. */
3037 n_insns = 4;
3039 /* Residuals. */
3040 if (n_bytes % (2 * align) != 0)
3042 if ((n_bytes % (2 * align)) >= align)
3043 n_insns++;
3045 if ((n_bytes % align) != 0)
3046 n_insns++;
3049 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3050 return n_insns * 4;
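/* E.g. for align == 4 and n_bytes == 23: 4 loop insns, plus 1 for the
   23 % 8 == 7 >= 4 word residual, plus 1 for the 23 % 4 == 3 byte
   residual = 6 insns, so the length returned is 24 bytes.  */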
3054 const char *
3055 pa_output_and (rtx *operands)
3057 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3059 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3060 int ls0, ls1, ms0, p, len;
3062 for (ls0 = 0; ls0 < 32; ls0++)
3063 if ((mask & (1 << ls0)) == 0)
3064 break;
3066 for (ls1 = ls0; ls1 < 32; ls1++)
3067 if ((mask & (1 << ls1)) != 0)
3068 break;
3070 for (ms0 = ls1; ms0 < 32; ms0++)
3071 if ((mask & (1 << ms0)) == 0)
3072 break;
3074 gcc_assert (ms0 == 32);
3076 if (ls1 == 32)
3078 len = ls0;
3080 gcc_assert (len);
3082 operands[2] = GEN_INT (len);
3083 return "{extru|extrw,u} %1,31,%2,%0";
3085 else
3087 /* We could use this `depi' for the case above as well, but `depi'
3088 requires one more register file access than an `extru'. */
3090 p = 31 - ls0;
3091 len = ls1 - ls0;
3093 operands[2] = GEN_INT (p);
3094 operands[3] = GEN_INT (len);
3095 return "{depi|depwi} 0,%2,%3,%0";
3098 else
3099 return "and %1,%2,%0";
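/* Examples (operands illustrative): a mask of 0x000000ff has ls1 == 32,
   so it becomes "extru %1,31,8,%0" (extract the low 8 bits); a mask of
   0xffff00ff has ls0 == 8 and ls1 == 16, so it becomes "depi 0,23,8,%0"
   (clear bits 8-15; PA numbers bits from the MSB, hence 31 - 8 == 23).  */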
3102 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3103 storing the result in operands[0]. */
3104 const char *
3105 pa_output_64bit_and (rtx *operands)
3107 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3109 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3110 int ls0, ls1, ms0, p, len;
3112 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3113 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3114 break;
3116 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3117 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3118 break;
3120 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3121 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3122 break;
3124 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3126 if (ls1 == HOST_BITS_PER_WIDE_INT)
3128 len = ls0;
3130 gcc_assert (len);
3132 operands[2] = GEN_INT (len);
3133 return "extrd,u %1,63,%2,%0";
3135 else
3137 	  /* We could use this `depdi' for the case above as well, but `depdi'
3138 	     requires one more register file access than an `extrd,u'.  */
3140 p = 63 - ls0;
3141 len = ls1 - ls0;
3143 operands[2] = GEN_INT (p);
3144 operands[3] = GEN_INT (len);
3145 return "depdi 0,%2,%3,%0";
3148 else
3149 return "and %1,%2,%0";
3152 const char *
3153 pa_output_ior (rtx *operands)
3155 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3156 int bs0, bs1, p, len;
3158 if (INTVAL (operands[2]) == 0)
3159 return "copy %1,%0";
3161 for (bs0 = 0; bs0 < 32; bs0++)
3162 if ((mask & (1 << bs0)) != 0)
3163 break;
3165 for (bs1 = bs0; bs1 < 32; bs1++)
3166 if ((mask & (1 << bs1)) == 0)
3167 break;
3169 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3171 p = 31 - bs0;
3172 len = bs1 - bs0;
3174 operands[2] = GEN_INT (p);
3175 operands[3] = GEN_INT (len);
3176 return "{depi|depwi} -1,%2,%3,%0";
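/* Example: a mask of 0x00ff0000 gives bs0 == 16 and bs1 == 24, so the
   returned template becomes "depi -1,15,8,%0", depositing eight one
   bits whose rightmost bit lands at PA bit position 15 (LSB-relative
   bit 16).  */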
3179 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3180    storing the result in operands[0].  */
3181 const char *
3182 pa_output_64bit_ior (rtx *operands)
3184 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3185 int bs0, bs1, p, len;
3187 if (INTVAL (operands[2]) == 0)
3188 return "copy %1,%0";
3190 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3191 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3192 break;
3194 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3195 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3196 break;
3198 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3199 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3201 p = 63 - bs0;
3202 len = bs1 - bs0;
3204 operands[2] = GEN_INT (p);
3205 operands[3] = GEN_INT (len);
3206 return "depdi -1,%2,%3,%0";
3209 /* Target hook for assembling integer objects. This code handles
3210 aligned SI and DI integers specially since function references
3211 must be preceded by P%. */
3213 static bool
3214 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3216 if (size == UNITS_PER_WORD
3217 && aligned_p
3218 && function_label_operand (x, VOIDmode))
3220 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3221 output_addr_const (asm_out_file, x);
3222 fputc ('\n', asm_out_file);
3223 return true;
3225 return default_assemble_integer (x, size, aligned_p);
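/* E.g. an aligned, word-sized reference to a function foo (name
   illustrative) is emitted as

	.word	P%foo

   so that the linker substitutes a plabel (function pointer) rather
   than the raw code address.  */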
3228 /* Output an ascii string. */
3229 void
3230 pa_output_ascii (FILE *file, const char *p, int size)
3232 int i;
3233 int chars_output;
3234 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3236 /* The HP assembler can only take strings of 256 characters at one
3237 time. This is a limitation on input line length, *not* the
3238 length of the string. Sigh. Even worse, it seems that the
3239 restriction is in number of input characters (see \xnn &
3240 \whatever). So we have to do this very carefully. */
3242 fputs ("\t.STRING \"", file);
3244 chars_output = 0;
3245 for (i = 0; i < size; i += 4)
3247 int co = 0;
3248 int io = 0;
3249 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3251 register unsigned int c = (unsigned char) p[i + io];
3253 if (c == '\"' || c == '\\')
3254 partial_output[co++] = '\\';
3255 if (c >= ' ' && c < 0177)
3256 partial_output[co++] = c;
3257 else
3259 unsigned int hexd;
3260 partial_output[co++] = '\\';
3261 partial_output[co++] = 'x';
3262 hexd = c / 16 - 0 + '0';
3263 if (hexd > '9')
3264 hexd -= '9' - 'a' + 1;
3265 partial_output[co++] = hexd;
3266 hexd = c % 16 - 0 + '0';
3267 if (hexd > '9')
3268 hexd -= '9' - 'a' + 1;
3269 partial_output[co++] = hexd;
3272 if (chars_output + co > 243)
3274 fputs ("\"\n\t.STRING \"", file);
3275 chars_output = 0;
3277 fwrite (partial_output, 1, (size_t) co, file);
3278 chars_output += co;
3279 co = 0;
3281 fputs ("\"\n", file);
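/* E.g. the input bytes 'a', '"', 0x01 come out as

	.STRING "a\"\x01"

   with a new .STRING directive started whenever the output would
   approach the HP assembler's input line limit.  */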
3284 /* Try to rewrite floating point comparisons & branches to avoid
3285 useless add,tr insns.
3287 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3288 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3289 first attempt to remove useless add,tr insns. It is zero
3290 for the second pass as reorg sometimes leaves bogus REG_DEAD
3291 notes lying around.
3293 When CHECK_NOTES is zero we can only eliminate add,tr insns
3294 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3295 instructions. */
3296 static void
3297 remove_useless_addtr_insns (int check_notes)
3299 rtx insn;
3300 static int pass = 0;
3302 /* This is fairly cheap, so always run it when optimizing. */
3303 if (optimize > 0)
3305 int fcmp_count = 0;
3306 int fbranch_count = 0;
3308 /* Walk all the insns in this function looking for fcmp & fbranch
3309 instructions. Keep track of how many of each we find. */
3310 for (insn = get_insns (); insn; insn = next_insn (insn))
3312 rtx tmp;
3314 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3315 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3316 continue;
3318 tmp = PATTERN (insn);
3320 /* It must be a set. */
3321 if (GET_CODE (tmp) != SET)
3322 continue;
3324 /* If the destination is CCFP, then we've found an fcmp insn. */
3325 tmp = SET_DEST (tmp);
3326 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3328 fcmp_count++;
3329 continue;
3332 tmp = PATTERN (insn);
3333 /* If this is an fbranch instruction, bump the fbranch counter. */
3334 if (GET_CODE (tmp) == SET
3335 && SET_DEST (tmp) == pc_rtx
3336 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3337 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3338 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3339 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3341 fbranch_count++;
3342 continue;
3347 /* Find all floating point compare + branch insns. If possible,
3348 reverse the comparison & the branch to avoid add,tr insns. */
3349 for (insn = get_insns (); insn; insn = next_insn (insn))
3351 rtx tmp, next;
3353 /* Ignore anything that isn't an INSN. */
3354 if (GET_CODE (insn) != INSN)
3355 continue;
3357 tmp = PATTERN (insn);
3359 /* It must be a set. */
3360 if (GET_CODE (tmp) != SET)
3361 continue;
3363 /* The destination must be CCFP, which is register zero. */
3364 tmp = SET_DEST (tmp);
3365 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3366 continue;
3368 /* INSN should be a set of CCFP.
3370 See if the result of this insn is used in a reversed FP
3371 conditional branch. If so, reverse our condition and
3372 the branch. Doing so avoids useless add,tr insns. */
3373 next = next_insn (insn);
3374 while (next)
3376 /* Jumps, calls and labels stop our search. */
3377 if (GET_CODE (next) == JUMP_INSN
3378 || GET_CODE (next) == CALL_INSN
3379 || GET_CODE (next) == CODE_LABEL)
3380 break;
3382 /* As does another fcmp insn. */
3383 if (GET_CODE (next) == INSN
3384 && GET_CODE (PATTERN (next)) == SET
3385 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3386 && REGNO (SET_DEST (PATTERN (next))) == 0)
3387 break;
3389 next = next_insn (next);
3392 /* Is NEXT_INSN a branch? */
3393 if (next
3394 && GET_CODE (next) == JUMP_INSN)
3396 rtx pattern = PATTERN (next);
3398 	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3399 and CCFP dies, then reverse our conditional and the branch
3400 to avoid the add,tr. */
3401 if (GET_CODE (pattern) == SET
3402 && SET_DEST (pattern) == pc_rtx
3403 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3404 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3405 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3406 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3407 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3408 && (fcmp_count == fbranch_count
3409 || (check_notes
3410 && find_regno_note (next, REG_DEAD, 0))))
3412 /* Reverse the branch. */
3413 tmp = XEXP (SET_SRC (pattern), 1);
3414 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3415 XEXP (SET_SRC (pattern), 2) = tmp;
3416 INSN_CODE (next) = -1;
3418 /* Reverse our condition. */
3419 tmp = PATTERN (insn);
3420 PUT_CODE (XEXP (tmp, 1),
3421 (reverse_condition_maybe_unordered
3422 (GET_CODE (XEXP (tmp, 1)))));
3428 pass = !pass;
3432 /* You may have trouble believing this, but this is the 32 bit HP-PA
3433 stack layout. Wow.
3435 Offset Contents
3437 Variable arguments (optional; any number may be allocated)
3439 SP-(4*(N+9)) arg word N
3441 SP-56 arg word 5
3442 SP-52 arg word 4
3444 Fixed arguments (must be allocated; may remain unused)
3446 SP-48 arg word 3
3447 SP-44 arg word 2
3448 SP-40 arg word 1
3449 SP-36 arg word 0
3451 Frame Marker
3453 SP-32 External Data Pointer (DP)
3454 SP-28 External sr4
3455 SP-24 External/stub RP (RP')
3456 SP-20 Current RP
3457 SP-16 Static Link
3458 SP-12 Clean up
3459 SP-8 Calling Stub RP (RP'')
3460 SP-4 Previous SP
3462 Top of Frame
3464 SP-0 Stack Pointer (points to next available address)
3468 /* This function saves registers as follows. Registers marked with ' are
3469 this function's registers (as opposed to the previous function's).
3470 If a frame_pointer isn't needed, r4 is saved as a general register;
3471 the space for the frame pointer is still allocated, though, to keep
3472 things simple.
3475 Top of Frame
3477 SP (FP') Previous FP
3478 SP + 4 Alignment filler (sigh)
3479 SP + 8 Space for locals reserved here.
3483    SP + n       All call saved registers used.
3487 SP + o All call saved fp registers used.
3491 SP + p (SP') points to next available address.
3495 /* Global variables set by output_function_prologue(). */
3496 /* Size of frame. Need to know this to emit return insns from
3497 leaf procedures. */
3498 static HOST_WIDE_INT actual_fsize, local_fsize;
3499 static int save_fregs;
3501 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3502 Handle case where DISP > 8k by using the add_high_const patterns.
3504    Note that in the DISP > 8k case, we will leave the high part of the address
3505    in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.  */
3507 static void
3508 store_reg (int reg, HOST_WIDE_INT disp, int base)
3510 rtx insn, dest, src, basereg;
3512 src = gen_rtx_REG (word_mode, reg);
3513 basereg = gen_rtx_REG (Pmode, base);
3514 if (VAL_14_BITS_P (disp))
3516 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3517 insn = emit_move_insn (dest, src);
3519 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3521 rtx delta = GEN_INT (disp);
3522 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3524 emit_move_insn (tmpreg, delta);
3525 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3526 if (DO_FRAME_NOTES)
3528 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3529 gen_rtx_SET (VOIDmode, tmpreg,
3530 gen_rtx_PLUS (Pmode, basereg, delta)));
3531 RTX_FRAME_RELATED_P (insn) = 1;
3533 dest = gen_rtx_MEM (word_mode, tmpreg);
3534 insn = emit_move_insn (dest, src);
3536 else
3538 rtx delta = GEN_INT (disp);
3539 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3540 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3542 emit_move_insn (tmpreg, high);
3543 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3544 insn = emit_move_insn (dest, src);
3545 if (DO_FRAME_NOTES)
3546 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3547 gen_rtx_SET (VOIDmode,
3548 gen_rtx_MEM (word_mode,
3549 gen_rtx_PLUS (word_mode,
3550 basereg,
3551 delta)),
3552 src));
3555 if (DO_FRAME_NOTES)
3556 RTX_FRAME_RELATED_P (insn) = 1;
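/* Sketch of the DISP > 8k case above (registers illustrative): storing
   %r3 at displacement 12288 from %r30 on a 32-bit target assembles
   roughly as

	addil L'12288,%r30
	stw %r3,R'12288(%r1)

   which is why callers must know that %r1 ends up holding the high
   part of the address.  */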
3559 /* Emit RTL to store REG at the memory location specified by BASE and then
3560 add MOD to BASE. MOD must be <= 8k. */
3562 static void
3563 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3565 rtx insn, basereg, srcreg, delta;
3567 gcc_assert (VAL_14_BITS_P (mod));
3569 basereg = gen_rtx_REG (Pmode, base);
3570 srcreg = gen_rtx_REG (word_mode, reg);
3571 delta = GEN_INT (mod);
3573 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3574 if (DO_FRAME_NOTES)
3576 RTX_FRAME_RELATED_P (insn) = 1;
3578 /* RTX_FRAME_RELATED_P must be set on each frame related set
3579 in a parallel with more than one element. */
3580 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3581 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3585 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3586 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3587 whether to add a frame note or not.
3589 In the DISP > 8k case, we leave the high part of the address in %r1.
3590 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3592 static void
3593 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3595 rtx insn;
3597 if (VAL_14_BITS_P (disp))
3599 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3600 plus_constant (Pmode,
3601 gen_rtx_REG (Pmode, base), disp));
3603 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3605 rtx basereg = gen_rtx_REG (Pmode, base);
3606 rtx delta = GEN_INT (disp);
3607 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3609 emit_move_insn (tmpreg, delta);
3610 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3611 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3612 if (DO_FRAME_NOTES)
3613 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3614 gen_rtx_SET (VOIDmode, tmpreg,
3615 gen_rtx_PLUS (Pmode, basereg, delta)));
3617 else
3619 rtx basereg = gen_rtx_REG (Pmode, base);
3620 rtx delta = GEN_INT (disp);
3621 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3623 emit_move_insn (tmpreg,
3624 gen_rtx_PLUS (Pmode, basereg,
3625 gen_rtx_HIGH (Pmode, delta)));
3626 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3627 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3630 if (DO_FRAME_NOTES && note)
3631 RTX_FRAME_RELATED_P (insn) = 1;
3634 HOST_WIDE_INT
3635 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3637 int freg_saved = 0;
3638 int i, j;
3640 /* The code in pa_expand_prologue and pa_expand_epilogue must
3641 be consistent with the rounding and size calculation done here.
3642 Change them at the same time. */
3644 /* We do our own stack alignment. First, round the size of the
3645 stack locals up to a word boundary. */
3646 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3648 /* Space for previous frame pointer + filler. If any frame is
3649 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3650 waste some space here for the sake of HP compatibility. The
3651 first slot is only used when the frame pointer is needed. */
3652 if (size || frame_pointer_needed)
3653 size += STARTING_FRAME_OFFSET;
3655 /* If the current function calls __builtin_eh_return, then we need
3656 to allocate stack space for registers that will hold data for
3657 the exception handler. */
3658 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3660 unsigned int i;
3662 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3663 continue;
3664 size += i * UNITS_PER_WORD;
3667 /* Account for space used by the callee general register saves. */
3668 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3669 if (df_regs_ever_live_p (i))
3670 size += UNITS_PER_WORD;
3672 /* Account for space used by the callee floating point register saves. */
3673 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3674 if (df_regs_ever_live_p (i)
3675 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3677 freg_saved = 1;
3679 /* We always save both halves of the FP register, so always
3680 increment the frame size by 8 bytes. */
3681 size += 8;
3684 /* If any of the floating registers are saved, account for the
3685 alignment needed for the floating point register save block. */
3686 if (freg_saved)
3688 size = (size + 7) & ~7;
3689 if (fregs_live)
3690 *fregs_live = 1;
3693 /* The various ABIs include space for the outgoing parameters in the
3694 size of the current function's stack frame. We don't need to align
3695 for the outgoing arguments as their alignment is set by the final
3696 rounding for the frame as a whole. */
3697 size += crtl->outgoing_args_size;
3699 /* Allocate space for the fixed frame marker. This space must be
3700 allocated for any function that makes calls or allocates
3701 stack space. */
3702 if (!crtl->is_leaf || size)
3703 size += TARGET_64BIT ? 48 : 32;
3705 /* Finally, round to the preferred stack boundary. */
3706 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3707 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
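/* A rough 32-bit example (assuming STARTING_FRAME_OFFSET == 8 and a
   64-byte PREFERRED_STACK_BOUNDARY): 100 bytes of locals round to 100,
   +8 for the frame pointer slot and filler = 108; three callee GR saves
   add 12; 16 bytes of outgoing args and the 32-byte frame marker bring
   the total to 168, which rounds up to a 192-byte frame.  */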
3710 /* Generate the assembly code for function entry. FILE is a stdio
3711 stream to output the code to. SIZE is an int: how many units of
3712 temporary storage to allocate.
3714 Refer to the array `regs_ever_live' to determine which registers to
3715 save; `regs_ever_live[I]' is nonzero if register number I is ever
3716 used in the function. This function is responsible for knowing
3717 which registers should not be saved even if used. */
3719 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3720 of memory. If any fpu reg is used in the function, we allocate
3721 such a block here, at the bottom of the frame, just in case it's needed.
3723 If this function is a leaf procedure, then we may choose not
3724 to do a "save" insn. The decision about whether or not
3725 to do this is made in regclass.c. */
3727 static void
3728 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3730 /* The function's label and associated .PROC must never be
3731 separated and must be output *after* any profiling declarations
3732 to avoid changing spaces/subspaces within a procedure. */
3733 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3734 fputs ("\t.PROC\n", file);
3736 /* pa_expand_prologue does the dirty work now. We just need
3737 to output the assembler directives which denote the start
3738 of a function. */
3739 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3740 if (crtl->is_leaf)
3741 fputs (",NO_CALLS", file);
3742 else
3743 fputs (",CALLS", file);
3744 if (rp_saved)
3745 fputs (",SAVE_RP", file);
3747 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3748 at the beginning of the frame and that it is used as the frame
3749 pointer for the frame. We do this because our current frame
3750 layout doesn't conform to that specified in the HP runtime
3751 documentation and we need a way to indicate to programs such as
3752 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3753 isn't used by HP compilers but is supported by the assembler.
3754 However, SAVE_SP is supposed to indicate that the previous stack
3755 pointer has been saved in the frame marker. */
3756 if (frame_pointer_needed)
3757 fputs (",SAVE_SP", file);
3759 /* Pass on information about the number of callee register saves
3760 performed in the prologue.
3762 The compiler is supposed to pass the highest register number
3763 saved; the assembler then has to adjust that number before
3764 entering it into the unwind descriptor (to account for any
3765 caller saved registers with lower register numbers than the
3766 first callee saved register). */
3767 if (gr_saved)
3768 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3770 if (fr_saved)
3771 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3773 fputs ("\n\t.ENTRY\n", file);
3775 remove_useless_addtr_insns (0);
3778 void
3779 pa_expand_prologue (void)
3781 int merge_sp_adjust_with_store = 0;
3782 HOST_WIDE_INT size = get_frame_size ();
3783 HOST_WIDE_INT offset;
3784 int i;
3785 rtx insn, tmpreg;
3787 gr_saved = 0;
3788 fr_saved = 0;
3789 save_fregs = 0;
3791 /* Compute total size for frame pointer, filler, locals and rounding to
3792 the next word boundary. Similar code appears in pa_compute_frame_size
3793 and must be changed in tandem with this code. */
3794 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3795 if (local_fsize || frame_pointer_needed)
3796 local_fsize += STARTING_FRAME_OFFSET;
3798 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3799 if (flag_stack_usage_info)
3800 current_function_static_stack_size = actual_fsize;
3802 /* Compute a few things we will use often. */
3803 tmpreg = gen_rtx_REG (word_mode, 1);
3805 /* Save RP first. The calling conventions manual states RP will
3806 always be stored into the caller's frame at sp - 20 or sp - 16
3807 depending on which ABI is in use. */
3808 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3810 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3811 rp_saved = true;
3813 else
3814 rp_saved = false;
3816 /* Allocate the local frame and set up the frame pointer if needed. */
3817 if (actual_fsize != 0)
3819 if (frame_pointer_needed)
3821 /* Copy the old frame pointer temporarily into %r1. Set up the
3822 new stack pointer, then store away the saved old frame pointer
3823 into the stack at sp and at the same time update the stack
3824 pointer by actual_fsize bytes. Two versions: the first
3825 handles small (<8k) frames, the second handles large (>=8k)
3826 frames. */
3827 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3828 if (DO_FRAME_NOTES)
3829 RTX_FRAME_RELATED_P (insn) = 1;
3831 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3832 if (DO_FRAME_NOTES)
3833 RTX_FRAME_RELATED_P (insn) = 1;
3835 if (VAL_14_BITS_P (actual_fsize))
3836 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3837 else
3839 /* It is incorrect to store the saved frame pointer at *sp,
3840 then increment sp (writes beyond the current stack boundary).
3842 So instead use stwm to store at *sp and post-increment the
3843 stack pointer as an atomic operation. Then increment sp to
3844 finish allocating the new frame. */
3845 HOST_WIDE_INT adjust1 = 8192 - 64;
3846 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3848 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3849 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3850 adjust2, 1);
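/* For example (illustrative numbers only): with actual_fsize == 20000,
   adjust1 == 8192 - 64 == 8128 still fits in a signed 14-bit immediate,
   so the stwm stores the old frame pointer and bumps sp by 8128
   atomically; adjust2 == 20000 - 8128 == 11872 then finishes the
   allocation. */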
3853 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3854 we need to store the previous stack pointer (frame pointer)
3855 into the frame marker on targets that use the HP unwind
3856 library. This allows the HP unwind library to be used to
3857 unwind GCC frames. However, we are not fully compatible
3858 with the HP library because our frame layout differs from
3859 that specified in the HP runtime specification.
3861 We don't want a frame note on this instruction as the frame
3862 marker moves during dynamic stack allocation.
3864 This instruction also serves as a blockage to prevent
3865 register spills from being scheduled before the stack
3866 pointer is raised. This is necessary as we store
3867 registers using the frame pointer as a base register,
3868 and the frame pointer is set before sp is raised. */
3869 if (TARGET_HPUX_UNWIND_LIBRARY)
3871 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3872 GEN_INT (TARGET_64BIT ? -8 : -4));
3874 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3875 hard_frame_pointer_rtx);
3877 else
3878 emit_insn (gen_blockage ());
3880 /* No frame pointer needed. */
3881 else
3883 /* In some cases we can perform the first callee register save
3884 and allocating the stack frame at the same time. If so, just
3885 make a note of it and defer allocating the frame until saving
3886 the callee registers. */
3887 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3888 merge_sp_adjust_with_store = 1;
3889 /* Cannot optimize. Adjust the stack frame by actual_fsize
3890 bytes. */
3891 else
3892 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3893 actual_fsize, 1);
3897 /* Normal register save.
3899 Do not save the frame pointer in the frame_pointer_needed case. It
3900 was done earlier. */
3901 if (frame_pointer_needed)
3903 offset = local_fsize;
3905 /* Saving the EH return data registers in the frame is the simplest
3906 way to get the frame unwind information emitted. We put them
3907 just before the general registers. */
3908 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3910 unsigned int i, regno;
3912 for (i = 0; ; ++i)
3914 regno = EH_RETURN_DATA_REGNO (i);
3915 if (regno == INVALID_REGNUM)
3916 break;
3918 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
3919 offset += UNITS_PER_WORD;
3923 for (i = 18; i >= 4; i--)
3924 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3926 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
3927 offset += UNITS_PER_WORD;
3928 gr_saved++;
3930 /* Account for %r3 which is saved in a special place. */
3931 gr_saved++;
3933 /* No frame pointer needed. */
3934 else
3936 offset = local_fsize - actual_fsize;
3938 /* Saving the EH return data registers in the frame is the simplest
3939 way to get the frame unwind information emitted. */
3940 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3942 unsigned int i, regno;
3944 for (i = 0; ; ++i)
3946 regno = EH_RETURN_DATA_REGNO (i);
3947 if (regno == INVALID_REGNUM)
3948 break;
3950 /* If merge_sp_adjust_with_store is nonzero, then we can
3951 optimize the first save. */
3952 if (merge_sp_adjust_with_store)
3954 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3955 merge_sp_adjust_with_store = 0;
3957 else
3958 store_reg (regno, offset, STACK_POINTER_REGNUM);
3959 offset += UNITS_PER_WORD;
3963 for (i = 18; i >= 3; i--)
3964 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3966 /* If merge_sp_adjust_with_store is nonzero, then we can
3967 optimize the first GR save. */
3968 if (merge_sp_adjust_with_store)
3970 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3971 merge_sp_adjust_with_store = 0;
3973 else
3974 store_reg (i, offset, STACK_POINTER_REGNUM);
3975 offset += UNITS_PER_WORD;
3976 gr_saved++;
3979 /* If we wanted to merge the SP adjustment with a GR save, but we never
3980 did any GR saves, then just emit the adjustment here. */
3981 if (merge_sp_adjust_with_store)
3982 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3983 actual_fsize, 1);
3986 /* The hppa calling conventions say that %r19, the pic offset
3987 register, is saved at sp - 32 (in this function's frame)
3988 when generating PIC code. FIXME: What is the correct thing
3989 to do for functions which make no calls and allocate no
3990 frame? Do we need to allocate a frame, or can we just omit
3991 the save? For now we'll just omit the save.
3993 We don't want a note on this insn as the frame marker can
3994 move if there is a dynamic stack allocation. */
3995 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3997 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3999 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4003 /* Align pointer properly (doubleword boundary). */
4004 offset = (offset + 7) & ~7;
4006 /* Floating point register store. */
4007 if (save_fregs)
4009 rtx base;
4011 /* First get the frame or stack pointer to the start of the FP register
4012 save area. */
4013 if (frame_pointer_needed)
4015 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4016 base = hard_frame_pointer_rtx;
4018 else
4020 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4021 base = stack_pointer_rtx;
4024 /* Now actually save the FP registers. */
4025 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4027 if (df_regs_ever_live_p (i)
4028 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4030 rtx addr, insn, reg;
4031 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4032 reg = gen_rtx_REG (DFmode, i);
4033 insn = emit_move_insn (addr, reg);
4034 if (DO_FRAME_NOTES)
4036 RTX_FRAME_RELATED_P (insn) = 1;
4037 if (TARGET_64BIT)
4039 rtx mem = gen_rtx_MEM (DFmode,
4040 plus_constant (Pmode, base,
4041 offset));
4042 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4043 gen_rtx_SET (VOIDmode, mem, reg));
4045 else
4047 rtx meml = gen_rtx_MEM (SFmode,
4048 plus_constant (Pmode, base,
4049 offset));
4050 rtx memr = gen_rtx_MEM (SFmode,
4051 plus_constant (Pmode, base,
4052 offset + 4));
4053 rtx regl = gen_rtx_REG (SFmode, i);
4054 rtx regr = gen_rtx_REG (SFmode, i + 1);
4055 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4056 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4057 rtvec vec;
4059 RTX_FRAME_RELATED_P (setl) = 1;
4060 RTX_FRAME_RELATED_P (setr) = 1;
4061 vec = gen_rtvec (2, setl, setr);
4062 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4063 gen_rtx_SEQUENCE (VOIDmode, vec));
4066 offset += GET_MODE_SIZE (DFmode);
4067 fr_saved++;
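/* Rough sketch of the 32-bit frame this prologue builds, summarizing
   the code above rather than stating the ABI authoritatively: RP goes
   to the caller's frame at entry-sp - 20; the old %r3 is stored at the
   old sp, which becomes the frame pointer; locals occupy
   fp + STARTING_FRAME_OFFSET .. fp + local_fsize; EH data and callee GR
   saves follow from fp + local_fsize upward; FP saves start at the next
   doubleword boundary; and the PIC register, when saved, lands at
   new-sp - 32 in this function's frame marker. */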
4073 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4074 Handle case where DISP > 8k by using the add_high_const patterns. */
4076 static void
4077 load_reg (int reg, HOST_WIDE_INT disp, int base)
4079 rtx dest = gen_rtx_REG (word_mode, reg);
4080 rtx basereg = gen_rtx_REG (Pmode, base);
4081 rtx src;
4083 if (VAL_14_BITS_P (disp))
4084 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4085 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4087 rtx delta = GEN_INT (disp);
4088 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4090 emit_move_insn (tmpreg, delta);
4091 if (TARGET_DISABLE_INDEXING)
4093 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4094 src = gen_rtx_MEM (word_mode, tmpreg);
4096 else
4097 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4099 else
4101 rtx delta = GEN_INT (disp);
4102 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4103 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4105 emit_move_insn (tmpreg, high);
4106 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4109 emit_move_insn (dest, src);
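/* Illustrative examples of the three cases above (32-bit, DISP in
   bytes): disp == 16 satisfies VAL_14_BITS_P (a signed 14-bit field,
   -8192 .. 8191), so a single register+displacement load is emitted.
   disp == 20000 does not fit, so the HIGH/LO_SUM pair is used instead:
   %r1 receives base plus the high part of the displacement (typically
   via addil) and the load itself supplies the low part. The 64-bit
   !VAL_32_BITS_P arm materializes the full displacement in %r1 first. */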
4112 /* Update the total code bytes output to the text section. */
4114 static void
4115 update_total_code_bytes (unsigned int nbytes)
4117 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4118 && !IN_NAMED_SECTION_P (cfun->decl))
4120 unsigned int old_total = total_code_bytes;
4122 total_code_bytes += nbytes;
4124 /* Be prepared to handle overflows. */
4125 if (old_total > total_code_bytes)
4126 total_code_bytes = UINT_MAX;
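/* Saturating example: if total_code_bytes were UINT_MAX - 4 and nbytes
   were 16, unsigned wrap-around would make the new total smaller than
   the old one; the check above then pins total_code_bytes at UINT_MAX
   instead of reporting a bogus small value. */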
4130 /* This function generates the assembly code for function exit.
4131 Args are as for output_function_prologue ().
4133 The function epilogue should not depend on the current stack
4134 pointer! It should use the frame pointer only. This is mandatory
4135 because of alloca; we also take advantage of it to omit stack
4136 adjustments before returning. */
4138 static void
4139 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4141 rtx insn = get_last_insn ();
4143 last_address = 0;
4145 /* pa_expand_epilogue does the dirty work now. We just need
4146 to output the assembler directives which denote the end
4147 of a function.
4149 To make debuggers happy, emit a nop if the epilogue was completely
4150 eliminated due to a volatile call as the last insn in the
4151 current function. That way the return address (in %r2) will
4152 always point to a valid instruction in the current function. */
4154 /* Get the last real insn. */
4155 if (GET_CODE (insn) == NOTE)
4156 insn = prev_real_insn (insn);
4158 /* If it is a sequence, then look inside. */
4159 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4160 insn = XVECEXP (PATTERN (insn), 0, 0);
4162 /* If insn is a CALL_INSN, then it must be a call to a volatile
4163 function (otherwise there would be epilogue insns). */
4164 if (insn && GET_CODE (insn) == CALL_INSN)
4166 fputs ("\tnop\n", file);
4167 last_address += 4;
4170 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4172 if (TARGET_SOM && TARGET_GAS)
4174 /* We're done with this subspace except possibly for some additional
4175 debug information. Forget that we are in this subspace to ensure
4176 that the next function is output in its own subspace. */
4177 in_section = NULL;
4178 cfun->machine->in_nsubspa = 2;
4181 if (INSN_ADDRESSES_SET_P ())
4183 insn = get_last_nonnote_insn ();
4184 last_address += INSN_ADDRESSES (INSN_UID (insn));
4185 if (INSN_P (insn))
4186 last_address += insn_default_length (insn);
4187 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4188 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4190 else
4191 last_address = UINT_MAX;
4193 /* Finally, update the total number of code bytes output so far. */
4194 update_total_code_bytes (last_address);
4197 void
4198 pa_expand_epilogue (void)
4200 rtx tmpreg;
4201 HOST_WIDE_INT offset;
4202 HOST_WIDE_INT ret_off = 0;
4203 int i;
4204 int merge_sp_adjust_with_load = 0;
4206 /* We will use this often. */
4207 tmpreg = gen_rtx_REG (word_mode, 1);
4209 /* Try to restore RP early to avoid load/use interlocks when
4210 RP gets used in the return (bv) instruction. This appears to still
4211 be necessary even when we schedule the prologue and epilogue. */
4212 if (rp_saved)
4214 ret_off = TARGET_64BIT ? -16 : -20;
4215 if (frame_pointer_needed)
4217 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4218 ret_off = 0;
4220 else
4222 /* No frame pointer, and stack is smaller than 8k. */
4223 if (VAL_14_BITS_P (ret_off - actual_fsize))
4225 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4226 ret_off = 0;
4231 /* General register restores. */
4232 if (frame_pointer_needed)
4234 offset = local_fsize;
4236 /* If the current function calls __builtin_eh_return, then we need
4237 to restore the saved EH data registers. */
4238 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4240 unsigned int i, regno;
4242 for (i = 0; ; ++i)
4244 regno = EH_RETURN_DATA_REGNO (i);
4245 if (regno == INVALID_REGNUM)
4246 break;
4248 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4249 offset += UNITS_PER_WORD;
4253 for (i = 18; i >= 4; i--)
4254 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4256 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4257 offset += UNITS_PER_WORD;
4260 else
4262 offset = local_fsize - actual_fsize;
4264 /* If the current function calls __builtin_eh_return, then we need
4265 to restore the saved EH data registers. */
4266 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4268 unsigned int i, regno;
4270 for (i = 0; ; ++i)
4272 regno = EH_RETURN_DATA_REGNO (i);
4273 if (regno == INVALID_REGNUM)
4274 break;
4276 /* Only for the first load.
4277 merge_sp_adjust_with_load holds the register load
4278 with which we will merge the sp adjustment. */
4279 if (merge_sp_adjust_with_load == 0
4280 && local_fsize == 0
4281 && VAL_14_BITS_P (-actual_fsize))
4282 merge_sp_adjust_with_load = regno;
4283 else
4284 load_reg (regno, offset, STACK_POINTER_REGNUM);
4285 offset += UNITS_PER_WORD;
4289 for (i = 18; i >= 3; i--)
4291 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4293 /* Only for the first load.
4294 merge_sp_adjust_with_load holds the register load
4295 with which we will merge the sp adjustment. */
4296 if (merge_sp_adjust_with_load == 0
4297 && local_fsize == 0
4298 && VAL_14_BITS_P (-actual_fsize))
4299 merge_sp_adjust_with_load = i;
4300 else
4301 load_reg (i, offset, STACK_POINTER_REGNUM);
4302 offset += UNITS_PER_WORD;
4307 /* Align pointer properly (doubleword boundary). */
4308 offset = (offset + 7) & ~7;
4310 /* FP register restores. */
4311 if (save_fregs)
4313 /* Adjust the register to index off of. */
4314 if (frame_pointer_needed)
4315 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4316 else
4317 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4319 /* Actually do the restores now. */
4320 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4321 if (df_regs_ever_live_p (i)
4322 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4324 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4325 rtx dest = gen_rtx_REG (DFmode, i);
4326 emit_move_insn (dest, src);
4330 /* Emit a blockage insn here to keep these insns from being moved to
4331 an earlier spot in the epilogue, or into the main instruction stream.
4333 This is necessary as we must not cut the stack back before all the
4334 restores are finished. */
4335 emit_insn (gen_blockage ());
4337 /* Reset stack pointer (and possibly frame pointer). The stack
4338 pointer is initially set to fp + 64 to avoid a race condition. */
4339 if (frame_pointer_needed)
4341 rtx delta = GEN_INT (-64);
4343 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4344 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4345 stack_pointer_rtx, delta));
4347 /* If we were deferring a callee register restore, do it now. */
4348 else if (merge_sp_adjust_with_load)
4350 rtx delta = GEN_INT (-actual_fsize);
4351 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4353 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4355 else if (actual_fsize != 0)
4356 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4357 - actual_fsize, 0);
4359 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4360 frame greater than 8k), do so now. */
4361 if (ret_off != 0)
4362 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4364 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4366 rtx sa = EH_RETURN_STACKADJ_RTX;
4368 emit_insn (gen_blockage ());
4369 emit_insn (TARGET_64BIT
4370 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4371 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4375 bool
4376 pa_can_use_return_insn (void)
4378 if (!reload_completed)
4379 return false;
4381 if (frame_pointer_needed)
4382 return false;
4384 if (df_regs_ever_live_p (2))
4385 return false;
4387 if (crtl->profile)
4388 return false;
4390 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4393 rtx
4394 hppa_pic_save_rtx (void)
4396 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4399 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4400 #define NO_DEFERRED_PROFILE_COUNTERS 0
4401 #endif
4404 /* Vector of funcdef numbers. */
4405 static vec<int> funcdef_nos;
4407 /* Output deferred profile counters. */
4408 static void
4409 output_deferred_profile_counters (void)
4411 unsigned int i;
4412 int align, n;
4414 if (funcdef_nos.is_empty ())
4415 return;
4417 switch_to_section (data_section);
4418 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4419 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
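/* Worked example: if LONG_TYPE_SIZE is 32 bits and BIGGEST_ALIGNMENT is
   larger, align is 32, so the directive requests
   floor_log2 (32 / 8) == 2, i.e. 4-byte (2^2) alignment for the
   counter words. */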
4421 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4423 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4424 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4427 funcdef_nos.release ();
4430 void
4431 hppa_profile_hook (int label_no)
4433 /* We use SImode for the address of the function in both 32 and
4434 64-bit code to avoid having to provide DImode versions of the
4435 lcla2 and load_offset_label_address insn patterns. */
4436 rtx reg = gen_reg_rtx (SImode);
4437 rtx label_rtx = gen_label_rtx ();
4438 rtx begin_label_rtx, call_insn;
4439 char begin_label_name[16];
4441 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4442 label_no);
4443 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4445 if (TARGET_64BIT)
4446 emit_move_insn (arg_pointer_rtx,
4447 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4448 GEN_INT (64)));
4450 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4452 /* The address of the function is loaded into %r25 with an instruction-
4453 relative sequence that avoids the use of relocations. The sequence
4454 is split so that the load_offset_label_address instruction can
4455 occupy the delay slot of the call to _mcount. */
4456 if (TARGET_PA_20)
4457 emit_insn (gen_lcla2 (reg, label_rtx));
4458 else
4459 emit_insn (gen_lcla1 (reg, label_rtx));
4461 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4462 reg, begin_label_rtx, label_rtx));
4464 #if !NO_DEFERRED_PROFILE_COUNTERS
4466 rtx count_label_rtx, addr, r24;
4467 char count_label_name[16];
4469 funcdef_nos.safe_push (label_no);
4470 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4471 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4473 addr = force_reg (Pmode, count_label_rtx);
4474 r24 = gen_rtx_REG (Pmode, 24);
4475 emit_move_insn (r24, addr);
4477 call_insn =
4478 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4479 gen_rtx_SYMBOL_REF (Pmode,
4480 "_mcount")),
4481 GEN_INT (TARGET_64BIT ? 24 : 12)));
4483 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4485 #else
4487 call_insn =
4488 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4489 gen_rtx_SYMBOL_REF (Pmode,
4490 "_mcount")),
4491 GEN_INT (TARGET_64BIT ? 16 : 8)));
4493 #endif
4495 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4496 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4498 /* Indicate the _mcount call cannot throw, nor will it execute a
4499 non-local goto. */
4500 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4503 /* Fetch the return address for the frame COUNT steps up from
4504 the current frame, after the prologue. FRAMEADDR is the
4505 frame pointer of the COUNT frame.
4507 We want to ignore any export stub remnants here. To handle this,
4508 we examine the code at the return address, and if it is an export
4509 stub, we return a memory rtx for the stub return address stored
4510 at frame-24.
4512 The value returned is used in two different ways:
4514 1. To find a function's caller.
4516 2. To change the return address for a function.
4518 This function handles most instances of case 1; however, it will
4519 fail if there are two levels of stubs to execute on the return
4520 path. The only way I believe that can happen is if the return value
4521 needs a parameter relocation, which never happens for C code.
4523 This function handles most instances of case 2; however, it will
4524 fail if we did not originally have stub code on the return path
4525 but will need stub code on the new return path. This can happen if
4526 the caller & callee are both in the main program, but the new
4527 return location is in a shared library. */
4529 rtx
4530 pa_return_addr_rtx (int count, rtx frameaddr)
4532 rtx label;
4533 rtx rp;
4534 rtx saved_rp;
4535 rtx ins;
4537 /* The instruction stream at the return address of a PA1.X export stub is:
4539 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4540 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4541 0x00011820 | stub+16: mtsp r1,sr0
4542 0xe0400002 | stub+20: be,n 0(sr0,rp)
4544 0xe0400002 must be specified as -532676606 so that it won't be
4545 rejected as an invalid immediate operand on 64-bit hosts.
4547 The instruction stream at the return address of a PA2.0 export stub is:
4549 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4550 0xe840d002 | stub+12: bve,n (rp)
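/* Worked conversion for the decimal constants used below: read as
   32-bit signed values, 0xe0400002 is 3762290690, and
   3762290690 - 2^32 == -532676606; likewise 0xe840d002 is 3896561666,
   and 3896561666 - 2^32 == -398405630. */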
4553 HOST_WIDE_INT insns[4];
4554 int i, len;
4556 if (count != 0)
4557 return NULL_RTX;
4559 rp = get_hard_reg_initial_val (Pmode, 2);
4561 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4562 return rp;
4564 /* If there is no export stub then just use the value saved from
4565 the return pointer register. */
4567 saved_rp = gen_reg_rtx (Pmode);
4568 emit_move_insn (saved_rp, rp);
4570 /* Get pointer to the instruction stream. We have to mask out the
4571 privilege level from the two low order bits of the return address
4572 pointer here so that ins will point to the start of the first
4573 instruction that would have been executed if we returned. */
4574 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4575 label = gen_label_rtx ();
4577 if (TARGET_PA_20)
4579 insns[0] = 0x4bc23fd1;
4580 insns[1] = -398405630;
4581 len = 2;
4583 else
4585 insns[0] = 0x4bc23fd1;
4586 insns[1] = 0x004010a1;
4587 insns[2] = 0x00011820;
4588 insns[3] = -532676606;
4589 len = 4;
4592 /* Check the instruction stream at the normal return address for the
4593 export stub. If it is an export stub, then our return address is
4594 really in -24[frameaddr]. */
4596 for (i = 0; i < len; i++)
4598 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4599 rtx op1 = GEN_INT (insns[i]);
4600 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4603 /* Here we know that our return address points to an export
4604 stub. We don't want to return the address of the export stub,
4605 but rather the return address of the export stub. That return
4606 address is stored at -24[frameaddr]. */
4608 emit_move_insn (saved_rp,
4609 gen_rtx_MEM (Pmode,
4610 memory_address (Pmode,
4611 plus_constant (Pmode, frameaddr,
4612 -24))));
4614 emit_label (label);
4616 return saved_rp;
4619 void
4620 pa_emit_bcond_fp (rtx operands[])
4622 enum rtx_code code = GET_CODE (operands[0]);
4623 rtx operand0 = operands[1];
4624 rtx operand1 = operands[2];
4625 rtx label = operands[3];
4627 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4628 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4630 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4631 gen_rtx_IF_THEN_ELSE (VOIDmode,
4632 gen_rtx_fmt_ee (NE,
4633 VOIDmode,
4634 gen_rtx_REG (CCFPmode, 0),
4635 const0_rtx),
4636 gen_rtx_LABEL_REF (VOIDmode, label),
4637 pc_rtx)));
4641 /* Adjust the cost of a scheduling dependency. Return the new cost of
4642 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4644 static int
4645 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4647 enum attr_type attr_type;
4649 /* Don't adjust costs for a pa8000 chip; also don't adjust any
4650 true dependencies, as they are now described with bypasses. */
4651 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4652 return cost;
4654 if (! recog_memoized (insn))
4655 return 0;
4657 attr_type = get_attr_type (insn);
4659 switch (REG_NOTE_KIND (link))
4661 case REG_DEP_ANTI:
4662 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4663 cycles later. */
4665 if (attr_type == TYPE_FPLOAD)
4667 rtx pat = PATTERN (insn);
4668 rtx dep_pat = PATTERN (dep_insn);
4669 if (GET_CODE (pat) == PARALLEL)
4671 /* This happens for the fldXs,mb patterns. */
4672 pat = XVECEXP (pat, 0, 0);
4674 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4675 /* If this happens, we have to extend this to schedule
4676 optimally. Return 0 for now. */
4677 return 0;
4679 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4681 if (! recog_memoized (dep_insn))
4682 return 0;
4683 switch (get_attr_type (dep_insn))
4685 case TYPE_FPALU:
4686 case TYPE_FPMULSGL:
4687 case TYPE_FPMULDBL:
4688 case TYPE_FPDIVSGL:
4689 case TYPE_FPDIVDBL:
4690 case TYPE_FPSQRTSGL:
4691 case TYPE_FPSQRTDBL:
4692 /* A fpload can't be issued until one cycle before a
4693 preceding arithmetic operation has finished if
4694 the target of the fpload is any of the sources
4695 (or destination) of the arithmetic operation. */
4696 return insn_default_latency (dep_insn) - 1;
4698 default:
4699 return 0;
4703 else if (attr_type == TYPE_FPALU)
4705 rtx pat = PATTERN (insn);
4706 rtx dep_pat = PATTERN (dep_insn);
4707 if (GET_CODE (pat) == PARALLEL)
4709 /* This happens for the fldXs,mb patterns. */
4710 pat = XVECEXP (pat, 0, 0);
4712 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4713 /* If this happens, we have to extend this to schedule
4714 optimally. Return 0 for now. */
4715 return 0;
4717 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4719 if (! recog_memoized (dep_insn))
4720 return 0;
4721 switch (get_attr_type (dep_insn))
4723 case TYPE_FPDIVSGL:
4724 case TYPE_FPDIVDBL:
4725 case TYPE_FPSQRTSGL:
4726 case TYPE_FPSQRTDBL:
4727 /* An ALU flop can't be issued until two cycles before a
4728 preceding divide or sqrt operation has finished if
4729 the target of the ALU flop is any of the sources
4730 (or destination) of the divide or sqrt operation. */
4731 return insn_default_latency (dep_insn) - 2;
4733 default:
4734 return 0;
4739 /* For other anti dependencies, the cost is 0. */
4740 return 0;
4742 case REG_DEP_OUTPUT:
4743 /* Output dependency; DEP_INSN writes a register that INSN writes some
4744 cycles later. */
4745 if (attr_type == TYPE_FPLOAD)
4747 rtx pat = PATTERN (insn);
4748 rtx dep_pat = PATTERN (dep_insn);
4749 if (GET_CODE (pat) == PARALLEL)
4751 /* This happens for the fldXs,mb patterns. */
4752 pat = XVECEXP (pat, 0, 0);
4754 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4755 /* If this happens, we have to extend this to schedule
4756 optimally. Return 0 for now. */
4757 return 0;
4759 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4761 if (! recog_memoized (dep_insn))
4762 return 0;
4763 switch (get_attr_type (dep_insn))
4765 case TYPE_FPALU:
4766 case TYPE_FPMULSGL:
4767 case TYPE_FPMULDBL:
4768 case TYPE_FPDIVSGL:
4769 case TYPE_FPDIVDBL:
4770 case TYPE_FPSQRTSGL:
4771 case TYPE_FPSQRTDBL:
4772 /* A fpload can't be issued until one cycle before a
4773 preceding arithmetic operation has finished if
4774 the target of the fpload is the destination of the
4775 arithmetic operation.
4777 Exception: For PA7100LC, PA7200 and PA7300, the cost
4778 is 3 cycles, unless they bundle together. We also
4779 pay the penalty if the second insn is a fpload. */
4780 return insn_default_latency (dep_insn) - 1;
4782 default:
4783 return 0;
4787 else if (attr_type == TYPE_FPALU)
4789 rtx pat = PATTERN (insn);
4790 rtx dep_pat = PATTERN (dep_insn);
4791 if (GET_CODE (pat) == PARALLEL)
4793 /* This happens for the fldXs,mb patterns. */
4794 pat = XVECEXP (pat, 0, 0);
4796 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4797 /* If this happens, we have to extend this to schedule
4798 optimally. Return 0 for now. */
4799 return 0;
4801 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4803 if (! recog_memoized (dep_insn))
4804 return 0;
4805 switch (get_attr_type (dep_insn))
4807 case TYPE_FPDIVSGL:
4808 case TYPE_FPDIVDBL:
4809 case TYPE_FPSQRTSGL:
4810 case TYPE_FPSQRTDBL:
4811 /* An ALU flop can't be issued until two cycles before a
4812 preceding divide or sqrt operation has finished if
4813 the target of the ALU flop is also the target of
4814 the divide or sqrt operation. */
4815 return insn_default_latency (dep_insn) - 2;
4817 default:
4818 return 0;
4823 /* For other output dependencies, the cost is 0. */
4824 return 0;
4826 default:
4827 gcc_unreachable ();
4831 /* Adjust scheduling priorities. We use this to try and keep addil
4832 and the next use of %r1 close together. */
4833 static int
4834 pa_adjust_priority (rtx insn, int priority)
4836 rtx set = single_set (insn);
4837 rtx src, dest;
4838 if (set)
4840 src = SET_SRC (set);
4841 dest = SET_DEST (set);
4842 if (GET_CODE (src) == LO_SUM
4843 && symbolic_operand (XEXP (src, 1), VOIDmode)
4844 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4845 priority >>= 3;
4847 else if (GET_CODE (src) == MEM
4848 && GET_CODE (XEXP (src, 0)) == LO_SUM
4849 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4850 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4851 priority >>= 1;
4853 else if (GET_CODE (dest) == MEM
4854 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4855 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4856 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4857 priority >>= 3;
4859 return priority;
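/* Example of the effect: an insn whose SET_SRC is a LO_SUM of a
   writable symbol drops from priority 40 to 40 >> 3 == 5, while a load
   through such a LO_SUM address only drops to 40 >> 1 == 20; the lower
   priority makes the scheduler less eager to move the insn away from
   the addil that set up %r1. */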
4862 /* The 700 can only issue a single insn at a time.
4863 The 7XXX processors can issue two insns at a time.
4864 The 8000 can issue 4 insns at a time. */
4865 static int
4866 pa_issue_rate (void)
4868 switch (pa_cpu)
4870 case PROCESSOR_700: return 1;
4871 case PROCESSOR_7100: return 2;
4872 case PROCESSOR_7100LC: return 2;
4873 case PROCESSOR_7200: return 2;
4874 case PROCESSOR_7300: return 2;
4875 case PROCESSOR_8000: return 4;
4877 default:
4878 gcc_unreachable ();
4884 /* Return any length plus adjustment needed by INSN which already has
4885 its length computed as LENGTH. Return LENGTH if no adjustment is
4886 necessary.
4888 Also compute the length of an inline block move here as it is too
4889 complicated to express as a length attribute in pa.md. */
4890 int
4891 pa_adjust_insn_length (rtx insn, int length)
4893 rtx pat = PATTERN (insn);
4895 /* If length is negative or undefined (such values compare >= INT_MAX once cast to unsigned), provide the initial length. */
4896 if ((unsigned int) length >= INT_MAX)
4898 if (GET_CODE (pat) == SEQUENCE)
4899 insn = XVECEXP (pat, 0, 0);
4901 switch (get_attr_type (insn))
4903 case TYPE_MILLI:
4904 length = pa_attr_length_millicode_call (insn);
4905 break;
4906 case TYPE_CALL:
4907 length = pa_attr_length_call (insn, 0);
4908 break;
4909 case TYPE_SIBCALL:
4910 length = pa_attr_length_call (insn, 1);
4911 break;
4912 case TYPE_DYNCALL:
4913 length = pa_attr_length_indirect_call (insn);
4914 break;
4915 case TYPE_SH_FUNC_ADRS:
4916 length = pa_attr_length_millicode_call (insn) + 20;
4917 break;
4918 default:
4919 gcc_unreachable ();
4923 /* Jumps inside switch tables which have unfilled delay slots need
4924 adjustment. */
4925 if (GET_CODE (insn) == JUMP_INSN
4926 && GET_CODE (pat) == PARALLEL
4927 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4928 length += 4;
4929 /* Block move pattern. */
4930 else if (GET_CODE (insn) == INSN
4931 && GET_CODE (pat) == PARALLEL
4932 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4933 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4934 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4935 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4936 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4937 length += compute_movmem_length (insn) - 4;
4938 /* Block clear pattern. */
4939 else if (GET_CODE (insn) == INSN
4940 && GET_CODE (pat) == PARALLEL
4941 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4942 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4943 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4944 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4945 length += compute_clrmem_length (insn) - 4;
4946 /* Conditional branch with an unfilled delay slot. */
4947 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4949 /* Adjust a short backwards conditional with an unfilled delay slot. */
4950 if (GET_CODE (pat) == SET
4951 && length == 4
4952 && JUMP_LABEL (insn) != NULL_RTX
4953 && ! forward_branch_p (insn))
4954 length += 4;
4955 else if (GET_CODE (pat) == PARALLEL
4956 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4957 && length == 4)
4958 length += 4;
4959 /* Adjust dbra insn with short backwards conditional branch with
4960 unfilled delay slot -- only for case where counter is in a
4961 general register. */
4962 else if (GET_CODE (pat) == PARALLEL
4963 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4964 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4965 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4966 && length == 4
4967 && ! forward_branch_p (insn))
4968 length += 4;
4970 return length;
4973 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
4975 static bool
4976 pa_print_operand_punct_valid_p (unsigned char code)
4978 if (code == '@'
4979 || code == '#'
4980 || code == '*'
4981 || code == '^')
4982 return true;
4984 return false;
4987 /* Print operand X (an rtx) in assembler syntax to file FILE.
4988 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4989 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4991 void
4992 pa_print_operand (FILE *file, rtx x, int code)
4994 switch (code)
4996 case '#':
4997 /* Output a 'nop' if there's nothing for the delay slot. */
4998 if (dbr_sequence_length () == 0)
4999 fputs ("\n\tnop", file);
5000 return;
5001 case '*':
5002 /* Output a nullification completer if there's nothing for the
5003 delay slot or nullification is requested. */
5004 if (dbr_sequence_length () == 0 ||
5005 (final_sequence &&
5006 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5007 fputs (",n", file);
5008 return;
5009 case 'R':
5010 /* Print out the second register name of a register pair.
5011 I.e., R (6) => 7. */
5012 fputs (reg_names[REGNO (x) + 1], file);
5013 return;
5014 case 'r':
5015 /* A register or zero. */
5016 if (x == const0_rtx
5017 || (x == CONST0_RTX (DFmode))
5018 || (x == CONST0_RTX (SFmode)))
5020 fputs ("%r0", file);
5021 return;
5023 else
5024 break;
5025 case 'f':
5026 /* A register or zero (floating point). */
5027 if (x == const0_rtx
5028 || (x == CONST0_RTX (DFmode))
5029 || (x == CONST0_RTX (SFmode)))
5031 fputs ("%fr0", file);
5032 return;
5034 else
5035 break;
5036 case 'A':
5038 rtx xoperands[2];
5040 xoperands[0] = XEXP (XEXP (x, 0), 0);
5041 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5042 pa_output_global_address (file, xoperands[1], 0);
5043 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5044 return;
5047 case 'C': /* Plain (C)ondition */
5048 case 'X':
5049 switch (GET_CODE (x))
5051 case EQ:
5052 fputs ("=", file); break;
5053 case NE:
5054 fputs ("<>", file); break;
5055 case GT:
5056 fputs (">", file); break;
5057 case GE:
5058 fputs (">=", file); break;
5059 case GEU:
5060 fputs (">>=", file); break;
5061 case GTU:
5062 fputs (">>", file); break;
5063 case LT:
5064 fputs ("<", file); break;
5065 case LE:
5066 fputs ("<=", file); break;
5067 case LEU:
5068 fputs ("<<=", file); break;
5069 case LTU:
5070 fputs ("<<", file); break;
5071 default:
5072 gcc_unreachable ();
5074 return;
5075 case 'N': /* Condition, (N)egated */
5076 switch (GET_CODE (x))
5078 case EQ:
5079 fputs ("<>", file); break;
5080 case NE:
5081 fputs ("=", file); break;
5082 case GT:
5083 fputs ("<=", file); break;
5084 case GE:
5085 fputs ("<", file); break;
5086 case GEU:
5087 fputs ("<<", file); break;
5088 case GTU:
5089 fputs ("<<=", file); break;
5090 case LT:
5091 fputs (">=", file); break;
5092 case LE:
5093 fputs (">", file); break;
5094 case LEU:
5095 fputs (">>", file); break;
5096 case LTU:
5097 fputs (">>=", file); break;
5098 default:
5099 gcc_unreachable ();
5101 return;
5102 /* For floating point comparisons. Note that the output
5103 predicates are the complement of the desired mode. The
5104 conditions for GT, GE, LT, LE and LTGT cause an invalid
5105 operation exception if the result is unordered and this
5106 exception is enabled in the floating-point status register. */
5107 case 'Y':
5108 switch (GET_CODE (x))
5110 case EQ:
5111 fputs ("!=", file); break;
5112 case NE:
5113 fputs ("=", file); break;
5114 case GT:
5115 fputs ("!>", file); break;
5116 case GE:
5117 fputs ("!>=", file); break;
5118 case LT:
5119 fputs ("!<", file); break;
5120 case LE:
5121 fputs ("!<=", file); break;
5122 case LTGT:
5123 fputs ("!<>", file); break;
5124 case UNLE:
5125 fputs ("!?<=", file); break;
5126 case UNLT:
5127 fputs ("!?<", file); break;
5128 case UNGE:
5129 fputs ("!?>=", file); break;
5130 case UNGT:
5131 fputs ("!?>", file); break;
5132 case UNEQ:
5133 fputs ("!?=", file); break;
5134 case UNORDERED:
5135 fputs ("!?", file); break;
5136 case ORDERED:
5137 fputs ("?", file); break;
5138 default:
5139 gcc_unreachable ();
5141 return;
5142 case 'S': /* Condition, operands are (S)wapped. */
5143 switch (GET_CODE (x))
5145 case EQ:
5146 fputs ("=", file); break;
5147 case NE:
5148 fputs ("<>", file); break;
5149 case GT:
5150 fputs ("<", file); break;
5151 case GE:
5152 fputs ("<=", file); break;
5153 case GEU:
5154 fputs ("<<=", file); break;
5155 case GTU:
5156 fputs ("<<", file); break;
5157 case LT:
5158 fputs (">", file); break;
5159 case LE:
5160 fputs (">=", file); break;
5161 case LEU:
5162 fputs (">>=", file); break;
5163 case LTU:
5164 fputs (">>", file); break;
5165 default:
5166 gcc_unreachable ();
5168 return;
5169 case 'B': /* Condition, (B)oth swapped and negate. */
5170 switch (GET_CODE (x))
5172 case EQ:
5173 fputs ("<>", file); break;
5174 case NE:
5175 fputs ("=", file); break;
5176 case GT:
5177 fputs (">=", file); break;
5178 case GE:
5179 fputs (">", file); break;
5180 case GEU:
5181 fputs (">>", file); break;
5182 case GTU:
5183 fputs (">>=", file); break;
5184 case LT:
5185 fputs ("<=", file); break;
5186 case LE:
5187 fputs ("<", file); break;
5188 case LEU:
5189 fputs ("<<", file); break;
5190 case LTU:
5191 fputs ("<<=", file); break;
5192 default:
5193 gcc_unreachable ();
5195 return;
5196 case 'k':
5197 gcc_assert (GET_CODE (x) == CONST_INT);
5198 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5199 return;
5200 case 'Q':
5201 gcc_assert (GET_CODE (x) == CONST_INT);
5202 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5203 return;
5204 case 'L':
5205 gcc_assert (GET_CODE (x) == CONST_INT);
5206 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5207 return;
5208 case 'O':
5209 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5210 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5211 return;
5212 case 'p':
5213 gcc_assert (GET_CODE (x) == CONST_INT);
5214 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5215 return;
5216 case 'P':
5217 gcc_assert (GET_CODE (x) == CONST_INT);
5218 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5219 return;
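/* Worked examples for the field-width codes above, with INTVAL == 5:
   'Q' prints 64 - (5 & 63) == 59, 'L' prints 32 - (5 & 31) == 27,
   'p' prints 63 - (5 & 63) == 58, and 'P' prints 31 - (5 & 31) == 26. */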
5220 case 'I':
5221 if (GET_CODE (x) == CONST_INT)
5222 fputs ("i", file);
5223 return;
5224 case 'M':
5225 case 'F':
5226 switch (GET_CODE (XEXP (x, 0)))
5228 case PRE_DEC:
5229 case PRE_INC:
5230 if (ASSEMBLER_DIALECT == 0)
5231 fputs ("s,mb", file);
5232 else
5233 fputs (",mb", file);
5234 break;
5235 case POST_DEC:
5236 case POST_INC:
5237 if (ASSEMBLER_DIALECT == 0)
5238 fputs ("s,ma", file);
5239 else
5240 fputs (",ma", file);
5241 break;
5242 case PLUS:
5243 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5244 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5246 if (ASSEMBLER_DIALECT == 0)
5247 fputs ("x", file);
5249 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5250 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5252 if (ASSEMBLER_DIALECT == 0)
5253 fputs ("x,s", file);
5254 else
5255 fputs (",s", file);
5257 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5258 fputs ("s", file);
5259 break;
5260 default:
5261 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5262 fputs ("s", file);
5263 break;
5265 return;
5266 case 'G':
5267 pa_output_global_address (file, x, 0);
5268 return;
5269 case 'H':
5270 pa_output_global_address (file, x, 1);
5271 return;
5272 case 0: /* Don't do anything special */
5273 break;
5274 case 'Z':
5276 unsigned op[3];
5277 compute_zdepwi_operands (INTVAL (x), op);
5278 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5279 return;
5281 case 'z':
5283 unsigned op[3];
5284 compute_zdepdi_operands (INTVAL (x), op);
5285 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5286 return;
5288 case 'c':
5289 /* We can get here from a .vtable_inherit due to our
5290 CONSTANT_ADDRESS_P rejecting perfectly good constant
5291 addresses. */
5292 break;
5293 default:
5294 gcc_unreachable ();
5296 if (GET_CODE (x) == REG)
5298 fputs (reg_names [REGNO (x)], file);
5299 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5301 fputs ("R", file);
5302 return;
5304 if (FP_REG_P (x)
5305 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5306 && (REGNO (x) & 1) == 0)
5307 fputs ("L", file);
5309 else if (GET_CODE (x) == MEM)
5311 int size = GET_MODE_SIZE (GET_MODE (x));
5312 rtx base = NULL_RTX;
5313 switch (GET_CODE (XEXP (x, 0)))
5315 case PRE_DEC:
5316 case POST_DEC:
5317 base = XEXP (XEXP (x, 0), 0);
5318 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5319 break;
5320 case PRE_INC:
5321 case POST_INC:
5322 base = XEXP (XEXP (x, 0), 0);
5323 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5324 break;
5325 case PLUS:
5326 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5327 fprintf (file, "%s(%s)",
5328 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5329 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5330 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5331 fprintf (file, "%s(%s)",
5332 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5333 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5334 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5335 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5337 /* Because the REG_POINTER flag can get lost during reload,
5338 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5339 index and base registers in the combined move patterns. */
5340 rtx base = XEXP (XEXP (x, 0), 1);
5341 rtx index = XEXP (XEXP (x, 0), 0);
5343 fprintf (file, "%s(%s)",
5344 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5346 else
5347 output_address (XEXP (x, 0));
5348 break;
5349 default:
5350 output_address (XEXP (x, 0));
5351 break;
5354 else
5355 output_addr_const (file, x);
5358 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5360 void
5361 pa_output_global_address (FILE *file, rtx x, int round_constant)
5364 /* Imagine (high (const (plus ...))). */
5365 if (GET_CODE (x) == HIGH)
5366 x = XEXP (x, 0);
5368 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5369 output_addr_const (file, x);
5370 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5372 output_addr_const (file, x);
5373 fputs ("-$global$", file);
5375 else if (GET_CODE (x) == CONST)
5377 const char *sep = "";
5378 int offset = 0; /* assembler wants -$global$ at end */
5379 rtx base = NULL_RTX;
5381 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5383 case SYMBOL_REF:
5384 base = XEXP (XEXP (x, 0), 0);
5385 output_addr_const (file, base);
5386 break;
5387 case CONST_INT:
5388 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5389 break;
5390 default:
5391 gcc_unreachable ();
5394 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5396 case SYMBOL_REF:
5397 base = XEXP (XEXP (x, 0), 1);
5398 output_addr_const (file, base);
5399 break;
5400 case CONST_INT:
5401 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5402 break;
5403 default:
5404 gcc_unreachable ();
5407 /* How bogus. The compiler is apparently responsible for
5408 rounding the constant if it uses an LR field selector.
5410 The linker and/or assembler seem a better place since
5411 they have to do this kind of thing already.
5413 If we fail to do this, HP's optimizing linker may eliminate
5414 an addil, but not update the ldw/stw/ldo instruction that
5415 uses the result of the addil. */
5416 if (round_constant)
5417 offset = ((offset + 0x1000) & ~0x1fff);
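/* Rounding example: offset 0x2345 becomes
   (0x2345 + 0x1000) & ~0x1fff == 0x2000, i.e. the offset is rounded to
   the nearest multiple of 8 KiB, matching the LR field selector's
   rounding. */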
5419 switch (GET_CODE (XEXP (x, 0)))
5421 case PLUS:
5422 if (offset < 0)
5424 offset = -offset;
5425 sep = "-";
5427 else
5428 sep = "+";
5429 break;
5431 case MINUS:
5432 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5433 sep = "-";
5434 break;
5436 default:
5437 gcc_unreachable ();
5440 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5441 fputs ("-$global$", file);
5442 if (offset)
5443 fprintf (file, "%s%d", sep, offset);
5445 else
5446 output_addr_const (file, x);
5449 /* Output boilerplate text to appear at the beginning of the file.
5450 There are several possible versions. */
5451 #define aputs(x) fputs(x, asm_out_file)
5452 static inline void
5453 pa_file_start_level (void)
5455 if (TARGET_64BIT)
5456 aputs ("\t.LEVEL 2.0w\n");
5457 else if (TARGET_PA_20)
5458 aputs ("\t.LEVEL 2.0\n");
5459 else if (TARGET_PA_11)
5460 aputs ("\t.LEVEL 1.1\n");
5461 else
5462 aputs ("\t.LEVEL 1.0\n");
5465 static inline void
5466 pa_file_start_space (int sortspace)
5468 aputs ("\t.SPACE $PRIVATE$");
5469 if (sortspace)
5470 aputs (",SORT=16");
5471 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5472 if (flag_tm)
5473 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5474 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5475 "\n\t.SPACE $TEXT$");
5476 if (sortspace)
5477 aputs (",SORT=8");
5478 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5479 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5482 static inline void
5483 pa_file_start_file (int want_version)
5485 if (write_symbols != NO_DEBUG)
5487 output_file_directive (asm_out_file, main_input_filename);
5488 if (want_version)
5489 aputs ("\t.version\t\"01.01\"\n");
5493 static inline void
5494 pa_file_start_mcount (const char *aswhat)
5496 if (profile_flag)
5497 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5500 static void
5501 pa_elf_file_start (void)
5503 pa_file_start_level ();
5504 pa_file_start_mcount ("ENTRY");
5505 pa_file_start_file (0);
5508 static void
5509 pa_som_file_start (void)
5511 pa_file_start_level ();
5512 pa_file_start_space (0);
5513 aputs ("\t.IMPORT $global$,DATA\n"
5514 "\t.IMPORT $$dyncall,MILLICODE\n");
5515 pa_file_start_mcount ("CODE");
5516 pa_file_start_file (0);
5519 static void
5520 pa_linux_file_start (void)
5522 pa_file_start_file (1);
5523 pa_file_start_level ();
5524 pa_file_start_mcount ("CODE");
5527 static void
5528 pa_hpux64_gas_file_start (void)
5530 pa_file_start_level ();
5531 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5532 if (profile_flag)
5533 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5534 #endif
5535 pa_file_start_file (1);
5538 static void
5539 pa_hpux64_hpas_file_start (void)
5541 pa_file_start_level ();
5542 pa_file_start_space (1);
5543 pa_file_start_mcount ("CODE");
5544 pa_file_start_file (0);
5546 #undef aputs
5548 /* Search the deferred plabel list for SYMBOL and return its internal
5549 label. If an entry for SYMBOL is not found, a new entry is created. */
5551 rtx
5552 pa_get_deferred_plabel (rtx symbol)
5554 const char *fname = XSTR (symbol, 0);
5555 size_t i;
5557 /* See if we have already put this function on the list of deferred
5558 plabels. This list is generally small, so a linear search is not
5559 too ugly. If it proves too slow, replace it with something faster. */
5560 for (i = 0; i < n_deferred_plabels; i++)
5561 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5562 break;
5564 /* If the deferred plabel list is empty, or this entry was not found
5565 on the list, create a new entry on the list. */
5566 if (deferred_plabels == NULL || i == n_deferred_plabels)
5568 tree id;
5570 if (deferred_plabels == 0)
5571 deferred_plabels = ggc_alloc_deferred_plabel ();
5572 else
5573 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5574 deferred_plabels,
5575 n_deferred_plabels + 1);
5577 i = n_deferred_plabels++;
5578 deferred_plabels[i].internal_label = gen_label_rtx ();
5579 deferred_plabels[i].symbol = symbol;
5581 /* Gross. We have just implicitly taken the address of this
5582 function. Mark it in the same manner as assemble_name. */
5583 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5584 if (id)
5585 mark_referenced (id);
5588 return deferred_plabels[i].internal_label;
5591 static void
5592 output_deferred_plabels (void)
5594 size_t i;
5596 /* If we have some deferred plabels, then we need to switch into the
5597 data or readonly data section, and align it to a 4 byte boundary
5598 before outputting the deferred plabels. */
5599 if (n_deferred_plabels)
5601 switch_to_section (flag_pic ? data_section : readonly_data_section);
5602 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5605 /* Now output the deferred plabels. */
5606 for (i = 0; i < n_deferred_plabels; i++)
5608 targetm.asm_out.internal_label (asm_out_file, "L",
5609 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5610 assemble_integer (deferred_plabels[i].symbol,
5611 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5615 /* Initialize optabs to point to emulation routines. */
5617 static void
5618 pa_init_libfuncs (void)
5620 if (HPUX_LONG_DOUBLE_LIBRARY)
5622 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5623 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5624 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5625 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5626 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5627 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5628 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5629 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5630 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5632 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5633 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5634 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5635 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5636 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5637 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5638 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5640 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5641 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5642 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5643 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5645 set_conv_libfunc (sfix_optab, SImode, TFmode,
5646 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5647 : "_U_Qfcnvfxt_quad_to_sgl");
5648 set_conv_libfunc (sfix_optab, DImode, TFmode,
5649 "_U_Qfcnvfxt_quad_to_dbl");
5650 set_conv_libfunc (ufix_optab, SImode, TFmode,
5651 "_U_Qfcnvfxt_quad_to_usgl");
5652 set_conv_libfunc (ufix_optab, DImode, TFmode,
5653 "_U_Qfcnvfxt_quad_to_udbl");
5655 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5656 "_U_Qfcnvxf_sgl_to_quad");
5657 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5658 "_U_Qfcnvxf_dbl_to_quad");
5659 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5660 "_U_Qfcnvxf_usgl_to_quad");
5661 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5662 "_U_Qfcnvxf_udbl_to_quad");
5665 if (TARGET_SYNC_LIBCALL)
5666 init_sync_libfuncs (UNITS_PER_WORD);
5669 /* HP's millicode routines mean something special to the assembler.
5670 Keep track of which ones we have used. */
5672 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5673 static void import_milli (enum millicodes);
5674 static char imported[(int) end1000];
5675 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5676 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5677 #define MILLI_START 10
5679 static void
5680 import_milli (enum millicodes code)
5682 char str[sizeof (import_string)];
5684 if (!imported[(int) code])
5686 imported[(int) code] = 1;
5687 strcpy (str, import_string);
5688 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5689 output_asm_insn (str, 0);
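/* For example, import_milli (mulI) copies "mulI" over the four dots
   starting at index MILLI_START (10) in the template, emitting
   ".IMPORT $$mulI,MILLICODE"; the imported[] flags guarantee each
   directive is output at most once per output file. */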
5693 /* The register constraints have put the operands and return value in
5694 the proper registers. */
5696 const char *
5697 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5699 import_milli (mulI);
5700 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5703 /* Emit the rtl for doing a division by a constant. */
5705 /* Do magic division millicodes exist for this value? */
5706 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5708 /* We'll use an array to keep track of the magic millicodes and
5709 whether or not we've used them already. [n][0] is signed, [n][1] is
5710 unsigned. */
5712 static int div_milli[16][2];
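/* Per pa_magic_milli above, the divisors with dedicated millicode are
   3, 5, 6, 7, 9, 10, 12, 14 and 15; pa_emit_hpdiv_const only fires for
   those values, and div_milli[][] remembers which $$divI_n/$$divU_n
   imports have already been emitted. */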
5714 int
5715 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5717 if (GET_CODE (operands[2]) == CONST_INT
5718 && INTVAL (operands[2]) > 0
5719 && INTVAL (operands[2]) < 16
5720 && pa_magic_milli[INTVAL (operands[2])])
5722 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5724 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5725 emit
5726 (gen_rtx_PARALLEL
5727 (VOIDmode,
5728 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5729 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5730 SImode,
5731 gen_rtx_REG (SImode, 26),
5732 operands[2])),
5733 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5734 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5735 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5736 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5737 gen_rtx_CLOBBER (VOIDmode, ret))));
5738 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5739 return 1;
5741 return 0;
5744 const char *
5745 pa_output_div_insn (rtx *operands, int unsignedp, rtx insn)
5747 int divisor;
5749 /* If the divisor is a constant, try to use one of the special
5750 opcodes. */
5751 if (GET_CODE (operands[0]) == CONST_INT)
5753 static char buf[100];
5754 divisor = INTVAL (operands[0]);
5755 if (!div_milli[divisor][unsignedp])
5757 div_milli[divisor][unsignedp] = 1;
5758 if (unsignedp)
5759 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5760 else
5761 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5763 if (unsignedp)
5765 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5766 INTVAL (operands[0]));
5767 return pa_output_millicode_call (insn,
5768 gen_rtx_SYMBOL_REF (SImode, buf));
5770 else
5772 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5773 INTVAL (operands[0]));
5774 return pa_output_millicode_call (insn,
5775 gen_rtx_SYMBOL_REF (SImode, buf));
5778 /* Divisor isn't a special constant. */
5779 else
5781 if (unsignedp)
5783 import_milli (divU);
5784 return pa_output_millicode_call (insn,
5785 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5787 else
5789 import_milli (divI);
5790 return pa_output_millicode_call (insn,
5791 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5796 /* Output a $$rem millicode to do mod. */
5798 const char *
5799 pa_output_mod_insn (int unsignedp, rtx insn)
5801 if (unsignedp)
5803 import_milli (remU);
5804 return pa_output_millicode_call (insn,
5805 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5807 else
5809 import_milli (remI);
5810 return pa_output_millicode_call (insn,
5811 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5815 void
5816 pa_output_arg_descriptor (rtx call_insn)
5818 const char *arg_regs[4];
5819 enum machine_mode arg_mode;
5820 rtx link;
5821 int i, output_flag = 0;
5822 int regno;
5824 /* We neither need nor want argument location descriptors for the
5825 64-bit runtime environment or the ELF32 environment. */
5826 if (TARGET_64BIT || TARGET_ELF32)
5827 return;
5829 for (i = 0; i < 4; i++)
5830 arg_regs[i] = 0;
5832 /* Specify explicitly that no argument relocations should take place
5833 if using the portable runtime calling conventions. */
5834 if (TARGET_PORTABLE_RUNTIME)
5836 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5837 asm_out_file);
5838 return;
5841 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5842 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5843 link; link = XEXP (link, 1))
5845 rtx use = XEXP (link, 0);
5847 if (! (GET_CODE (use) == USE
5848 && GET_CODE (XEXP (use, 0)) == REG
5849 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5850 continue;
5852 arg_mode = GET_MODE (XEXP (use, 0));
5853 regno = REGNO (XEXP (use, 0));
5854 if (regno >= 23 && regno <= 26)
5856 arg_regs[26 - regno] = "GR";
5857 if (arg_mode == DImode)
5858 arg_regs[25 - regno] = "GR";
5860 else if (regno >= 32 && regno <= 39)
5862 if (arg_mode == SFmode)
5863 arg_regs[(regno - 32) / 2] = "FR";
5864 else
5866 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5867 arg_regs[(regno - 34) / 2] = "FR";
5868 arg_regs[(regno - 34) / 2 + 1] = "FU";
5869 #else
5870 arg_regs[(regno - 34) / 2] = "FU";
5871 arg_regs[(regno - 34) / 2 + 1] = "FR";
5872 #endif
5876 fputs ("\t.CALL ", asm_out_file);
5877 for (i = 0; i < 4; i++)
5879 if (arg_regs[i])
5881 if (output_flag++)
5882 fputc (',', asm_out_file);
5883 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5886 fputc ('\n', asm_out_file);
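/* For example, a 32-bit call passing two ints and a double might
   produce

	.CALL ARGW0=GR,ARGW1=GR,ARGW2=FR,ARGW3=FU

   This is a sketch; the FR/FU word order depends on
   HP_FP_ARG_DESCRIPTOR_REVERSED as above.  */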
5889 /* Inform reload about cases where moving X with a mode MODE to a register in
5890 RCLASS requires an extra scratch or immediate register. Return the class
5891 needed for the immediate register. */
5893 static reg_class_t
5894 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5895 enum machine_mode mode, secondary_reload_info *sri)
5897 int regno;
5898 enum reg_class rclass = (enum reg_class) rclass_i;
5900 /* Handle the easy stuff first. */
5901 if (rclass == R1_REGS)
5902 return NO_REGS;
5904 if (REG_P (x))
5906 regno = REGNO (x);
5907 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5908 return NO_REGS;
5910 else
5911 regno = -1;
5913 /* If we have something like (mem (mem (...))), we can safely assume the
5914 inner MEM will end up in a general register after reloading, so there's
5915 no need for a secondary reload. */
5916 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5917 return NO_REGS;
5919 /* Trying to load a constant into an FP register during PIC code
5920 generation requires %r1 as a scratch register. */
5921 if (flag_pic
5922 && (mode == SImode || mode == DImode)
5923 && FP_REG_CLASS_P (rclass)
5924 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5926 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5927 : CODE_FOR_reload_indi_r1);
5928 return NO_REGS;
5931 /* Secondary reloads of symbolic operands require %r1 as a scratch
5932 register when we're generating PIC code and when the operand isn't
5933 readonly. */
5934 if (pa_symbolic_expression_p (x))
5936 if (GET_CODE (x) == HIGH)
5937 x = XEXP (x, 0);
5939 if (flag_pic || !read_only_operand (x, VOIDmode))
5941 gcc_assert (mode == SImode || mode == DImode);
5942 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5943 : CODE_FOR_reload_indi_r1);
5944 return NO_REGS;
5948 /* Profiling showed the PA port spends about 1.3% of its compilation
5949 time in true_regnum from calls inside pa_secondary_reload_class. */
5950 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5951 regno = true_regnum (x);
5953 /* In order to allow 14-bit displacements in integer loads and stores,
5954 we need to prevent reload from generating out of range integer mode
5955 loads and stores to the floating point registers. Previously, we
5956 used to call for a secondary reload and have pa_emit_move_sequence()
5957 fix the instruction sequence. However, reload occasionally wouldn't
5958 generate the reload and we would end up with an invalid REG+D memory
5959 address. So, now we use an intermediate general register for most
5960 memory loads and stores. */
5961 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5962 && GET_MODE_CLASS (mode) == MODE_INT
5963 && FP_REG_CLASS_P (rclass))
5965 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5966 the secondary reload needed for a pseudo. It never passes a
5967 REG+D address. */
5968 if (GET_CODE (x) == MEM)
5970 x = XEXP (x, 0);
5972 /* We don't need an intermediate for indexed and LO_SUM DLT
5973 memory addresses. When INT14_OK_STRICT is true, it might
5974 appear that we could directly allow register indirect
5975 memory addresses. However, this doesn't work because we
5976 don't support SUBREGs in floating-point register copies
5977 and reload doesn't tell us when it's going to use a SUBREG. */
5978 if (IS_INDEX_ADDR_P (x)
5979 || IS_LO_SUM_DLT_ADDR_P (x))
5980 return NO_REGS;
5982 /* Otherwise, we need an intermediate general register. */
5983 return GENERAL_REGS;
5986 /* Request a secondary reload with a general scratch register
5987 for everything else. ??? Could symbolic operands be handled
5988 directly when generating non-pic PA 2.0 code? */
5989 sri->icode = (in_p
5990 ? direct_optab_handler (reload_in_optab, mode)
5991 : direct_optab_handler (reload_out_optab, mode));
5992 return NO_REGS;
5995 /* A SAR<->FP register copy requires an intermediate general register
5996 and secondary memory. We need a secondary reload with a general
5997 scratch register for spills. */
5998 if (rclass == SHIFT_REGS)
6000 /* Handle spill. */
6001 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6003 sri->icode = (in_p
6004 ? direct_optab_handler (reload_in_optab, mode)
6005 : direct_optab_handler (reload_out_optab, mode));
6006 return NO_REGS;
6009 /* Handle FP copy. */
6010 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6011 return GENERAL_REGS;
6014 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6015 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6016 && FP_REG_CLASS_P (rclass))
6017 return GENERAL_REGS;
6019 return NO_REGS;
6022 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6023 is only marked as live on entry by df-scan when it is a fixed
6024 register. It isn't a fixed register in the 64-bit runtime,
6025 so we need to mark it here. */
6027 static void
6028 pa_extra_live_on_entry (bitmap regs)
6030 if (TARGET_64BIT)
6031 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6034 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6035 to prevent it from being deleted. */
6037 rtx
6038 pa_eh_return_handler_rtx (void)
6040 rtx tmp;
6042 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6043 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6044 tmp = gen_rtx_MEM (word_mode, tmp);
6045 tmp->volatil = 1;
6046 return tmp;
6049 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6050 by invisible reference. As a GCC extension, we also pass anything
6051 with a zero or variable size by reference.
6053 The 64-bit runtime does not describe passing any types by invisible
6054 reference. The internals of GCC can't currently handle passing
6055 empty structures, and zero or variable length arrays when they are
6056 not passed entirely on the stack or by reference. Thus, as a GCC
6057 extension, we pass these types by reference. The HP compiler doesn't
6058 support these types, so hopefully there shouldn't be any compatibility
6059 issues. This may have to be revisited when HP releases a C99 compiler
6060 or updates the ABI. */
6062 static bool
6063 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6064 enum machine_mode mode, const_tree type,
6065 bool named ATTRIBUTE_UNUSED)
6067 HOST_WIDE_INT size;
6069 if (type)
6070 size = int_size_in_bytes (type);
6071 else
6072 size = GET_MODE_SIZE (mode);
6074 if (TARGET_64BIT)
6075 return size <= 0;
6076 else
6077 return size <= 0 || size > 8;
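/* Thus, in the 32-bit runtime a 16-byte struct is passed by invisible
   reference while an 8-byte struct is passed by value; in the 64-bit
   runtime, only zero and variable-sized objects go by reference.  */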
6080 enum direction
6081 pa_function_arg_padding (enum machine_mode mode, const_tree type)
6083 if (mode == BLKmode
6084 || (TARGET_64BIT
6085 && type
6086 && (AGGREGATE_TYPE_P (type)
6087 || TREE_CODE (type) == COMPLEX_TYPE
6088 || TREE_CODE (type) == VECTOR_TYPE)))
6090 /* Return none if justification is not required. */
6091 if (type
6092 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6093 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6094 return none;
6096 /* The directions set here are ignored when a BLKmode argument larger
6097 than a word is placed in a register. Different code is used for
6098 the stack and registers. This makes it difficult to have a
6099 consistent data representation for both the stack and registers.
6100 For both runtimes, the justification and padding for arguments on
6101 the stack and in registers should be identical. */
6102 if (TARGET_64BIT)
6103 /* The 64-bit runtime specifies left justification for aggregates. */
6104 return upward;
6105 else
6106 /* The 32-bit runtime architecture specifies right justification.
6107 When the argument is passed on the stack, the argument is padded
6108 with garbage on the left. The HP compiler pads with zeros. */
6109 return downward;
6112 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6113 return downward;
6114 else
6115 return none;
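/* For example, a 3-byte BLKmode struct in the 32-bit runtime pads
   downward: its 24-bit size is not a multiple of PARM_BOUNDARY, so
   the value is right justified in its argument word.  */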
6119 /* Do what is necessary for `va_start'. We look at the current function
6120 to determine if stdargs or varargs is used and fill in an initial
6121 va_list. A pointer to this constructor is returned. */
6123 static rtx
6124 hppa_builtin_saveregs (void)
6126 rtx offset, dest;
6127 tree fntype = TREE_TYPE (current_function_decl);
6128 int argadj = ((!stdarg_p (fntype))
6129 ? UNITS_PER_WORD : 0);
6131 if (argadj)
6132 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6133 else
6134 offset = crtl->args.arg_offset_rtx;
6136 if (TARGET_64BIT)
6138 int i, off;
6140 /* Adjust for varargs/stdarg differences. */
6141 if (argadj)
6142 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6143 else
6144 offset = crtl->args.arg_offset_rtx;
6146 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6147 from the incoming arg pointer and growing to larger addresses. */
6148 for (i = 26, off = -64; i >= 19; i--, off += 8)
6149 emit_move_insn (gen_rtx_MEM (word_mode,
6150 plus_constant (Pmode,
6151 arg_pointer_rtx, off)),
6152 gen_rtx_REG (word_mode, i));
6154 /* The incoming args pointer points just beyond the flushback area;
6155 normally this is not a serious concern. However, when we are doing
6156 varargs/stdargs we want to make the arg pointer point to the start
6157 of the incoming argument area. */
6158 emit_move_insn (virtual_incoming_args_rtx,
6159 plus_constant (Pmode, arg_pointer_rtx, -64));
6161 /* Now return a pointer to the first anonymous argument. */
6162 return copy_to_reg (expand_binop (Pmode, add_optab,
6163 virtual_incoming_args_rtx,
6164 offset, 0, 0, OPTAB_LIB_WIDEN));
6167 /* Store general registers on the stack. */
6168 dest = gen_rtx_MEM (BLKmode,
6169 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6170 -16));
6171 set_mem_alias_set (dest, get_varargs_alias_set ());
6172 set_mem_align (dest, BITS_PER_WORD);
6173 move_block_from_reg (23, dest, 4);
6175 /* move_block_from_reg will emit code to store the argument registers
6176 individually as scalar stores.
6178 However, other insns may later load from the same addresses for
6179 a structure load (passing a struct to a varargs routine).
6181 The alias code assumes that such aliasing can never happen, so we
6182 have to keep memory referencing insns from moving up beyond the
6183 last argument register store. So we emit a blockage insn here. */
6184 emit_insn (gen_blockage ());
6186 return copy_to_reg (expand_binop (Pmode, add_optab,
6187 crtl->args.internal_arg_pointer,
6188 offset, 0, 0, OPTAB_LIB_WIDEN));
6191 static void
6192 hppa_va_start (tree valist, rtx nextarg)
6194 nextarg = expand_builtin_saveregs ();
6195 std_expand_builtin_va_start (valist, nextarg);
6198 static tree
6199 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6200 gimple_seq *post_p)
6202 if (TARGET_64BIT)
6204 /* Args grow upward. We can use the generic routines. */
6205 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6207 else /* !TARGET_64BIT */
6209 tree ptr = build_pointer_type (type);
6210 tree valist_type;
6211 tree t, u;
6212 unsigned int size, ofs;
6213 bool indirect;
6215 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6216 if (indirect)
6218 type = ptr;
6219 ptr = build_pointer_type (type);
6221 size = int_size_in_bytes (type);
6222 valist_type = TREE_TYPE (valist);
6224 /* Args grow down. Not handled by generic routines. */
6226 u = fold_convert (sizetype, size_in_bytes (type));
6227 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6228 t = fold_build_pointer_plus (valist, u);
6230 /* Align to 4 or 8 byte boundary depending on argument size. */
6232 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6233 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6234 t = fold_convert (valist_type, t);
6236 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6238 ofs = (8 - size) % 4;
6239 if (ofs != 0)
6240 t = fold_build_pointer_plus_hwi (t, ofs);
6242 t = fold_convert (ptr, t);
6243 t = build_va_arg_indirect_ref (t);
6245 if (indirect)
6246 t = build_va_arg_indirect_ref (t);
6248 return t;
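/* A worked 32-bit example: for a 2-byte short, the code above forms
   t = (valist - 2) & -4 and ofs = (8 - 2) % 4 = 2, so the value is
   fetched from ((valist - 2) & -4) + 2, i.e. right justified in the
   4-byte slot just below the incoming valist.  */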
6252 /* True if MODE is valid for the target. By "valid", we mean able to
6253 be manipulated in non-trivial ways. In particular, this means all
6254 the arithmetic is supported.
6256 Currently, TImode is not valid as the HP 64-bit runtime documentation
6257 doesn't document the alignment and calling conventions for this type.
6258 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6259 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6261 static bool
6262 pa_scalar_mode_supported_p (enum machine_mode mode)
6264 int precision = GET_MODE_PRECISION (mode);
6266 switch (GET_MODE_CLASS (mode))
6268 case MODE_PARTIAL_INT:
6269 case MODE_INT:
6270 if (precision == CHAR_TYPE_SIZE)
6271 return true;
6272 if (precision == SHORT_TYPE_SIZE)
6273 return true;
6274 if (precision == INT_TYPE_SIZE)
6275 return true;
6276 if (precision == LONG_TYPE_SIZE)
6277 return true;
6278 if (precision == LONG_LONG_TYPE_SIZE)
6279 return true;
6280 return false;
6282 case MODE_FLOAT:
6283 if (precision == FLOAT_TYPE_SIZE)
6284 return true;
6285 if (precision == DOUBLE_TYPE_SIZE)
6286 return true;
6287 if (precision == LONG_DOUBLE_TYPE_SIZE)
6288 return true;
6289 return false;
6291 case MODE_DECIMAL_FLOAT:
6292 return false;
6294 default:
6295 gcc_unreachable ();
6299 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6300 it branches into the delay slot. Otherwise, return FALSE. */
6302 static bool
6303 branch_to_delay_slot_p (rtx insn)
6305 rtx jump_insn;
6307 if (dbr_sequence_length ())
6308 return FALSE;
6310 jump_insn = next_active_insn (JUMP_LABEL (insn));
6311 while (insn)
6313 insn = next_active_insn (insn);
6314 if (jump_insn == insn)
6315 return TRUE;
6317 /* We can't rely on the length of asms. So, we return FALSE when
6318 the branch is followed by an asm. */
6319 if (!insn
6320 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6321 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6322 || get_attr_length (insn) > 0)
6323 break;
6326 return FALSE;
6329 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6331 This occurs when INSN has an unfilled delay slot and is followed
6332 by an asm. Disaster can occur if the asm is empty and the jump
6333 branches into the delay slot. So, we add a nop in the delay slot
6334 when this occurs. */
6336 static bool
6337 branch_needs_nop_p (rtx insn)
6339 rtx jump_insn;
6341 if (dbr_sequence_length ())
6342 return FALSE;
6344 jump_insn = next_active_insn (JUMP_LABEL (insn));
6345 while (insn)
6347 insn = next_active_insn (insn);
6348 if (!insn || jump_insn == insn)
6349 return TRUE;
6351 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6352 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6353 && get_attr_length (insn) > 0)
6354 break;
6357 return FALSE;
6360 /* Return TRUE if INSN, a forward jump insn, can use nullification
6361 to skip the following instruction. This avoids an extra cycle due
6362 to a mis-predicted branch when we fall through. */
6364 static bool
6365 use_skip_p (rtx insn)
6367 rtx jump_insn = next_active_insn (JUMP_LABEL (insn));
6369 while (insn)
6371 insn = next_active_insn (insn);
6373 /* We can't rely on the length of asms, so we can't skip asms. */
6374 if (!insn
6375 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6376 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6377 break;
6378 if (get_attr_length (insn) == 4
6379 && jump_insn == next_active_insn (insn))
6380 return TRUE;
6381 if (get_attr_length (insn) > 0)
6382 break;
6385 return FALSE;
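/* For example, pa_output_cbranch uses this to replace a forward
   conditional branch around a single insn with a compare-and-clear
   such as (register numbers illustrative)

	cmpclr,= %r4,%r5,%r0

   which nullifies the following insn when the condition holds,
   avoiding the branch entirely.  */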
6388 /* This routine handles all the normal conditional branch sequences we
6389 might need to generate. It handles compare immediate vs compare
6390 register, nullification of delay slots, varying length branches,
6391 negated branches, and all combinations of the above. It returns the
6392 output appropriate to emit the branch corresponding to all given
6393 parameters. */
6395 const char *
6396 pa_output_cbranch (rtx *operands, int negated, rtx insn)
6398 static char buf[100];
6399 bool useskip;
6400 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6401 int length = get_attr_length (insn);
6402 int xdelay;
6404 /* A conditional branch to the following instruction (e.g. the delay slot)
6405 is asking for a disaster. This can happen when not optimizing and
6406 when jump optimization fails.
6408 While it is usually safe to emit nothing, this can fail if the
6409 preceding instruction is a nullified branch with an empty delay
6410 slot and the same branch target as this branch. We could check
6411 for this but jump optimization should eliminate nop jumps. It
6412 is always safe to emit a nop. */
6413 if (branch_to_delay_slot_p (insn))
6414 return "nop";
6416 /* The doubleword form of the cmpib instruction doesn't have the LEU
6417 and GTU conditions while the cmpb instruction does. Since we accept
6418 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6419 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6420 operands[2] = gen_rtx_REG (DImode, 0);
6421 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6422 operands[1] = gen_rtx_REG (DImode, 0);
6424 /* If this is a long branch with its delay slot unfilled, set `nullify'
6425 as it can nullify the delay slot and save a nop. */
6426 if (length == 8 && dbr_sequence_length () == 0)
6427 nullify = 1;
6429 /* If this is a short forward conditional branch which did not get
6430 its delay slot filled, the delay slot can still be nullified. */
6431 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6432 nullify = forward_branch_p (insn);
6434 /* A forward branch over a single nullified insn can be done with a
6435 comclr instruction. This avoids a single cycle penalty due to a
6436 mis-predicted branch if we fall through (branch not taken). */
6437 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6439 switch (length)
6441 /* All short conditional branches except backwards with an unfilled
6442 delay slot. */
6443 case 4:
6444 if (useskip)
6445 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6446 else
6447 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6448 if (GET_MODE (operands[1]) == DImode)
6449 strcat (buf, "*");
6450 if (negated)
6451 strcat (buf, "%B3");
6452 else
6453 strcat (buf, "%S3");
6454 if (useskip)
6455 strcat (buf, " %2,%r1,%%r0");
6456 else if (nullify)
6458 if (branch_needs_nop_p (insn))
6459 strcat (buf, ",n %2,%r1,%0%#");
6460 else
6461 strcat (buf, ",n %2,%r1,%0");
6463 else
6464 strcat (buf, " %2,%r1,%0");
6465 break;
6467 /* All long conditionals. Note a short backward branch with an
6468 unfilled delay slot is treated just like a long backward branch
6469 with an unfilled delay slot. */
6470 case 8:
6471 /* Handle weird backwards branch with a filled delay slot
6472 which is nullified. */
6473 if (dbr_sequence_length () != 0
6474 && ! forward_branch_p (insn)
6475 && nullify)
6477 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6478 if (GET_MODE (operands[1]) == DImode)
6479 strcat (buf, "*");
6480 if (negated)
6481 strcat (buf, "%S3");
6482 else
6483 strcat (buf, "%B3");
6484 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6486 /* Handle short backwards branch with an unfilled delay slot.
6487 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6488 taken and untaken branches. */
6489 else if (dbr_sequence_length () == 0
6490 && ! forward_branch_p (insn)
6491 && INSN_ADDRESSES_SET_P ()
6492 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6493 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6495 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6496 if (GET_MODE (operands[1]) == DImode)
6497 strcat (buf, "*");
6498 if (negated)
6499 strcat (buf, "%B3 %2,%r1,%0%#");
6500 else
6501 strcat (buf, "%S3 %2,%r1,%0%#");
6503 else
6505 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6506 if (GET_MODE (operands[1]) == DImode)
6507 strcat (buf, "*");
6508 if (negated)
6509 strcat (buf, "%S3");
6510 else
6511 strcat (buf, "%B3");
6512 if (nullify)
6513 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6514 else
6515 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6517 break;
6519 default:
6520 /* The reversed conditional branch must branch over one additional
6521 instruction if the delay slot is filled and needs to be extracted
6522 by pa_output_lbranch. If the delay slot is empty or this is a
6523 nullified forward branch, the instruction after the reversed
6524 condition branch must be nullified. */
6525 if (dbr_sequence_length () == 0
6526 || (nullify && forward_branch_p (insn)))
6528 nullify = 1;
6529 xdelay = 0;
6530 operands[4] = GEN_INT (length);
6532 else
6534 xdelay = 1;
6535 operands[4] = GEN_INT (length + 4);
6538 /* Create a reversed conditional branch which branches around
6539 the following insns. */
6540 if (GET_MODE (operands[1]) != DImode)
6542 if (nullify)
6544 if (negated)
6545 strcpy (buf,
6546 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6547 else
6548 strcpy (buf,
6549 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6551 else
6553 if (negated)
6554 strcpy (buf,
6555 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6556 else
6557 strcpy (buf,
6558 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6561 else
6563 if (nullify)
6565 if (negated)
6566 strcpy (buf,
6567 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6568 else
6569 strcpy (buf,
6570 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6572 else
6574 if (negated)
6575 strcpy (buf,
6576 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6577 else
6578 strcpy (buf,
6579 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6583 output_asm_insn (buf, operands);
6584 return pa_output_lbranch (operands[0], insn, xdelay);
6586 return buf;
6589 /* This routine handles output of long unconditional branches that
6590 exceed the maximum range of a simple branch instruction. Since
6591 we don't have a register available for the branch, we save register
6592 %r1 in the frame marker, load the branch destination DEST into %r1,
6593 execute the branch, and restore %r1 in the delay slot of the branch.
6595 Since long branches may have an insn in the delay slot and the
6596 delay slot is used to restore %r1, we in general need to extract
6597 this insn and execute it before the branch. However, to facilitate
6598 use of this function by conditional branches, we also provide an
6599 option to not extract the delay insn so that it will be emitted
6600 after the long branch. So, if there is an insn in the delay slot,
6601 it is extracted if XDELAY is nonzero.
6603 The lengths of the various long-branch sequences are 20, 16 and 24
6604 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6606 const char *
6607 pa_output_lbranch (rtx dest, rtx insn, int xdelay)
6609 rtx xoperands[2];
6611 xoperands[0] = dest;
6613 /* First, free up the delay slot. */
6614 if (xdelay && dbr_sequence_length () != 0)
6616 /* We can't handle a jump in the delay slot. */
6617 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6619 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6620 optimize, 0, NULL);
6622 /* Now delete the delay insn. */
6623 SET_INSN_DELETED (NEXT_INSN (insn));
6626 /* Output an insn to save %r1. The runtime documentation doesn't
6627 specify whether the "Clean Up" slot in the caller's frame can
6628 be clobbered by the callee. It isn't copied by HP's builtin
6629 alloca, so this suggests that it can be clobbered if necessary.
6630 The "Static Link" location is copied by HP builtin alloca, so
6631 we avoid using it. Using the cleanup slot might be a problem
6632 if we have to interoperate with languages that pass cleanup
6633 information. However, it should be possible to handle these
6634 situations with GCC's asm feature.
6636 The "Current RP" slot is reserved for the called procedure, so
6637 we try to use it when we don't have a frame of our own. It's
6638 rather unlikely that we won't have a frame when we need to emit
6639 a very long branch.
6641 Really the way to go long term is a register scavenger; go to
6642 the target of the jump and find a register which we can use
6643 as a scratch to hold the value in %r1. Then, we wouldn't have
6644 to free up the delay slot or clobber a slot that may be needed
6645 for other purposes. */
6646 if (TARGET_64BIT)
6648 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6649 /* Use the return pointer slot in the frame marker. */
6650 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6651 else
6652 /* Use the slot at -40 in the frame marker since HP builtin
6653 alloca doesn't copy it. */
6654 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6656 else
6658 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6659 /* Use the return pointer slot in the frame marker. */
6660 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6661 else
6662 /* Use the "Clean Up" slot in the frame marker. In GCC,
6663 the only other use of this location is for copying a
6664 floating point double argument from a floating-point
6665 register to two general registers. The copy is done
6666 as an "atomic" operation when outputting a call, so it
6667 won't interfere with our using the location here. */
6668 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6671 if (TARGET_PORTABLE_RUNTIME)
6673 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6674 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6675 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6677 else if (flag_pic)
6679 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6680 if (TARGET_SOM || !TARGET_GAS)
6682 xoperands[1] = gen_label_rtx ();
6683 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6684 targetm.asm_out.internal_label (asm_out_file, "L",
6685 CODE_LABEL_NUMBER (xoperands[1]));
6686 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6688 else
6690 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6691 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6693 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6695 else
6696 /* Now output a very long branch to the original target. */
6697 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6699 /* Now restore the value of %r1 in the delay slot. */
6700 if (TARGET_64BIT)
6702 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6703 return "ldd -16(%%r30),%%r1";
6704 else
6705 return "ldd -40(%%r30),%%r1";
6707 else
6709 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6710 return "ldw -20(%%r30),%%r1";
6711 else
6712 return "ldw -12(%%r30),%%r1";
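/* A sketch of the non-PIC 32-bit sequence produced above when the
   function has a frame, with "target" standing for the %l0 operand:

	stw %r1,-12(%r30)
	ldil L'target,%r1
	be R'target(%sr4,%r1)
	ldw -12(%r30),%r1	; %r1 restored in the delay slot  */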
6716 /* This routine handles all the branch-on-bit conditional branch sequences we
6717 might need to generate. It handles nullification of delay slots,
6718 varying length branches, negated branches and all combinations of the
6719 above. It returns the appropriate output template to emit the branch. */
6721 const char *
6722 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6724 static char buf[100];
6725 bool useskip;
6726 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6727 int length = get_attr_length (insn);
6728 int xdelay;
6730 /* A conditional branch to the following instruction (e.g. the delay slot) is
6731 asking for a disaster. I do not think this can happen as this pattern
6732 is only used when optimizing; jump optimization should eliminate the
6733 jump. But be prepared just in case. */
6735 if (branch_to_delay_slot_p (insn))
6736 return "nop";
6738 /* If this is a long branch with its delay slot unfilled, set `nullify'
6739 as it can nullify the delay slot and save a nop. */
6740 if (length == 8 && dbr_sequence_length () == 0)
6741 nullify = 1;
6743 /* If this is a short forward conditional branch which did not get
6744 its delay slot filled, the delay slot can still be nullified. */
6745 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6746 nullify = forward_branch_p (insn);
6748 /* A forward branch over a single nullified insn can be done with an
6749 extrs instruction. This avoids a single cycle penalty due to a
6750 mis-predicted branch if we fall through (branch not taken). */
6751 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6753 switch (length)
6756 /* All short conditional branches except backwards with an unfilled
6757 delay slot. */
6758 case 4:
6759 if (useskip)
6760 strcpy (buf, "{extrs,|extrw,s,}");
6761 else
6762 strcpy (buf, "bb,");
6763 if (useskip && GET_MODE (operands[0]) == DImode)
6764 strcpy (buf, "extrd,s,*");
6765 else if (GET_MODE (operands[0]) == DImode)
6766 strcpy (buf, "bb,*");
6767 if ((which == 0 && negated)
6768 || (which == 1 && ! negated))
6769 strcat (buf, ">=");
6770 else
6771 strcat (buf, "<");
6772 if (useskip)
6773 strcat (buf, " %0,%1,1,%%r0");
6774 else if (nullify && negated)
6776 if (branch_needs_nop_p (insn))
6777 strcat (buf, ",n %0,%1,%3%#");
6778 else
6779 strcat (buf, ",n %0,%1,%3");
6781 else if (nullify && ! negated)
6783 if (branch_needs_nop_p (insn))
6784 strcat (buf, ",n %0,%1,%2%#");
6785 else
6786 strcat (buf, ",n %0,%1,%2");
6788 else if (! nullify && negated)
6789 strcat (buf, " %0,%1,%3");
6790 else if (! nullify && ! negated)
6791 strcat (buf, " %0,%1,%2");
6792 break;
6794 /* All long conditionals. Note a short backward branch with an
6795 unfilled delay slot is treated just like a long backward branch
6796 with an unfilled delay slot. */
6797 case 8:
6798 /* Handle weird backwards branch with a filled delay slot
6799 which is nullified. */
6800 if (dbr_sequence_length () != 0
6801 && ! forward_branch_p (insn)
6802 && nullify)
6804 strcpy (buf, "bb,");
6805 if (GET_MODE (operands[0]) == DImode)
6806 strcat (buf, "*");
6807 if ((which == 0 && negated)
6808 || (which == 1 && ! negated))
6809 strcat (buf, "<");
6810 else
6811 strcat (buf, ">=");
6812 if (negated)
6813 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6814 else
6815 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6817 /* Handle short backwards branch with an unfilled delay slot.
6818 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6819 taken and untaken branches. */
6820 else if (dbr_sequence_length () == 0
6821 && ! forward_branch_p (insn)
6822 && INSN_ADDRESSES_SET_P ()
6823 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6824 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6826 strcpy (buf, "bb,");
6827 if (GET_MODE (operands[0]) == DImode)
6828 strcat (buf, "*");
6829 if ((which == 0 && negated)
6830 || (which == 1 && ! negated))
6831 strcat (buf, ">=");
6832 else
6833 strcat (buf, "<");
6834 if (negated)
6835 strcat (buf, " %0,%1,%3%#");
6836 else
6837 strcat (buf, " %0,%1,%2%#");
6839 else
6841 if (GET_MODE (operands[0]) == DImode)
6842 strcpy (buf, "extrd,s,*");
6843 else
6844 strcpy (buf, "{extrs,|extrw,s,}");
6845 if ((which == 0 && negated)
6846 || (which == 1 && ! negated))
6847 strcat (buf, "<");
6848 else
6849 strcat (buf, ">=");
6850 if (nullify && negated)
6851 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6852 else if (nullify && ! negated)
6853 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6854 else if (negated)
6855 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6856 else
6857 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6859 break;
6861 default:
6862 /* The reversed conditional branch must branch over one additional
6863 instruction if the delay slot is filled and needs to be extracted
6864 by pa_output_lbranch. If the delay slot is empty or this is a
6865 nullified forward branch, the instruction after the reversed
6866 condition branch must be nullified. */
6867 if (dbr_sequence_length () == 0
6868 || (nullify && forward_branch_p (insn)))
6870 nullify = 1;
6871 xdelay = 0;
6872 operands[4] = GEN_INT (length);
6874 else
6876 xdelay = 1;
6877 operands[4] = GEN_INT (length + 4);
6880 if (GET_MODE (operands[0]) == DImode)
6881 strcpy (buf, "bb,*");
6882 else
6883 strcpy (buf, "bb,");
6884 if ((which == 0 && negated)
6885 || (which == 1 && !negated))
6886 strcat (buf, "<");
6887 else
6888 strcat (buf, ">=");
6889 if (nullify)
6890 strcat (buf, ",n %0,%1,.+%4");
6891 else
6892 strcat (buf, " %0,%1,.+%4");
6893 output_asm_insn (buf, operands);
6894 return pa_output_lbranch (negated ? operands[3] : operands[2],
6895 insn, xdelay);
6897 return buf;
6900 /* This routine handles all the branch-on-variable-bit conditional branch
6901 sequences we might need to generate. It handles nullification of delay
6902 slots, varying length branches, negated branches and all combinations
6903 of the above. It returns the appropriate output template to emit the
6904 branch. */
6906 const char *
6907 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn,
6908 int which)
6910 static char buf[100];
6911 bool useskip;
6912 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6913 int length = get_attr_length (insn);
6914 int xdelay;
6916 /* A conditional branch to the following instruction (e.g. the delay slot) is
6917 asking for a disaster. I do not think this can happen as this pattern
6918 is only used when optimizing; jump optimization should eliminate the
6919 jump. But be prepared just in case. */
6921 if (branch_to_delay_slot_p (insn))
6922 return "nop";
6924 /* If this is a long branch with its delay slot unfilled, set `nullify'
6925 as it can nullify the delay slot and save a nop. */
6926 if (length == 8 && dbr_sequence_length () == 0)
6927 nullify = 1;
6929 /* If this is a short forward conditional branch which did not get
6930 its delay slot filled, the delay slot can still be nullified. */
6931 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6932 nullify = forward_branch_p (insn);
6934 /* A forward branch over a single nullified insn can be done with an
6935 extrs instruction. This avoids a single cycle penalty due to a
6936 mis-predicted branch if we fall through (branch not taken). */
6937 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6939 switch (length)
6942 /* All short conditional branches except backwards with an unfilled
6943 delay slot. */
6944 case 4:
6945 if (useskip)
6946 strcpy (buf, "{vextrs,|extrw,s,}");
6947 else
6948 strcpy (buf, "{bvb,|bb,}");
6949 if (useskip && GET_MODE (operands[0]) == DImode)
6950 strcpy (buf, "extrd,s,*");
6951 else if (GET_MODE (operands[0]) == DImode)
6952 strcpy (buf, "bb,*");
6953 if ((which == 0 && negated)
6954 || (which == 1 && ! negated))
6955 strcat (buf, ">=");
6956 else
6957 strcat (buf, "<");
6958 if (useskip)
6959 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6960 else if (nullify && negated)
6962 if (branch_needs_nop_p (insn))
6963 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6964 else
6965 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6967 else if (nullify && ! negated)
6969 if (branch_needs_nop_p (insn))
6970 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6971 else
6972 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6974 else if (! nullify && negated)
6975 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6976 else if (! nullify && ! negated)
6977 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6978 break;
6980 /* All long conditionals. Note a short backward branch with an
6981 unfilled delay slot is treated just like a long backward branch
6982 with an unfilled delay slot. */
6983 case 8:
6984 /* Handle weird backwards branch with a filled delay slot
6985 which is nullified. */
6986 if (dbr_sequence_length () != 0
6987 && ! forward_branch_p (insn)
6988 && nullify)
6990 strcpy (buf, "{bvb,|bb,}");
6991 if (GET_MODE (operands[0]) == DImode)
6992 strcat (buf, "*");
6993 if ((which == 0 && negated)
6994 || (which == 1 && ! negated))
6995 strcat (buf, "<");
6996 else
6997 strcat (buf, ">=");
6998 if (negated)
6999 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7000 else
7001 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7003 /* Handle short backwards branch with an unfilled delay slot.
7004 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7005 taken and untaken branches. */
7006 else if (dbr_sequence_length () == 0
7007 && ! forward_branch_p (insn)
7008 && INSN_ADDRESSES_SET_P ()
7009 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7010 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7012 strcpy (buf, "{bvb,|bb,}");
7013 if (GET_MODE (operands[0]) == DImode)
7014 strcat (buf, "*");
7015 if ((which == 0 && negated)
7016 || (which == 1 && ! negated))
7017 strcat (buf, ">=");
7018 else
7019 strcat (buf, "<");
7020 if (negated)
7021 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7022 else
7023 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7025 else
7027 strcpy (buf, "{vextrs,|extrw,s,}");
7028 if (GET_MODE (operands[0]) == DImode)
7029 strcpy (buf, "extrd,s,*");
7030 if ((which == 0 && negated)
7031 || (which == 1 && ! negated))
7032 strcat (buf, "<");
7033 else
7034 strcat (buf, ">=");
7035 if (nullify && negated)
7036 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7037 else if (nullify && ! negated)
7038 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7039 else if (negated)
7040 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7041 else
7042 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7044 break;
7046 default:
7047 /* The reversed conditional branch must branch over one additional
7048 instruction if the delay slot is filled and needs to be extracted
7049 by pa_output_lbranch. If the delay slot is empty or this is a
7050 nullified forward branch, the instruction after the reversed
7051 condition branch must be nullified. */
7052 if (dbr_sequence_length () == 0
7053 || (nullify && forward_branch_p (insn)))
7055 nullify = 1;
7056 xdelay = 0;
7057 operands[4] = GEN_INT (length);
7059 else
7061 xdelay = 1;
7062 operands[4] = GEN_INT (length + 4);
7065 if (GET_MODE (operands[0]) == DImode)
7066 strcpy (buf, "bb,*");
7067 else
7068 strcpy (buf, "{bvb,|bb,}");
7069 if ((which == 0 && negated)
7070 || (which == 1 && !negated))
7071 strcat (buf, "<");
7072 else
7073 strcat (buf, ">=");
7074 if (nullify)
7075 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7076 else
7077 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7078 output_asm_insn (buf, operands);
7079 return pa_output_lbranch (negated ? operands[3] : operands[2],
7080 insn, xdelay);
7082 return buf;
7085 /* Return the output template for emitting a dbra type insn.
7087 Note it may perform some output operations on its own before
7088 returning the final output string. */
7089 const char *
7090 pa_output_dbra (rtx *operands, rtx insn, int which_alternative)
7092 int length = get_attr_length (insn);
7094 /* A conditional branch to the following instruction (e.g. the delay slot) is
7095 asking for a disaster. Be prepared! */
7097 if (branch_to_delay_slot_p (insn))
7099 if (which_alternative == 0)
7100 return "ldo %1(%0),%0";
7101 else if (which_alternative == 1)
7103 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7104 output_asm_insn ("ldw -16(%%r30),%4", operands);
7105 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7106 return "{fldws|fldw} -16(%%r30),%0";
7108 else
7110 output_asm_insn ("ldw %0,%4", operands);
7111 return "ldo %1(%4),%4\n\tstw %4,%0";
7115 if (which_alternative == 0)
7117 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7118 int xdelay;
7120 /* If this is a long branch with its delay slot unfilled, set `nullify'
7121 as it can nullify the delay slot and save a nop. */
7122 if (length == 8 && dbr_sequence_length () == 0)
7123 nullify = 1;
7125 /* If this is a short forward conditional branch which did not get
7126 its delay slot filled, the delay slot can still be nullified. */
7127 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7128 nullify = forward_branch_p (insn);
7130 switch (length)
7132 case 4:
7133 if (nullify)
7135 if (branch_needs_nop_p (insn))
7136 return "addib,%C2,n %1,%0,%3%#";
7137 else
7138 return "addib,%C2,n %1,%0,%3";
7140 else
7141 return "addib,%C2 %1,%0,%3";
7143 case 8:
7144 /* Handle weird backwards branch with a filled delay slot
7145 which is nullified. */
7146 if (dbr_sequence_length () != 0
7147 && ! forward_branch_p (insn)
7148 && nullify)
7149 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7150 /* Handle short backwards branch with an unfilled delay slot.
7151 Using a addb;nop rather than addi;bl saves 1 cycle for both
7152 taken and untaken branches. */
7153 else if (dbr_sequence_length () == 0
7154 && ! forward_branch_p (insn)
7155 && INSN_ADDRESSES_SET_P ()
7156 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7157 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7158 return "addib,%C2 %1,%0,%3%#";
7160 /* Handle normal cases. */
7161 if (nullify)
7162 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7163 else
7164 return "addi,%N2 %1,%0,%0\n\tb %3";
7166 default:
7167 /* The reversed conditional branch must branch over one additional
7168 instruction if the delay slot is filled and needs to be extracted
7169 by pa_output_lbranch. If the delay slot is empty or this is a
7170 nullified forward branch, the instruction after the reversed
7171 condition branch must be nullified. */
7172 if (dbr_sequence_length () == 0
7173 || (nullify && forward_branch_p (insn)))
7175 nullify = 1;
7176 xdelay = 0;
7177 operands[4] = GEN_INT (length);
7179 else
7181 xdelay = 1;
7182 operands[4] = GEN_INT (length + 4);
7185 if (nullify)
7186 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7187 else
7188 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7190 return pa_output_lbranch (operands[3], insn, xdelay);
7194 /* Deal with gross reload from FP register case. */
7195 else if (which_alternative == 1)
7197 /* Move loop counter from FP register to MEM then into a GR,
7198 increment the GR, store the GR into MEM, and finally reload
7199 the FP register from MEM from within the branch's delay slot. */
7200 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7201 operands);
7202 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7203 if (length == 24)
7204 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7205 else if (length == 28)
7206 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7207 else
7209 operands[5] = GEN_INT (length - 16);
7210 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7211 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7212 return pa_output_lbranch (operands[3], insn, 0);
7215 /* Deal with gross reload from memory case. */
7216 else
7218 /* Reload loop counter from memory; the store back to memory
7219 happens in the branch's delay slot. */
7220 output_asm_insn ("ldw %0,%4", operands);
7221 if (length == 12)
7222 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7223 else if (length == 16)
7224 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7225 else
7227 operands[5] = GEN_INT (length - 4);
7228 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7229 return pa_output_lbranch (operands[3], insn, 0);
7234 /* Return the output template for emitting a movb type insn.
7236 Note it may perform some output operations on its own before
7237 returning the final output string. */
7238 const char *
7239 pa_output_movb (rtx *operands, rtx insn, int which_alternative,
7240 int reverse_comparison)
7242 int length = get_attr_length (insn);
7244 /* A conditional branch to the following instruction (e.g. the delay slot) is
7245 asking for a disaster. Be prepared! */
7247 if (branch_to_delay_slot_p (insn))
7249 if (which_alternative == 0)
7250 return "copy %1,%0";
7251 else if (which_alternative == 1)
7253 output_asm_insn ("stw %1,-16(%%r30)", operands);
7254 return "{fldws|fldw} -16(%%r30),%0";
7256 else if (which_alternative == 2)
7257 return "stw %1,%0";
7258 else
7259 return "mtsar %r1";
7262 /* Support the second variant. */
7263 if (reverse_comparison)
7264 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7266 if (which_alternative == 0)
7268 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7269 int xdelay;
7271 /* If this is a long branch with its delay slot unfilled, set `nullify'
7272 as it can nullify the delay slot and save a nop. */
7273 if (length == 8 && dbr_sequence_length () == 0)
7274 nullify = 1;
7276 /* If this is a short forward conditional branch which did not get
7277 its delay slot filled, the delay slot can still be nullified. */
7278 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7279 nullify = forward_branch_p (insn);
7281 switch (length)
7283 case 4:
7284 if (nullify)
7286 if (branch_needs_nop_p (insn))
7287 return "movb,%C2,n %1,%0,%3%#";
7288 else
7289 return "movb,%C2,n %1,%0,%3";
7291 else
7292 return "movb,%C2 %1,%0,%3";
7294 case 8:
7295 /* Handle weird backwards branch with a filled delay slot
7296 which is nullified. */
7297 if (dbr_sequence_length () != 0
7298 && ! forward_branch_p (insn)
7299 && nullify)
7300 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7302 /* Handle short backwards branch with an unfilled delay slot.
7303 Using a movb;nop rather than or;bl saves 1 cycle for both
7304 taken and untaken branches. */
7305 else if (dbr_sequence_length () == 0
7306 && ! forward_branch_p (insn)
7307 && INSN_ADDRESSES_SET_P ()
7308 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7309 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7310 return "movb,%C2 %1,%0,%3%#";
7311 /* Handle normal cases. */
7312 if (nullify)
7313 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7314 else
7315 return "or,%N2 %1,%%r0,%0\n\tb %3";
7317 default:
7318 /* The reversed conditional branch must branch over one additional
7319 instruction if the delay slot is filled and needs to be extracted
7320 by pa_output_lbranch. If the delay slot is empty or this is a
7321 nullified forward branch, the instruction after the reversed
7322 condition branch must be nullified. */
7323 if (dbr_sequence_length () == 0
7324 || (nullify && forward_branch_p (insn)))
7326 nullify = 1;
7327 xdelay = 0;
7328 operands[4] = GEN_INT (length);
7330 else
7332 xdelay = 1;
7333 operands[4] = GEN_INT (length + 4);
7336 if (nullify)
7337 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7338 else
7339 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7341 return pa_output_lbranch (operands[3], insn, xdelay);
7344 /* Deal with gross reload for FP destination register case. */
7345 else if (which_alternative == 1)
7347 /* Move source register to MEM, perform the branch test, then
7348 finally load the FP register from MEM from within the branch's
7349 delay slot. */
7350 output_asm_insn ("stw %1,-16(%%r30)", operands);
7351 if (length == 12)
7352 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7353 else if (length == 16)
7354 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7355 else
7357 operands[4] = GEN_INT (length - 4);
7358 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7359 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7360 return pa_output_lbranch (operands[3], insn, 0);
7363 /* Deal with gross reload from memory case. */
7364 else if (which_alternative == 2)
7366 /* Reload loop counter from memory; the store back to memory
7367 happens in the branch's delay slot. */
7368 if (length == 8)
7369 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7370 else if (length == 12)
7371 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7372 else
7374 operands[4] = GEN_INT (length);
7375 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7376 operands);
7377 return pa_output_lbranch (operands[3], insn, 0);
7380 /* Handle SAR as a destination. */
7381 else
7383 if (length == 8)
7384 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7385 else if (length == 12)
7386 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7387 else
7389 operands[4] = GEN_INT (length);
7390 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7391 operands);
7392 return pa_output_lbranch (operands[3], insn, 0);
7397 /* Copy any FP arguments in INSN into integer registers. */
7398 static void
7399 copy_fp_args (rtx insn)
7401 rtx link;
7402 rtx xoperands[2];
7404 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7406 int arg_mode, regno;
7407 rtx use = XEXP (link, 0);
7409 if (! (GET_CODE (use) == USE
7410 && GET_CODE (XEXP (use, 0)) == REG
7411 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7412 continue;
7414 arg_mode = GET_MODE (XEXP (use, 0));
7415 regno = REGNO (XEXP (use, 0));
7417 /* Is it a floating point register? */
7418 if (regno >= 32 && regno <= 39)
7420 /* Copy the FP register into an integer register via memory. */
7421 if (arg_mode == SFmode)
7423 xoperands[0] = XEXP (use, 0);
7424 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7425 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7426 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7428 else
7430 xoperands[0] = XEXP (use, 0);
7431 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7432 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7433 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7434 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7440 /* Compute length of the FP argument copy sequence for INSN. */
7441 static int
7442 length_fp_args (rtx insn)
7444 int length = 0;
7445 rtx link;
7447 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7449 int arg_mode, regno;
7450 rtx use = XEXP (link, 0);
7452 if (! (GET_CODE (use) == USE
7453 && GET_CODE (XEXP (use, 0)) == REG
7454 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7455 continue;
7457 arg_mode = GET_MODE (XEXP (use, 0));
7458 regno = REGNO (XEXP (use, 0));
7460 /* Is it a floating point register? */
7461 if (regno >= 32 && regno <= 39)
7463 if (arg_mode == SFmode)
7464 length += 8;
7465 else
7466 length += 12;
7470 return length;
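/* For example, a call with one SFmode and one DFmode argument gets a
   copy length of 8 + 12 = 20 bytes: fstw plus ldw for the single and
   fstd plus two ldw's for the double, at 4 bytes per insn.  */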
7473 /* Return the attribute length for the millicode call instruction INSN.
7474 The length must match the code generated by pa_output_millicode_call.
7475 We include the delay slot in the returned length as it is better to
7476 overestimate the length than to underestimate it. */
7478 int
7479 pa_attr_length_millicode_call (rtx insn)
7481 unsigned long distance = -1;
7482 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7484 if (INSN_ADDRESSES_SET_P ())
7486 distance = (total + insn_current_reference_address (insn));
7487 if (distance < total)
7488 distance = -1;
7491 if (TARGET_64BIT)
7493 if (!TARGET_LONG_CALLS && distance < 7600000)
7494 return 8;
7496 return 20;
7498 else if (TARGET_PORTABLE_RUNTIME)
7499 return 24;
7500 else
7502 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7503 return 8;
7505 if (TARGET_LONG_ABS_CALL && !flag_pic)
7506 return 12;
7508 return 24;
7512 /* INSN is a function call. It may have an unconditional jump
7513 in its delay slot.
7515 CALL_DEST is the routine we are calling. */
7517 const char *
7518 pa_output_millicode_call (rtx insn, rtx call_dest)
7520 int attr_length = get_attr_length (insn);
7521 int seq_length = dbr_sequence_length ();
7522 int distance;
7523 rtx seq_insn;
7524 rtx xoperands[3];
7526 xoperands[0] = call_dest;
7527 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7529 /* Handle the common case where we are sure that the branch will
7530 reach the beginning of the $CODE$ subspace. The within-reach
7531 form of the $$sh_func_adrs call has a length of 28. Because it
7532 has an attribute type of sh_func_adrs, it never has a nonzero
7533 sequence length (i.e., the delay slot is never filled). */
7534 if (!TARGET_LONG_CALLS
7535 && (attr_length == 8
7536 || (attr_length == 28
7537 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7539 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7541 else
7543 if (TARGET_64BIT)
7545 /* It might seem that one insn could be saved by accessing
7546 the millicode function using the linkage table. However,
7547 this doesn't work in shared libraries and other dynamically
7548 loaded objects. Using a pc-relative sequence also avoids
7549 problems related to the implicit use of the gp register. */
7550 output_asm_insn ("b,l .+8,%%r1", xoperands);
7552 if (TARGET_GAS)
7554 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7555 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7557 else
7559 xoperands[1] = gen_label_rtx ();
7560 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7561 targetm.asm_out.internal_label (asm_out_file, "L",
7562 CODE_LABEL_NUMBER (xoperands[1]));
7563 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7566 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7568 else if (TARGET_PORTABLE_RUNTIME)
7570 /* Pure portable runtime doesn't allow be/ble; we also don't
7571 have PIC support in the assembler/linker, so this sequence
7572 is needed. */
7574 /* Get the address of our target into %r1. */
7575 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7576 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7578 /* Get our return address into %r31. */
7579 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7580 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7582 /* Jump to our target address in %r1. */
7583 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7585 else if (!flag_pic)
7587 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7588 if (TARGET_PA_20)
7589 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7590 else
7591 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7593 else
7595 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7596 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7598 if (TARGET_SOM || !TARGET_GAS)
7600 /* The HP assembler can generate relocations for the
7601 difference of two symbols. GAS can do this for a
7602 millicode symbol but not an arbitrary external
7603 symbol when generating SOM output. */
7604 xoperands[1] = gen_label_rtx ();
7605 targetm.asm_out.internal_label (asm_out_file, "L",
7606 CODE_LABEL_NUMBER (xoperands[1]));
7607 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7608 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7610 else
7612 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7613 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7614 xoperands);
7617 /* Jump to our target address in %r1. */
7618 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7622 if (seq_length == 0)
7623 output_asm_insn ("nop", xoperands);
7625 /* We are done if there isn't a jump in the delay slot. */
7626 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7627 return "";
7629 /* This call has an unconditional jump in its delay slot. */
7630 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7632 /* See if the return address can be adjusted. Use the containing
7633 sequence insn's address. */
7634 if (INSN_ADDRESSES_SET_P ())
7636 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7637 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7638 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7640 if (VAL_14_BITS_P (distance))
7642 xoperands[1] = gen_label_rtx ();
7643 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7644 targetm.asm_out.internal_label (asm_out_file, "L",
7645 CODE_LABEL_NUMBER (xoperands[1]));
7647 else
7648 /* ??? This branch may not reach its target. */
7649 output_asm_insn ("nop\n\tb,n %0", xoperands);
7651 else
7652 /* ??? This branch may not reach its target. */
7653 output_asm_insn ("nop\n\tb,n %0", xoperands);
7655 /* Delete the jump. */
7656 SET_INSN_DELETED (NEXT_INSN (insn));
7658 return "";
7661 /* Return the attribute length of the call instruction INSN. The SIBCALL
7662 flag indicates whether INSN is a regular call or a sibling call. The
7663 length returned must be longer than the code actually generated by
7664 pa_output_call. Since branch shortening is done before delay branch
7665 sequencing, there is no way to determine whether or not the delay
7666 slot will be filled during branch shortening. Even when the delay
7667 slot is filled, we may have to add a nop if the delay slot contains
7668 a branch that can't reach its target. Thus, we always have to include
7669 the delay slot in the length estimate. This used to be done in
7670 pa_adjust_insn_length but we do it here now as some sequences always
7671 fill the delay slot and we can save four bytes in the estimate for
7672 these sequences. */
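/* As a sketch of the estimates computed below (delay slot included):
   8 bytes for a reachable pc-relative call, 24 (28 for sibcalls) for
   the 64-bit plabel sequence, 12 for the non-pic long absolute
   branch, and 20 or more for the long pc-relative and 32-bit plabel
   sequences.  */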
7675 pa_attr_length_call (rtx insn, int sibcall)
7677 int local_call;
7678 rtx call, call_dest;
7679 tree call_decl;
7680 int length = 0;
7681 rtx pat = PATTERN (insn);
7682 unsigned long distance = -1;
7684 gcc_assert (GET_CODE (insn) == CALL_INSN);
7686 if (INSN_ADDRESSES_SET_P ())
7688 unsigned long total;
7690 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7691 distance = (total + insn_current_reference_address (insn));
7692 if (distance < total)
7693 distance = -1;
7696 gcc_assert (GET_CODE (pat) == PARALLEL);
7698 /* Get the call rtx. */
7699 call = XVECEXP (pat, 0, 0);
7700 if (GET_CODE (call) == SET)
7701 call = SET_SRC (call);
7703 gcc_assert (GET_CODE (call) == CALL);
7705 /* Determine if this is a local call. */
7706 call_dest = XEXP (XEXP (call, 0), 0);
7707 call_decl = SYMBOL_REF_DECL (call_dest);
7708 local_call = call_decl && targetm.binds_local_p (call_decl);
7710 /* pc-relative branch. */
7711 if (!TARGET_LONG_CALLS
7712 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7713 || distance < MAX_PCREL17F_OFFSET))
7714 length += 8;
7716 /* 64-bit plabel sequence. */
7717 else if (TARGET_64BIT && !local_call)
7718 length += sibcall ? 28 : 24;
7720 /* non-pic long absolute branch sequence. */
7721 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7722 length += 12;
7724 /* long pc-relative branch sequence. */
7725 else if (TARGET_LONG_PIC_SDIFF_CALL
7726 || (TARGET_GAS && !TARGET_SOM
7727 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7729 length += 20;
7731 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7732 length += 8;
7735 /* 32-bit plabel sequence. */
7736 else
7738 length += 32;
7740 if (TARGET_SOM)
7741 length += length_fp_args (insn);
7743 if (flag_pic)
7744 length += 4;
7746 if (!TARGET_PA_20)
7748 if (!sibcall)
7749 length += 8;
7751 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7752 length += 8;
7756 return length;
7759 /* INSN is a function call. It may have an unconditional jump
7760 in its delay slot.
7762 CALL_DEST is the routine we are calling. */
7764 const char *
7765 pa_output_call (rtx insn, rtx call_dest, int sibcall)
7767 int delay_insn_deleted = 0;
7768 int delay_slot_filled = 0;
7769 int seq_length = dbr_sequence_length ();
7770 tree call_decl = SYMBOL_REF_DECL (call_dest);
7771 int local_call = call_decl && targetm.binds_local_p (call_decl);
7772 rtx xoperands[2];
7774 xoperands[0] = call_dest;
7776 /* Handle the common case where we're sure that the branch will reach
7777 the beginning of the "$CODE$" subspace. This is the beginning of
7778 the current function if we are in a named section. */
7779 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7781 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7782 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7784 else
7786 if (TARGET_64BIT && !local_call)
7788 /* ??? As far as I can tell, the HP linker doesn't support the
7789 long pc-relative sequence described in the 64-bit runtime
7790 architecture. So, we use a slightly longer indirect call. */
7791 xoperands[0] = pa_get_deferred_plabel (call_dest);
7792 xoperands[1] = gen_label_rtx ();
7794 /* If this isn't a sibcall, we put the load of %r27 into the
7795 delay slot. We can't do this in a sibcall as we don't
7796 have a second call-clobbered scratch register available. */
7797 if (seq_length != 0
7798 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7799 && !sibcall)
7801 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7802 optimize, 0, NULL);
7804 /* Now delete the delay insn. */
7805 SET_INSN_DELETED (NEXT_INSN (insn));
7806 delay_insn_deleted = 1;
7809 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7810 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7811 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7813 if (sibcall)
7815 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7816 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7817 output_asm_insn ("bve (%%r1)", xoperands);
7819 else
7821 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7822 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7823 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7824 delay_slot_filled = 1;
7827 else
7829 int indirect_call = 0;
7831 /* Emit a long call. There are several different sequences
7832 of increasing length and complexity. In most cases,
7833 they don't allow an instruction in the delay slot. */
7834 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7835 && !TARGET_LONG_PIC_SDIFF_CALL
7836 && !(TARGET_GAS && !TARGET_SOM
7837 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7838 && !TARGET_64BIT)
7839 indirect_call = 1;
7841 if (seq_length != 0
7842 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7843 && !sibcall
7844 && (!TARGET_PA_20
7845 || indirect_call
7846 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7848 /* A non-jump insn in the delay slot. By definition we can
7849 emit this insn before the call (and in fact before argument
7850 relocating).  */
7851 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7852 NULL);
7854 /* Now delete the delay insn. */
7855 SET_INSN_DELETED (NEXT_INSN (insn));
7856 delay_insn_deleted = 1;
7859 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7861 /* This is the best sequence for making long calls in
7862 non-pic code. Unfortunately, GNU ld doesn't provide
7863 the stub needed for external calls, and GAS's support
7864 for this with the SOM linker is buggy. It is safe
7865 to use this for local calls. */
7866 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7867 if (sibcall)
7868 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7869 else
7871 if (TARGET_PA_20)
7872 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7873 xoperands);
7874 else
7875 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7877 output_asm_insn ("copy %%r31,%%r2", xoperands);
7878 delay_slot_filled = 1;
7881 else
7883 if (TARGET_LONG_PIC_SDIFF_CALL)
7885 /* The HP assembler and linker can handle relocations
7886 for the difference of two symbols. The HP assembler
7887 recognizes the sequence as a pc-relative call and
7888 the linker provides stubs when needed. */
7889 xoperands[1] = gen_label_rtx ();
7890 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7891 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7892 targetm.asm_out.internal_label (asm_out_file, "L",
7893 CODE_LABEL_NUMBER (xoperands[1]));
7894 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7896 else if (TARGET_GAS && !TARGET_SOM
7897 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7899 /* GAS currently can't generate the relocations that
7900 are needed for the SOM linker under HP-UX using this
7901 sequence. The GNU linker doesn't generate the stubs
7902 that are needed for external calls on TARGET_ELF32
7903 with this sequence. For now, we have to use a
7904 longer plabel sequence when using GAS. */
7905 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7906 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7907 xoperands);
7908 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7909 xoperands);
7911 else
7913 /* Emit a long plabel-based call sequence. This is
7914 essentially an inline implementation of $$dyncall.
7915 We don't actually try to call $$dyncall as this is
7916 as difficult as calling the function itself. */
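/* Illustrative sketch of the pic form of this sequence, where "foo"
   stands for the deferred plabel symbol:

        addil LT'foo,%r19
        ldw RT'foo(%r1),%r1
        ldw 0(%r1),%r1          ; function address or plabel
        bb,>=,n %r1,30,.+16     ; skip the fixup if bit 30 is clear
        depi 0,31,2,%r1         ; clear the low plabel bits
        ldw 4(%sr0,%r1),%r19    ; reload the global pointer
        ldw 0(%sr0,%r1),%r1     ; and the real entry point           */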
7917 xoperands[0] = pa_get_deferred_plabel (call_dest);
7918 xoperands[1] = gen_label_rtx ();
7920 /* Since the call is indirect, FP arguments in registers
7921 need to be copied to the general registers. Then, the
7922 argument relocation stub will copy them back. */
7923 if (TARGET_SOM)
7924 copy_fp_args (insn);
7926 if (flag_pic)
7928 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7929 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7930 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7932 else
7934 output_asm_insn ("addil LR'%0-$global$,%%r27",
7935 xoperands);
7936 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7937 xoperands);
7940 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7941 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7942 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7943 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7945 if (!sibcall && !TARGET_PA_20)
7947 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7948 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7949 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7950 else
7951 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7955 if (TARGET_PA_20)
7957 if (sibcall)
7958 output_asm_insn ("bve (%%r1)", xoperands);
7959 else
7961 if (indirect_call)
7963 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7964 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7965 delay_slot_filled = 1;
7967 else
7968 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7971 else
7973 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7974 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7975 xoperands);
7977 if (sibcall)
7979 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7980 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7981 else
7982 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7984 else
7986 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7987 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7988 else
7989 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7991 if (indirect_call)
7992 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7993 else
7994 output_asm_insn ("copy %%r31,%%r2", xoperands);
7995 delay_slot_filled = 1;
8002 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
8003 output_asm_insn ("nop", xoperands);
8005 /* We are done if there isn't a jump in the delay slot. */
8006 if (seq_length == 0
8007 || delay_insn_deleted
8008 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
8009 return "";
8011 /* A sibcall should never have a branch in the delay slot. */
8012 gcc_assert (!sibcall);
8014 /* This call has an unconditional jump in its delay slot. */
8015 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
8017 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
8019 /* See if the return address can be adjusted. Use the containing
8020 sequence insn's address. This would break the regular call/return
8021 relationship assumed by the table-based EH unwinder, so only do that
8022 if the call is not possibly throwing. */
8023 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
8024 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
8025 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
8027 if (VAL_14_BITS_P (distance)
8028 && !(can_throw_internal (insn) || can_throw_external (insn)))
8030 xoperands[1] = gen_label_rtx ();
8031 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
8032 targetm.asm_out.internal_label (asm_out_file, "L",
8033 CODE_LABEL_NUMBER (xoperands[1]));
8035 else
8036 output_asm_insn ("nop\n\tb,n %0", xoperands);
8038 else
8039 output_asm_insn ("b,n %0", xoperands);
8041 /* Delete the jump. */
8042 SET_INSN_DELETED (NEXT_INSN (insn));
8044 return "";
8047 /* Return the attribute length of the indirect call instruction INSN.
8048 The length must match the code generated by pa_output_indirect_call.
8049 The returned length includes the delay slot. Currently, the delay
8050 slot of an indirect call sequence is not exposed and it is used by
8051 the sequence itself. */
8054 pa_attr_length_indirect_call (rtx insn)
8056 unsigned long distance = -1;
8057 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8059 if (INSN_ADDRESSES_SET_P ())
8061 distance = (total + insn_current_reference_address (insn));
8062 if (distance < total)
8063 distance = -1;
8066 if (TARGET_64BIT)
8067 return 12;
8069 if (TARGET_FAST_INDIRECT_CALLS
8070 || (!TARGET_PORTABLE_RUNTIME
8071 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8072 || distance < MAX_PCREL17F_OFFSET)))
8073 return 8;
8075 if (flag_pic)
8076 return 24;
8078 if (TARGET_PORTABLE_RUNTIME)
8079 return 20;
8081 /* Out of reach, can use ble. */
8082 return 12;
8085 const char *
8086 pa_output_indirect_call (rtx insn, rtx call_dest)
8088 rtx xoperands[1];
8090 if (TARGET_64BIT)
8092 xoperands[0] = call_dest;
8093 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8094 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8095 return "";
8098 /* First the special case for kernels, level 0 systems, etc. */
8099 if (TARGET_FAST_INDIRECT_CALLS)
8100 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8102 /* Now the normal case -- we can reach $$dyncall directly or
8103 we're sure that we can get there via a long-branch stub.
8105 No need to check target flags as the length uniquely identifies
8106 the remaining cases. */
8107 if (pa_attr_length_indirect_call (insn) == 8)
8109 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8110 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8111 variant of the B,L instruction can't be used on the SOM target. */
8112 if (TARGET_PA_20 && !TARGET_SOM)
8113 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8114 else
8115 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8118 /* Long millicode call, but we are not generating PIC or portable runtime
8119 code. */
8120 if (pa_attr_length_indirect_call (insn) == 12)
8121 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8123 /* Long millicode call for portable runtime. */
8124 if (pa_attr_length_indirect_call (insn) == 20)
8125 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
8127 /* We need a long PIC call to $$dyncall. */
8128 xoperands[0] = NULL_RTX;
8129 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8130 if (TARGET_SOM || !TARGET_GAS)
8132 xoperands[0] = gen_label_rtx ();
8133 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
8134 targetm.asm_out.internal_label (asm_out_file, "L",
8135 CODE_LABEL_NUMBER (xoperands[0]));
8136 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8138 else
8140 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
8141 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8142 xoperands);
8144 output_asm_insn ("blr %%r0,%%r2", xoperands);
8145 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
8146 return "";
8149 /* In HPUX 8.0's shared library scheme, special relocations are needed
8150 for function labels if they might be passed to a function
8151 in a shared library (because shared libraries don't live in code
8152 space), and special magic is needed to construct their address. */
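/* For example (sketch), pa_encode_label rewrites a function symbol
   "foo" into "@foo"; pa_strip_name_encoding below removes the prefix
   again when the plain name is needed.  */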
8154 void
8155 pa_encode_label (rtx sym)
8157 const char *str = XSTR (sym, 0);
8158 int len = strlen (str) + 1;
8159 char *newstr, *p;
8161 p = newstr = XALLOCAVEC (char, len + 1);
8162 *p++ = '@';
8163 strcpy (p, str);
8165 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8168 static void
8169 pa_encode_section_info (tree decl, rtx rtl, int first)
8171 int old_referenced = 0;
8173 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8174 old_referenced
8175 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8177 default_encode_section_info (decl, rtl, first);
8179 if (first && TEXT_SPACE_P (decl))
8181 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8182 if (TREE_CODE (decl) == FUNCTION_DECL)
8183 pa_encode_label (XEXP (rtl, 0));
8185 else if (old_referenced)
8186 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8189 /* This is sort of inverse to pa_encode_section_info. */
8191 static const char *
8192 pa_strip_name_encoding (const char *str)
8194 str += (*str == '@');
8195 str += (*str == '*');
8196 return str;
8199 /* Returns 1 if OP is a function label involved in a simple addition
8200 with a constant. Used to keep certain patterns from matching
8201 during instruction combination. */
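/* e.g. (sketch, "foo" hypothetical) OP of the form

        (const (plus (symbol_ref "@foo") (const_int 4)))

   satisfies this predicate.  */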
8203 pa_is_function_label_plus_const (rtx op)
8205 /* Strip off any CONST. */
8206 if (GET_CODE (op) == CONST)
8207 op = XEXP (op, 0);
8209 return (GET_CODE (op) == PLUS
8210 && function_label_operand (XEXP (op, 0), VOIDmode)
8211 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8214 /* Output assembly code for a thunk to FUNCTION. */
8216 static void
8217 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8218 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8219 tree function)
8221 static unsigned int current_thunk_number;
8222 int val_14 = VAL_14_BITS_P (delta);
8223 unsigned int old_last_address = last_address, nbytes = 0;
8224 char label[16];
8225 rtx xoperands[4];
8227 xoperands[0] = XEXP (DECL_RTL (function), 0);
8228 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8229 xoperands[2] = GEN_INT (delta);
8231 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8232 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8234 /* Output the thunk. We know that the function is in the same
8235 translation unit (i.e., the same space) as the thunk, and that
8236 thunks are output after their method. Thus, we don't need an
8237 external branch to reach the function. With SOM and GAS,
8238 functions and thunks are effectively in different sections.
8239 Thus, we can always use an IA-relative branch and the linker
8240 will add a long branch stub if necessary.
8242 However, we have to be careful when generating PIC code on the
8243 SOM port to ensure that the sequence does not transfer to an
8244 import stub for the target function as this could clobber the
8245 return value saved at SP-24. This would also apply to the
8246 32-bit linux port if the multi-space model is implemented. */
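/* Illustrative sketch: in the simplest case below (DELTA fits in 14
   bits and the branch reaches), the whole thunk body is

        b function
        ldo delta(%r26),%r26

   adjusting the hidden "this" argument in %r26 from the delay slot.  */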
8247 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8248 && !(flag_pic && TREE_PUBLIC (function))
8249 && (TARGET_GAS || last_address < 262132))
8250 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8251 && ((targetm_common.have_named_sections
8252 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8253 /* The GNU 64-bit linker has rather poor stub management.
8254 So, we use a long branch from thunks that aren't in
8255 the same section as the target function. */
8256 && ((!TARGET_64BIT
8257 && (DECL_SECTION_NAME (thunk_fndecl)
8258 != DECL_SECTION_NAME (function)))
8259 || ((DECL_SECTION_NAME (thunk_fndecl)
8260 == DECL_SECTION_NAME (function))
8261 && last_address < 262132)))
8262 || (targetm_common.have_named_sections
8263 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8264 && DECL_SECTION_NAME (function) == NULL
8265 && last_address < 262132)
8266 || (!targetm_common.have_named_sections
8267 && last_address < 262132))))
8269 if (!val_14)
8270 output_asm_insn ("addil L'%2,%%r26", xoperands);
8272 output_asm_insn ("b %0", xoperands);
8274 if (val_14)
8276 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8277 nbytes += 8;
8279 else
8281 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8282 nbytes += 12;
8285 else if (TARGET_64BIT)
8287 /* We only have one call-clobbered scratch register, so we can't
8288 make use of the delay slot if delta doesn't fit in 14 bits. */
8289 if (!val_14)
8291 output_asm_insn ("addil L'%2,%%r26", xoperands);
8292 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8295 output_asm_insn ("b,l .+8,%%r1", xoperands);
8297 if (TARGET_GAS)
8299 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8300 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8302 else
8304 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8305 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8308 if (val_14)
8310 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8311 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8312 nbytes += 20;
8314 else
8316 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8317 nbytes += 24;
8320 else if (TARGET_PORTABLE_RUNTIME)
8322 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8323 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8325 if (!val_14)
8326 output_asm_insn ("addil L'%2,%%r26", xoperands);
8328 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8330 if (val_14)
8332 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8333 nbytes += 16;
8335 else
8337 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8338 nbytes += 20;
8341 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8343 /* The function is accessible from outside this module. The only
8344 way to avoid an import stub between the thunk and function is to
8345 call the function directly with an indirect sequence similar to
8346 that used by $$dyncall. This is possible because $$dyncall acts
8347 as the import stub in an indirect call. */
8348 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8349 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8350 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8351 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8352 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8353 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8354 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8355 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8356 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8358 if (!val_14)
8360 output_asm_insn ("addil L'%2,%%r26", xoperands);
8361 nbytes += 4;
8364 if (TARGET_PA_20)
8366 output_asm_insn ("bve (%%r22)", xoperands);
8367 nbytes += 36;
8369 else if (TARGET_NO_SPACE_REGS)
8371 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8372 nbytes += 36;
8374 else
8376 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8377 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8378 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8379 nbytes += 44;
8382 if (val_14)
8383 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8384 else
8385 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8387 else if (flag_pic)
8389 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8391 if (TARGET_SOM || !TARGET_GAS)
8393 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8394 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8396 else
8398 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8399 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8402 if (!val_14)
8403 output_asm_insn ("addil L'%2,%%r26", xoperands);
8405 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8407 if (val_14)
8409 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8410 nbytes += 20;
8412 else
8414 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8415 nbytes += 24;
8418 else
8420 if (!val_14)
8421 output_asm_insn ("addil L'%2,%%r26", xoperands);
8423 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8424 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8426 if (val_14)
8428 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8429 nbytes += 12;
8431 else
8433 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8434 nbytes += 16;
8438 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8440 if (TARGET_SOM && TARGET_GAS)
8442 /* We're done with this subspace except possibly for some additional
8443 debug information. Forget that we are in this subspace to ensure
8444 that the next function is output in its own subspace. */
8445 in_section = NULL;
8446 cfun->machine->in_nsubspa = 2;
8449 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8451 switch_to_section (data_section);
8452 output_asm_insn (".align 4", xoperands);
8453 ASM_OUTPUT_LABEL (file, label);
8454 output_asm_insn (".word P'%0", xoperands);
8457 current_thunk_number++;
8458 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8459 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8460 last_address += nbytes;
8461 if (old_last_address > last_address)
8462 last_address = UINT_MAX;
8463 update_total_code_bytes (nbytes);
8466 /* Only direct calls to static functions are allowed to be sibling (tail)
8467 call optimized.
8469 This restriction is necessary because some linker-generated stubs will
8470 store return pointers into rp' in some cases which might clobber a
8471 live value already in rp'.
8473 In a sibcall the current function and the target function share stack
8474 space. Thus if the path to the current function and the path to the
8475 target function save a value in rp', they save the value into the
8476 same stack slot, which has undesirable consequences.
8478 Because of the deferred binding nature of shared libraries any function
8479 with external scope could be in a different load module and thus require
8480 rp' to be saved when calling that function. So sibcall optimizations
8481 can only be safe for static functions.
8483 Note that GCC never needs return value relocations, so we don't have to
8484 worry about static calls with return value relocations (which require
8485 saving rp').
8487 It is safe to perform a sibcall optimization when the target function
8488 will never return. */
8489 static bool
8490 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8492 if (TARGET_PORTABLE_RUNTIME)
8493 return false;
8495 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8496 single subspace mode and the call is not indirect. As far as I know,
8497 there is no operating system support for the multiple subspace mode.
8498 It might be possible to support indirect calls if we didn't use
8499 $$dyncall (see the indirect sequence generated in pa_output_call). */
8500 if (TARGET_ELF32)
8501 return (decl != NULL_TREE);
8503 /* Sibcalls are not ok because the arg pointer register is not a fixed
8504 register. This prevents the sibcall optimization from occurring. In
8505 addition, there are problems with stub placement using GNU ld. This
8506 is because a normal sibcall branch uses a 17-bit relocation while
8507 a regular call branch uses a 22-bit relocation. As a result, more
8508 care needs to be taken in the placement of long-branch stubs. */
8509 if (TARGET_64BIT)
8510 return false;
8512 /* Sibcalls are only ok within a translation unit. */
8513 return (decl && !TREE_PUBLIC (decl));
8516 /* ??? Addition is not commutative on the PA due to the weird implicit
8517 space register selection rules for memory addresses. Therefore, we
8518 don't consider a + b == b + a, as this might be inside a MEM. */
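/* e.g. (sketch) for an indexed address, the space register implied by

        (mem (plus (reg %r25) (reg %r26)))

   need not match that implied by the operands swapped, so a PLUS that
   might be an address must keep its operand order.  */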
8519 static bool
8520 pa_commutative_p (const_rtx x, int outer_code)
8522 return (COMMUTATIVE_P (x)
8523 && (TARGET_NO_SPACE_REGS
8524 || (outer_code != UNKNOWN && outer_code != MEM)
8525 || GET_CODE (x) != PLUS));
8528 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8529 use in fmpyadd instructions. */
8531 pa_fmpyaddoperands (rtx *operands)
8533 enum machine_mode mode = GET_MODE (operands[0]);
8535 /* Must be a floating point mode. */
8536 if (mode != SFmode && mode != DFmode)
8537 return 0;
8539 /* All modes must be the same. */
8540 if (! (mode == GET_MODE (operands[1])
8541 && mode == GET_MODE (operands[2])
8542 && mode == GET_MODE (operands[3])
8543 && mode == GET_MODE (operands[4])
8544 && mode == GET_MODE (operands[5])))
8545 return 0;
8547 /* All operands must be registers. */
8548 if (! (GET_CODE (operands[1]) == REG
8549 && GET_CODE (operands[2]) == REG
8550 && GET_CODE (operands[3]) == REG
8551 && GET_CODE (operands[4]) == REG
8552 && GET_CODE (operands[5]) == REG))
8553 return 0;
8555 /* Only 2 real operands to the addition. One of the input operands must
8556 be the same as the output operand. */
8557 if (! rtx_equal_p (operands[3], operands[4])
8558 && ! rtx_equal_p (operands[3], operands[5]))
8559 return 0;
8561 /* Inout operand of add cannot conflict with any operands from multiply. */
8562 if (rtx_equal_p (operands[3], operands[0])
8563 || rtx_equal_p (operands[3], operands[1])
8564 || rtx_equal_p (operands[3], operands[2]))
8565 return 0;
8567 /* multiply cannot feed into addition operands. */
8568 if (rtx_equal_p (operands[4], operands[0])
8569 || rtx_equal_p (operands[5], operands[0]))
8570 return 0;
8572 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8573 if (mode == SFmode
8574 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8575 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8576 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8577 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8578 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8579 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8580 return 0;
8582 /* Passed. Operands are suitable for fmpyadd. */
8583 return 1;
8586 #if !defined(USE_COLLECT2)
8587 static void
8588 pa_asm_out_constructor (rtx symbol, int priority)
8590 if (!function_label_operand (symbol, VOIDmode))
8591 pa_encode_label (symbol);
8593 #ifdef CTORS_SECTION_ASM_OP
8594 default_ctor_section_asm_out_constructor (symbol, priority);
8595 #else
8596 # ifdef TARGET_ASM_NAMED_SECTION
8597 default_named_section_asm_out_constructor (symbol, priority);
8598 # else
8599 default_stabs_asm_out_constructor (symbol, priority);
8600 # endif
8601 #endif
8604 static void
8605 pa_asm_out_destructor (rtx symbol, int priority)
8607 if (!function_label_operand (symbol, VOIDmode))
8608 pa_encode_label (symbol);
8610 #ifdef DTORS_SECTION_ASM_OP
8611 default_dtor_section_asm_out_destructor (symbol, priority);
8612 #else
8613 # ifdef TARGET_ASM_NAMED_SECTION
8614 default_named_section_asm_out_destructor (symbol, priority);
8615 # else
8616 default_stabs_asm_out_destructor (symbol, priority);
8617 # endif
8618 #endif
8620 #endif
8622 /* This function places uninitialized global data in the bss section.
8623 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8624 function on the SOM port to prevent uninitialized global data from
8625 being placed in the data section. */
8627 void
8628 pa_asm_output_aligned_bss (FILE *stream,
8629 const char *name,
8630 unsigned HOST_WIDE_INT size,
8631 unsigned int align)
8633 switch_to_section (bss_section);
8634 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8636 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8637 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8638 #endif
8640 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8641 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8642 #endif
8644 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8645 ASM_OUTPUT_LABEL (stream, name);
8646 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8649 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8650 that doesn't allow the alignment of global common storage to be directly
8651 specified. The SOM linker aligns common storage based on the rounded
8652 value of the NUM_BYTES parameter in the .comm directive. It's not
8653 possible to use the .align directive as it doesn't affect the alignment
8654 of the label associated with a .comm directive. */
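/* For example (sketch, "var" hypothetical), a 16-byte object with
   8-byte alignment is emitted below as

        var     .comm 16

   and the SOM linker derives the alignment from the rounded size.  */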
8656 void
8657 pa_asm_output_aligned_common (FILE *stream,
8658 const char *name,
8659 unsigned HOST_WIDE_INT size,
8660 unsigned int align)
8662 unsigned int max_common_align;
8664 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8665 if (align > max_common_align)
8667 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8668 "for global common data. Using %u",
8669 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8670 align = max_common_align;
8673 switch_to_section (bss_section);
8675 assemble_name (stream, name);
8676 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8677 MAX (size, align / BITS_PER_UNIT));
8680 /* We can't use .comm for local common storage as the SOM linker effectively
8681 treats the symbol as universal and uses the same storage for local symbols
8682 with the same name in different object files. The .block directive
8683 reserves an uninitialized block of storage. However, it's not common
8684 storage. Fortunately, GCC never requests common storage with the same
8685 name in any given translation unit. */
8687 void
8688 pa_asm_output_aligned_local (FILE *stream,
8689 const char *name,
8690 unsigned HOST_WIDE_INT size,
8691 unsigned int align)
8693 switch_to_section (bss_section);
8694 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8696 #ifdef LOCAL_ASM_OP
8697 fprintf (stream, "%s", LOCAL_ASM_OP);
8698 assemble_name (stream, name);
8699 fprintf (stream, "\n");
8700 #endif
8702 ASM_OUTPUT_LABEL (stream, name);
8703 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8706 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8707 use in fmpysub instructions. */
8709 pa_fmpysuboperands (rtx *operands)
8711 enum machine_mode mode = GET_MODE (operands[0]);
8713 /* Must be a floating point mode. */
8714 if (mode != SFmode && mode != DFmode)
8715 return 0;
8717 /* All modes must be the same. */
8718 if (! (mode == GET_MODE (operands[1])
8719 && mode == GET_MODE (operands[2])
8720 && mode == GET_MODE (operands[3])
8721 && mode == GET_MODE (operands[4])
8722 && mode == GET_MODE (operands[5])))
8723 return 0;
8725 /* All operands must be registers. */
8726 if (! (GET_CODE (operands[1]) == REG
8727 && GET_CODE (operands[2]) == REG
8728 && GET_CODE (operands[3]) == REG
8729 && GET_CODE (operands[4]) == REG
8730 && GET_CODE (operands[5]) == REG))
8731 return 0;
8733 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8734 operation, so operands[4] must be the same as operands[3].  */
8735 if (! rtx_equal_p (operands[3], operands[4]))
8736 return 0;
8738 /* multiply cannot feed into subtraction. */
8739 if (rtx_equal_p (operands[5], operands[0]))
8740 return 0;
8742 /* Inout operand of sub cannot conflict with any operands from multiply. */
8743 if (rtx_equal_p (operands[3], operands[0])
8744 || rtx_equal_p (operands[3], operands[1])
8745 || rtx_equal_p (operands[3], operands[2]))
8746 return 0;
8748 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8749 if (mode == SFmode
8750 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8751 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8752 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8753 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8754 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8755 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8756 return 0;
8758 /* Passed. Operands are suitable for fmpysub. */
8759 return 1;
8762 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8763 constants for shadd instructions. */
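/* e.g. (sketch) the constant 4 corresponds to

        sh2add %r4,%r5,%r6      ; %r6 = (%r4 << 2) + %r5

   with sh1add and sh3add covering the constants 2 and 8.  */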
8765 pa_shadd_constant_p (int val)
8767 if (val == 2 || val == 4 || val == 8)
8768 return 1;
8769 else
8770 return 0;
8773 /* Return TRUE if INSN branches forward. */
8775 static bool
8776 forward_branch_p (rtx insn)
8778 rtx lab = JUMP_LABEL (insn);
8780 /* The INSN must have a jump label. */
8781 gcc_assert (lab != NULL_RTX);
8783 if (INSN_ADDRESSES_SET_P ())
8784 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8786 while (insn)
8788 if (insn == lab)
8789 return true;
8790 else
8791 insn = NEXT_INSN (insn);
8794 return false;
8797 /* Return 1 if INSN is in the delay slot of a call instruction. */
8799 pa_jump_in_call_delay (rtx insn)
8802 if (GET_CODE (insn) != JUMP_INSN)
8803 return 0;
8805 if (PREV_INSN (insn)
8806 && PREV_INSN (PREV_INSN (insn))
8807 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8809 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8811 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8812 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8815 else
8816 return 0;
8819 /* Output an unconditional move and branch insn. */
8821 const char *
8822 pa_output_parallel_movb (rtx *operands, rtx insn)
8824 int length = get_attr_length (insn);
8826 /* These are the cases in which we win. */
8827 if (length == 4)
8828 return "mov%I1b,tr %1,%0,%2";
8830 /* None of the following cases win, but they don't lose either. */
8831 if (length == 8)
8833 if (dbr_sequence_length () == 0)
8835 /* Nothing in the delay slot, fake it by putting the combined
8836 insn (the copy or add) in the delay slot of a bl. */
8837 if (GET_CODE (operands[1]) == CONST_INT)
8838 return "b %2\n\tldi %1,%0";
8839 else
8840 return "b %2\n\tcopy %1,%0";
8842 else
8844 /* Something in the delay slot, but we've got a long branch. */
8845 if (GET_CODE (operands[1]) == CONST_INT)
8846 return "ldi %1,%0\n\tb %2";
8847 else
8848 return "copy %1,%0\n\tb %2";
8852 if (GET_CODE (operands[1]) == CONST_INT)
8853 output_asm_insn ("ldi %1,%0", operands);
8854 else
8855 output_asm_insn ("copy %1,%0", operands);
8856 return pa_output_lbranch (operands[2], insn, 1);
8859 /* Output an unconditional add and branch insn. */
8861 const char *
8862 pa_output_parallel_addb (rtx *operands, rtx insn)
8864 int length = get_attr_length (insn);
8866 /* To make life easy we want operand0 to be the shared input/output
8867 operand and operand1 to be the readonly operand. */
8868 if (operands[0] == operands[1])
8869 operands[1] = operands[2];
8871 /* These are the cases in which we win. */
8872 if (length == 4)
8873 return "add%I1b,tr %1,%0,%3";
8875 /* None of the following cases win, but they don't lose either. */
8876 if (length == 8)
8878 if (dbr_sequence_length () == 0)
8879 /* Nothing in the delay slot, fake it by putting the combined
8880 insn (the copy or add) in the delay slot of a bl. */
8881 return "b %3\n\tadd%I1 %1,%0,%0";
8882 else
8883 /* Something in the delay slot, but we've got a long branch. */
8884 return "add%I1 %1,%0,%0\n\tb %3";
8887 output_asm_insn ("add%I1 %1,%0,%0", operands);
8888 return pa_output_lbranch (operands[3], insn, 1);
8891 /* Return nonzero if INSN (a jump insn) immediately follows a call
8892 to a named function. This is used to avoid filling the delay slot
8893 of the jump since it can usually be eliminated by modifying RP in
8894 the delay slot of the call. */
8897 pa_following_call (rtx insn)
8899 if (! TARGET_JUMP_IN_DELAY)
8900 return 0;
8902 /* Find the previous real insn, skipping NOTEs. */
8903 insn = PREV_INSN (insn);
8904 while (insn && GET_CODE (insn) == NOTE)
8905 insn = PREV_INSN (insn);
8907 /* Check for CALL_INSNs and millicode calls. */
8908 if (insn
8909 && ((GET_CODE (insn) == CALL_INSN
8910 && get_attr_type (insn) != TYPE_DYNCALL)
8911 || (GET_CODE (insn) == INSN
8912 && GET_CODE (PATTERN (insn)) != SEQUENCE
8913 && GET_CODE (PATTERN (insn)) != USE
8914 && GET_CODE (PATTERN (insn)) != CLOBBER
8915 && get_attr_type (insn) == TYPE_MILLI)))
8916 return 1;
8918 return 0;
8921 /* We use this hook to perform a PA-specific optimization which is difficult
8922 to do in earlier passes.
8924 We want the delay slots of branches within jump tables to be filled.
8925 None of the compiler passes at the moment even has the notion that a
8926 PA jump table doesn't contain addresses, but instead contains actual
8927 instructions!
8929 Because we actually jump into the table, the addresses of each entry
8930 must stay constant in relation to the beginning of the table (which
8931 itself must stay constant relative to the instruction to jump into
8932 it). I don't believe we can guarantee earlier passes of the compiler
8933 will adhere to those rules.
8935 So, late in the compilation process we find all the jump tables, and
8936 expand them into real code -- e.g. each entry in the jump table vector
8937 will get an appropriate label followed by a jump to the final target.
8939 Reorg and the final jump pass can then optimize these branches and
8940 fill their delay slots. We end up with smaller, more efficient code.
8942 The jump instructions within the table are special; we must be able
8943 to identify them during assembly output (if the jumps don't get filled
8944 we need to emit a nop rather than nullifying the delay slot). We
8945 identify jumps in switch tables by using insns with the attribute
8946 type TYPE_BTABLE_BRANCH.
8948 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8949 insns. This serves two purposes: first, it prevents jump.c from
8950 noticing that the last N entries in the table jump to the instruction
8951 immediately after the table and deleting the jumps. Second, those
8952 insns mark where we should emit .begin_brtab and .end_brtab directives
8953 when using GAS (allows for better link time optimizations). */
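/* Illustrative sketch of the expansion done below (labels
   hypothetical): a two-entry branch table referring to L1 and L2
   becomes

   L100:       b L1
               nop             ; delay slot, later filled by reorg
   L101:       b L2
               nop

   bracketed by begin_brtab and end_brtab marker insns.  */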
8955 static void
8956 pa_reorg (void)
8958 rtx insn;
8960 remove_useless_addtr_insns (1);
8962 if (pa_cpu < PROCESSOR_8000)
8963 pa_combine_instructions ();
8966 /* This is fairly cheap, so always run it if optimizing. */
8967 if (optimize > 0 && !TARGET_BIG_SWITCH)
8969 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8970 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8972 rtx pattern, tmp, location, label;
8973 unsigned int length, i;
8975 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8976 if (GET_CODE (insn) != JUMP_INSN
8977 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8978 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8979 continue;
8981 /* Emit marker for the beginning of the branch table. */
8982 emit_insn_before (gen_begin_brtab (), insn);
8984 pattern = PATTERN (insn);
8985 location = PREV_INSN (insn);
8986 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8988 for (i = 0; i < length; i++)
8990 /* Emit a label before each jump to keep jump.c from
8991 removing this code. */
8992 tmp = gen_label_rtx ();
8993 LABEL_NUSES (tmp) = 1;
8994 emit_label_after (tmp, location);
8995 location = NEXT_INSN (location);
8997 if (GET_CODE (pattern) == ADDR_VEC)
8998 label = XEXP (XVECEXP (pattern, 0, i), 0);
8999 else
9000 label = XEXP (XVECEXP (pattern, 1, i), 0);
9002 tmp = gen_short_jump (label);
9004 /* Emit the jump itself. */
9005 tmp = emit_jump_insn_after (tmp, location);
9006 JUMP_LABEL (tmp) = label;
9007 LABEL_NUSES (label)++;
9008 location = NEXT_INSN (location);
9010 /* Emit a BARRIER after the jump. */
9011 emit_barrier_after (location);
9012 location = NEXT_INSN (location);
9015 /* Emit marker for the end of the branch table. */
9016 emit_insn_before (gen_end_brtab (), location);
9017 location = NEXT_INSN (location);
9018 emit_barrier_after (location);
9020 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
9021 delete_insn (insn);
9024 else
9026 /* Still need brtab marker insns. FIXME: the presence of these
9027 markers disables output of the branch table to readonly memory,
9028 and any alignment directives that might be needed. Possibly,
9029 the begin_brtab insn should be output before the label for the
9030 table. This doesn't matter at the moment since the tables are
9031 always output in the text section. */
9032 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9034 /* Find an ADDR_VEC insn. */
9035 if (GET_CODE (insn) != JUMP_INSN
9036 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
9037 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
9038 continue;
9040 /* Now generate markers for the beginning and end of the
9041 branch table. */
9042 emit_insn_before (gen_begin_brtab (), insn);
9043 emit_insn_after (gen_end_brtab (), insn);
9048 /* The PA has a number of odd instructions which can perform multiple
9049 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9050 it may be profitable to combine two instructions into one instruction
9051 with two outputs. It's not profitable on PA2.0 machines because the
9052 two outputs would take two slots in the reorder buffers.
9054 This routine finds instructions which can be combined and combines
9055 them. We only support some of the potential combinations, and we
9056 only try common ways to find suitable instructions.
9058 * addb can add two registers or a register and a small integer
9059 and jump to a nearby (+-8k) location. Normally the jump to the
9060 nearby location is conditional on the result of the add, but by
9061 using the "true" condition we can make the jump unconditional.
9062 Thus addb can perform two independent operations in one insn.
9064 * movb is similar to addb in that it can perform a reg->reg
9065 or small immediate->reg copy and jump to a nearby (+-8k) location.
9067 * fmpyadd and fmpysub can perform a FP multiply and either an
9068 FP add or FP sub if the operands of the multiply and add/sub are
9069 independent (there are other minor restrictions). Note both
9070 the fmpy and fadd/fsub can in theory move to better spots according
9071 to data dependencies, but for now we require the fmpy stay at a
9072 fixed location.
9074 * Many of the memory operations can perform pre & post updates
9075 of index registers. GCC's pre/post increment/decrement addressing
9076 is far too simple to take advantage of all the possibilities. This
9077 pass may not be suitable since those insns may not be independent.
9079 * comclr can compare two ints or an int and a register, nullify
9080 the following instruction and zero some other register. This
9081 is more difficult to use as it's harder to find an insn which
9082 will generate a comclr than finding something like an unconditional
9083 branch. (conditional moves & long branches create comclr insns).
9085 * Most arithmetic operations can conditionally skip the next
9086 instruction. They can be viewed as "perform this operation
9087 and conditionally jump to this nearby location" (where nearby
9088 is an insn away). These are difficult to use due to the
9089 branch length restrictions. */
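/* A sketch of how combination works: the candidate patterns are
   wrapped in a PARALLEL, e.g.

        (parallel [(set (reg:DF fr22) (mult:DF ...))
                   (set (reg:DF fr23) (plus:DF ...))])

   and the pair is kept only if recog and constrain_operands accept
   the result (see pa_can_combine_p below).  */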
9091 static void
9092 pa_combine_instructions (void)
9094 rtx anchor, new_rtx;
9096 /* This can get expensive since the basic algorithm is on the
9097 order of O(n^2) (or worse). Only do it for -O2 or higher
9098 levels of optimization. */
9099 if (optimize < 2)
9100 return;
9102 /* Walk down the list of insns looking for "anchor" insns which
9103 may be combined with "floating" insns. As the name implies,
9104 "anchor" instructions don't move, while "floating" insns may
9105 move around. */
9106 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9107 new_rtx = make_insn_raw (new_rtx);
9109 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9111 enum attr_pa_combine_type anchor_attr;
9112 enum attr_pa_combine_type floater_attr;
9114 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9115 Also ignore any special USE insns. */
9116 if ((GET_CODE (anchor) != INSN
9117 && GET_CODE (anchor) != JUMP_INSN
9118 && GET_CODE (anchor) != CALL_INSN)
9119 || GET_CODE (PATTERN (anchor)) == USE
9120 || GET_CODE (PATTERN (anchor)) == CLOBBER
9121 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
9122 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
9123 continue;
9125 anchor_attr = get_attr_pa_combine_type (anchor);
9126 /* See if anchor is an insn suitable for combination. */
9127 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9128 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9129 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9130 && ! forward_branch_p (anchor)))
9132 rtx floater;
9134 for (floater = PREV_INSN (anchor);
9135 floater;
9136 floater = PREV_INSN (floater))
9138 if (GET_CODE (floater) == NOTE
9139 || (GET_CODE (floater) == INSN
9140 && (GET_CODE (PATTERN (floater)) == USE
9141 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9142 continue;
9144 /* Anything except a regular INSN will stop our search. */
9145 if (GET_CODE (floater) != INSN
9146 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9147 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9149 floater = NULL_RTX;
9150 break;
9153 /* See if FLOATER is suitable for combination with the
9154 anchor. */
9155 floater_attr = get_attr_pa_combine_type (floater);
9156 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9157 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9158 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9159 && floater_attr == PA_COMBINE_TYPE_FMPY))
9161 /* If ANCHOR and FLOATER can be combined, then we're
9162 done with this pass. */
9163 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9164 SET_DEST (PATTERN (floater)),
9165 XEXP (SET_SRC (PATTERN (floater)), 0),
9166 XEXP (SET_SRC (PATTERN (floater)), 1)))
9167 break;
9170 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9171 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9173 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9175 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9176 SET_DEST (PATTERN (floater)),
9177 XEXP (SET_SRC (PATTERN (floater)), 0),
9178 XEXP (SET_SRC (PATTERN (floater)), 1)))
9179 break;
9181 else
9183 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9184 SET_DEST (PATTERN (floater)),
9185 SET_SRC (PATTERN (floater)),
9186 SET_SRC (PATTERN (floater))))
9187 break;
9192 /* If we didn't find anything on the backwards scan try forwards. */
9193 if (!floater
9194 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9195 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9197 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9199 if (GET_CODE (floater) == NOTE
9200 || (GET_CODE (floater) == INSN
9201 && (GET_CODE (PATTERN (floater)) == USE
9202 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9204 continue;
9206 /* Anything except a regular INSN will stop our search. */
9207 if (GET_CODE (floater) != INSN
9208 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9209 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9211 floater = NULL_RTX;
9212 break;
9215 /* See if FLOATER is suitable for combination with the
9216 anchor. */
9217 floater_attr = get_attr_pa_combine_type (floater);
9218 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9219 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9220 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9221 && floater_attr == PA_COMBINE_TYPE_FMPY))
9223 /* If ANCHOR and FLOATER can be combined, then we're
9224 done with this pass. */
9225 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9226 SET_DEST (PATTERN (floater)),
9227 XEXP (SET_SRC (PATTERN (floater)),
9229 XEXP (SET_SRC (PATTERN (floater)),
9230 1)))
9231 break;
9236 /* FLOATER will be nonzero if we found a suitable floating
9237 insn for combination with ANCHOR. */
9238 if (floater
9239 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9240 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9242 /* Emit the new instruction and delete the old anchor. */
9243 emit_insn_before (gen_rtx_PARALLEL
9244 (VOIDmode,
9245 gen_rtvec (2, PATTERN (anchor),
9246 PATTERN (floater))),
9247 anchor);
9249 SET_INSN_DELETED (anchor);
9251 /* Emit a special USE insn for FLOATER, then delete
9252 the floating insn. */
9253 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9254 delete_insn (floater);
9256 continue;
9258 else if (floater
9259 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9261 rtx temp;
9262 /* Emit the new_jump instruction and delete the old anchor. */
9263 temp
9264 = emit_jump_insn_before (gen_rtx_PARALLEL
9265 (VOIDmode,
9266 gen_rtvec (2, PATTERN (anchor),
9267 PATTERN (floater))),
9268 anchor);
9270 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9271 SET_INSN_DELETED (anchor);
9273 /* Emit a special USE insn for FLOATER, then delete
9274 the floating insn. */
9275 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9276 delete_insn (floater);
9277 continue;
9283 static int
9284 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9285 rtx src1, rtx src2)
9287 int insn_code_number;
9288 rtx start, end;
9290 /* Create a PARALLEL with the patterns of ANCHOR and
9291 FLOATER, try to recognize it, then test constraints
9292 for the resulting pattern.
9294 If the pattern doesn't match or the constraints
9295 aren't met keep searching for a suitable floater
9296 insn. */
9297 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9298 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9299 INSN_CODE (new_rtx) = -1;
9300 insn_code_number = recog_memoized (new_rtx);
9301 if (insn_code_number < 0
9302 || (extract_insn (new_rtx), ! constrain_operands (1)))
9303 return 0;
9305 if (reversed)
9307 start = anchor;
9308 end = floater;
9310 else
9312 start = floater;
9313 end = anchor;
9316 /* There are up to three operands to consider: one
9317 output and two inputs.
9319 The output must not be used between FLOATER & ANCHOR
9320 exclusive. The inputs must not be set between
9321 FLOATER and ANCHOR exclusive. */
9323 if (reg_used_between_p (dest, start, end))
9324 return 0;
9326 if (reg_set_between_p (src1, start, end))
9327 return 0;
9329 if (reg_set_between_p (src2, start, end))
9330 return 0;
9332 /* If we get here, then everything is good. */
9333 return 1;
9336 /* Return nonzero if references for INSN are delayed.
9338 Millicode insns are actually function calls with some special
9339 constraints on arguments and register usage.
9341 Millicode calls always expect their arguments in the integer argument
9342 registers, and always return their result in %r29 (ret1). They
9343 are expected to clobber their arguments, %r1, %r29, and the return
9344 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9346 This function tells reorg that the references to arguments and
9347 millicode calls do not appear to happen until after the millicode call.
9348 This allows reorg to put insns which set the argument registers into the
9349 delay slot of the millicode call -- thus they act more like traditional
9350 CALL_INSNs.
9352 Note we cannot consider side effects of the insn to be delayed because
9353 the branch and link insn will clobber the return pointer. If we happened
9354 to use the return pointer in the delay slot of the call, then we lose.
9356 get_attr_type will try to recognize the given insn, so make sure to
9357 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9358 in particular. */
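/* e.g. (sketch) this lets reorg produce

        bl $$mulI,%r31
        ldi 10,%r25             ; argument setup in the delay slot

   instead of leaving the argument load before the call.  */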
9360 pa_insn_refs_are_delayed (rtx insn)
9362 return ((GET_CODE (insn) == INSN
9363 && GET_CODE (PATTERN (insn)) != SEQUENCE
9364 && GET_CODE (PATTERN (insn)) != USE
9365 && GET_CODE (PATTERN (insn)) != CLOBBER
9366 && get_attr_type (insn) == TYPE_MILLI));
9369 /* Promote the return value, but not the arguments. */
9371 static enum machine_mode
9372 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9373 enum machine_mode mode,
9374 int *punsignedp ATTRIBUTE_UNUSED,
9375 const_tree fntype ATTRIBUTE_UNUSED,
9376 int for_return)
9378 if (for_return == 0)
9379 return mode;
9380 return promote_mode (type, mode, punsignedp);
9381 }
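/* A minimal sketch (illustrative, not part of the port) of what the
   hook above does for a function involving "short" values: the return
   value is widened via promote_mode, while an argument mode is handed
   back unchanged because for_return is 0.  */
#if 0
  int unsignedp = 0;
  /* Widened: for_return != 0, so HImode becomes word_mode.  */
  enum machine_mode ret = promote_mode (short_integer_type_node,
					HImode, &unsignedp);
  /* Unchanged: for_return == 0 returns MODE as-is.  */
  enum machine_mode arg = pa_promote_function_mode (short_integer_type_node,
						    HImode, &unsignedp,
						    NULL_TREE, 0);
#endif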
9383 /* On the HP-PA the value is found in register(s) 28(-29), unless
9384 the mode is SF or DF. Then the value is returned in fr4 (32).
9386 This must perform the same promotions as PROMOTE_MODE, else promoting
9387 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9389 Small structures must be returned in a PARALLEL on PA64 in order
9390 to match the HP Compiler ABI. */
9392 static rtx
9393 pa_function_value (const_tree valtype,
9394 const_tree func ATTRIBUTE_UNUSED,
9395 bool outgoing ATTRIBUTE_UNUSED)
9396 {
9397 enum machine_mode valmode;
9399 if (AGGREGATE_TYPE_P (valtype)
9400 || TREE_CODE (valtype) == COMPLEX_TYPE
9401 || TREE_CODE (valtype) == VECTOR_TYPE)
9402 {
9403 if (TARGET_64BIT)
9404 {
9405 /* Aggregates with a size less than or equal to 128 bits are
9406 returned in GR 28(-29). They are left justified. The pad
9407 bits are undefined. Larger aggregates are returned in
9408 memory. */
9409 rtx loc[2];
9410 int i, offset = 0;
9411 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9413 for (i = 0; i < ub; i++)
9414 {
9415 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9416 gen_rtx_REG (DImode, 28 + i),
9417 GEN_INT (offset));
9418 offset += 8;
9419 }
9421 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9422 }
9423 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9424 {
9425 /* Aggregates 5 to 8 bytes in size are returned in general
9426 registers r28-r29 in the same manner as other non-
9427 floating-point objects. The data is right-justified and
9428 zero-extended to 64 bits. This is opposite to the normal
9429 justification used on big endian targets and requires
9430 special treatment. */
9431 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9432 gen_rtx_REG (DImode, 28), const0_rtx);
9433 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9434 }
9435 }
9437 if ((INTEGRAL_TYPE_P (valtype)
9438 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9439 || POINTER_TYPE_P (valtype))
9440 valmode = word_mode;
9441 else
9442 valmode = TYPE_MODE (valtype);
9444 if (TREE_CODE (valtype) == REAL_TYPE
9445 && !AGGREGATE_TYPE_P (valtype)
9446 && TYPE_MODE (valtype) != TFmode
9447 && !TARGET_SOFT_FLOAT)
9448 return gen_rtx_REG (valmode, 32);
9450 return gen_rtx_REG (valmode, 28);
9451 }
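/* Illustrative sketch (hypothetical 16-byte struct on TARGET_64BIT):
   the aggregate case above yields a PARALLEL spreading the value
   across GRs 28 and 29 at byte offsets 0 and 8.  */
#if 0
  rtx loc0 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (DImode, 28),
				GEN_INT (0));
  rtx loc1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (DImode, 29),
				GEN_INT (8));
  rtx ret = gen_rtx_PARALLEL (BLKmode, gen_rtvec (2, loc0, loc1));
#endif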
9453 /* Implement the TARGET_LIBCALL_VALUE hook. */
9455 static rtx
9456 pa_libcall_value (enum machine_mode mode,
9457 const_rtx fun ATTRIBUTE_UNUSED)
9458 {
9459 if (! TARGET_SOFT_FLOAT
9460 && (mode == SFmode || mode == DFmode))
9461 return gen_rtx_REG (mode, 32);
9462 else
9463 return gen_rtx_REG (mode, 28);
9464 }
9466 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9468 static bool
9469 pa_function_value_regno_p (const unsigned int regno)
9470 {
9471 if (regno == 28
9472 || (! TARGET_SOFT_FLOAT && regno == 32))
9473 return true;
9475 return false;
9476 }
9478 /* Update the data in CUM to advance over an argument
9479 of mode MODE and data type TYPE.
9480 (TYPE is null for libcalls where that information may not be available.) */
9482 static void
9483 pa_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
9484 const_tree type, bool named ATTRIBUTE_UNUSED)
9485 {
9486 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9487 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9489 cum->nargs_prototype--;
9490 cum->words += (arg_size
9491 + ((cum->words & 01)
9492 && type != NULL_TREE
9493 && arg_size > 1));
9494 }
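/* Worked example (hypothetical 32-bit call, TYPE non-null): after one
   "int" argument cum->words is 1.  A following "double" (arg_size 2)
   advances cum->words by 3, not 2, since the (cum->words & 01) test
   inserts a padding word to double-word align the argument.  */
#if 0
  int words = 1, arg_size = 2;
  words += arg_size + ((words & 01) && arg_size > 1);	/* words == 4 */
#endif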
9496 /* Return the location of a parameter that is passed in a register or NULL
9497 if the parameter has any component that is passed in memory.
9499 This is new code and will be pushed into the net sources after
9500 further testing.
9502 ??? We might want to restructure this so that it looks more like other
9503 ports. */
9504 static rtx
9505 pa_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
9506 const_tree type, bool named ATTRIBUTE_UNUSED)
9507 {
9508 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9509 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9510 int alignment = 0;
9511 int arg_size;
9512 int fpr_reg_base;
9513 int gpr_reg_base;
9514 rtx retval;
9516 if (mode == VOIDmode)
9517 return NULL_RTX;
9519 arg_size = FUNCTION_ARG_SIZE (mode, type);
9521 /* If this arg would be passed partially or totally on the stack, then
9522 this routine should return zero. pa_arg_partial_bytes will
9523 handle arguments which are split between regs and stack slots if
9524 the ABI mandates split arguments. */
9525 if (!TARGET_64BIT)
9526 {
9527 /* The 32-bit ABI does not split arguments.  */
9528 if (cum->words + arg_size > max_arg_words)
9529 return NULL_RTX;
9530 }
9531 else
9532 {
9533 if (arg_size > 1)
9534 alignment = cum->words & 1;
9535 if (cum->words + alignment >= max_arg_words)
9536 return NULL_RTX;
9537 }
9539 /* The 32-bit and 64-bit ABIs are rather different,
9540 particularly in their handling of FP registers. We might
9541 be able to cleverly share code between them, but I'm not
9542 going to bother in the hope that splitting them up results
9543 in code that is more easily understood. */
9545 if (TARGET_64BIT)
9546 {
9547 /* Advance the base registers to their current locations.
9549 Remember, gprs grow towards smaller register numbers while
9550 fprs grow to higher register numbers. Also remember that
9551 although FP regs are 32-bit addressable, we pretend that
9552 the registers are 64 bits wide. */
9553 gpr_reg_base = 26 - cum->words;
9554 fpr_reg_base = 32 + cum->words;
9556 /* Arguments wider than one word and small aggregates need special
9557 treatment. */
9558 if (arg_size > 1
9559 || mode == BLKmode
9560 || (type && (AGGREGATE_TYPE_P (type)
9561 || TREE_CODE (type) == COMPLEX_TYPE
9562 || TREE_CODE (type) == VECTOR_TYPE)))
9563 {
9564 /* Double-extended precision (80-bit), quad-precision (128-bit)
9565 and aggregates including complex numbers are aligned on
9566 128-bit boundaries. The first eight 64-bit argument slots
9567 are associated one-to-one, with general registers r26
9568 through r19, and also with floating-point registers fr4
9569 through fr11. Arguments larger than one word are always
9570 passed in general registers.
9572 Using a PARALLEL with a word mode register results in left
9573 justified data on a big-endian target. */
9575 rtx loc[8];
9576 int i, offset = 0, ub = arg_size;
9578 /* Align the base register. */
9579 gpr_reg_base -= alignment;
9581 ub = MIN (ub, max_arg_words - cum->words - alignment);
9582 for (i = 0; i < ub; i++)
9583 {
9584 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9585 gen_rtx_REG (DImode, gpr_reg_base),
9586 GEN_INT (offset));
9587 gpr_reg_base -= 1;
9588 offset += 8;
9589 }
9591 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9592 }
9593 }
9594 else
9595 {
9596 /* If the argument is larger than a word, then we know precisely
9597 which registers we must use. */
9598 if (arg_size > 1)
9599 {
9600 if (cum->words)
9601 {
9602 gpr_reg_base = 23;
9603 fpr_reg_base = 38;
9604 }
9605 else
9606 {
9607 gpr_reg_base = 25;
9608 fpr_reg_base = 34;
9609 }
9611 /* Structures 5 to 8 bytes in size are passed in the general
9612 registers in the same manner as other non floating-point
9613 objects. The data is right-justified and zero-extended
9614 to 64 bits. This is opposite to the normal justification
9615 used on big endian targets and requires special treatment.
9616 We now define BLOCK_REG_PADDING to pad these objects.
9617 Aggregates, complex and vector types are passed in the same
9618 manner as structures. */
9619 if (mode == BLKmode
9620 || (type && (AGGREGATE_TYPE_P (type)
9621 || TREE_CODE (type) == COMPLEX_TYPE
9622 || TREE_CODE (type) == VECTOR_TYPE)))
9623 {
9624 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9625 gen_rtx_REG (DImode, gpr_reg_base),
9626 const0_rtx);
9627 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9628 }
9629 }
9630 else
9631 {
9632 /* We have a single word (32 bits). A simple computation
9633 will get us the register #s we need. */
9634 gpr_reg_base = 26 - cum->words;
9635 fpr_reg_base = 32 + 2 * cum->words;
9636 }
9637 }
9639 /* Determine if the argument needs to be passed in both general and
9640 floating point registers. */
9641 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9642 /* If we are doing soft-float with portable runtime, then there
9643 is no need to worry about FP regs. */
9644 && !TARGET_SOFT_FLOAT
9645 /* The parameter must be some kind of scalar float, else we just
9646 pass it in integer registers. */
9647 && GET_MODE_CLASS (mode) == MODE_FLOAT
9648 /* The target function must not have a prototype. */
9649 && cum->nargs_prototype <= 0
9650 /* libcalls do not need to pass items in both FP and general
9651 registers. */
9652 && type != NULL_TREE
9653 /* All this hair applies to "outgoing" args only. This includes
9654 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
9655 && !cum->incoming)
9656 /* Also pass outgoing floating arguments in both registers in indirect
9657 calls with the 32-bit ABI and the HP assembler since there is no
9658 way to specify the argument locations in static functions. */
9659 || (!TARGET_64BIT
9660 && !TARGET_GAS
9661 && !cum->incoming
9662 && cum->indirect
9663 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9664 {
9665 retval
9666 = gen_rtx_PARALLEL
9667 (mode,
9668 gen_rtvec (2,
9669 gen_rtx_EXPR_LIST (VOIDmode,
9670 gen_rtx_REG (mode, fpr_reg_base),
9671 const0_rtx),
9672 gen_rtx_EXPR_LIST (VOIDmode,
9673 gen_rtx_REG (mode, gpr_reg_base),
9674 const0_rtx)));
9675 }
9676 else
9677 {
9678 /* See if we should pass this parameter in a general register. */
9679 if (TARGET_SOFT_FLOAT
9680 /* Indirect calls in the normal 32-bit ABI require all arguments
9681 to be passed in general registers. */
9682 || (!TARGET_PORTABLE_RUNTIME
9683 && !TARGET_64BIT
9684 && !TARGET_ELF32
9685 && cum->indirect)
9686 /* If the parameter is not a scalar floating-point parameter,
9687 then it belongs in GPRs. */
9688 || GET_MODE_CLASS (mode) != MODE_FLOAT
9689 /* Structure with single SFmode field belongs in GPR. */
9690 || (type && AGGREGATE_TYPE_P (type)))
9691 retval = gen_rtx_REG (mode, gpr_reg_base);
9692 else
9693 retval = gen_rtx_REG (mode, fpr_reg_base);
9694 }
9695 return retval;
9696 }
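/* Illustrative sketch (hypothetical first "double" argument of an
   unprototyped 32-bit call): RETVAL takes the shape built above,
   naming both the FP register (fpr_reg_base == 34 here) and the GR
   pair starting at %r25, so the callee can pick the value up from
   either place.  */
#if 0
  rtx both = gen_rtx_PARALLEL
    (DFmode,
     gen_rtvec (2,
		gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (DFmode, 34), const0_rtx),
		gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (DFmode, 25), const0_rtx)));
#endif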
9698 /* Arguments larger than one word are double word aligned. */
9700 static unsigned int
9701 pa_function_arg_boundary (enum machine_mode mode, const_tree type)
9702 {
9703 bool singleword = (type
9704 ? (integer_zerop (TYPE_SIZE (type))
9705 || !TREE_CONSTANT (TYPE_SIZE (type))
9706 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9707 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9709 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9710 }
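/* Illustrative uses (assuming the usual pa.h values, PARM_BOUNDARY ==
   BITS_PER_WORD and MAX_PARM_BOUNDARY == 2 * BITS_PER_WORD): on the
   32-bit port an "int" gets 32-bit alignment while a "double" gets
   64-bit alignment.  */
#if 0
  unsigned int a = pa_function_arg_boundary (SImode, NULL_TREE); /* 32 */
  unsigned int b = pa_function_arg_boundary (DFmode, NULL_TREE); /* 64 */
#endif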
9712 /* If this arg would be passed totally in registers or totally on the stack,
9713 then this routine should return zero. */
9715 static int
9716 pa_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
9717 tree type, bool named ATTRIBUTE_UNUSED)
9718 {
9719 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9720 unsigned int max_arg_words = 8;
9721 unsigned int offset = 0;
9723 if (!TARGET_64BIT)
9724 return 0;
9726 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9727 offset = 1;
9729 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9730 /* Arg fits fully into registers. */
9731 return 0;
9732 else if (cum->words + offset >= max_arg_words)
9733 /* Arg fully on the stack. */
9734 return 0;
9735 else
9736 /* Arg is split. */
9737 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9738 }
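/* Worked example (hypothetical PA64 call): with six argument words
   already used, a four-word argument neither fits in registers
   (6 + 4 > 8) nor starts on the stack (6 < 8), so the first
   (8 - 6) * UNITS_PER_WORD == 16 bytes go in registers and the
   remainder is passed in memory.  */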
9741 /* A get_unnamed_section callback for switching to the text section.
9743 This function is only used with SOM. Because we don't support
9744 named subspaces, we can only create a new subspace or switch back
9745 to the default text subspace. */
9747 static void
9748 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9749 {
9750 gcc_assert (TARGET_SOM);
9751 if (TARGET_GAS)
9752 {
9753 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9754 {
9755 /* We only want to emit a .nsubspa directive once at the
9756 start of the function. */
9757 cfun->machine->in_nsubspa = 1;
9759 /* Create a new subspace for the text. This provides
9760 better stub placement and one-only functions. */
9761 if (cfun->decl
9762 && DECL_ONE_ONLY (cfun->decl)
9763 && !DECL_WEAK (cfun->decl))
9764 {
9765 output_section_asm_op ("\t.SPACE $TEXT$\n"
9766 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9767 "ACCESS=44,SORT=24,COMDAT");
9768 return;
9769 }
9770 }
9771 else
9772 {
9773 /* There isn't a current function or the body of the current
9774 function has been completed. So, we are changing to the
9775 text section to output debugging information. Thus, we
9776 need to forget that we are in the text section so that
9777 varasm.c will call us when text_section is selected again. */
9778 gcc_assert (!cfun || !cfun->machine
9779 || cfun->machine->in_nsubspa == 2);
9780 in_section = NULL;
9781 }
9782 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9783 return;
9784 }
9785 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9786 }
9788 /* A get_unnamed_section callback for switching to comdat data
9789 sections. This function is only used with SOM. */
9791 static void
9792 som_output_comdat_data_section_asm_op (const void *data)
9793 {
9794 in_section = NULL;
9795 output_section_asm_op (data);
9796 }
9798 /* Implement TARGET_ASM_INIT_SECTIONS. */
9800 static void
9801 pa_som_asm_init_sections (void)
9802 {
9803 text_section
9804 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9806 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9807 is not being generated. */
9808 som_readonly_data_section
9809 = get_unnamed_section (0, output_section_asm_op,
9810 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9812 /* When secondary definitions are not supported, SOM makes readonly
9813 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9814 the comdat flag. */
9815 som_one_only_readonly_data_section
9816 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9817 "\t.SPACE $TEXT$\n"
9818 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9819 "ACCESS=0x2c,SORT=16,COMDAT");
9822 /* When secondary definitions are not supported, SOM makes data one-only
9823 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9824 som_one_only_data_section
9825 = get_unnamed_section (SECTION_WRITE,
9826 som_output_comdat_data_section_asm_op,
9827 "\t.SPACE $PRIVATE$\n"
9828 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9829 "ACCESS=31,SORT=24,COMDAT");
9831 if (flag_tm)
9832 som_tm_clone_table_section
9833 = get_unnamed_section (0, output_section_asm_op,
9834 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9836 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9837 which reference data within the $TEXT$ space (for example constant
9838 strings in the $LIT$ subspace).
9840 The assemblers (GAS and HP as) both have problems with handling
9841 the difference of two symbols which is the other correct way to
9842 reference constant data during PIC code generation.
9844 So, there's no way to reference constant data which is in the
9845 $TEXT$ space during PIC generation. Instead place all constant
9846 data into the $PRIVATE$ subspace (this reduces sharing, but it
9847 works correctly). */
9848 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9850 /* We must not have a reference to an external symbol defined in a
9851 shared library in a readonly section, else the SOM linker will
9852 complain.
9854 So, we force exception information into the data section. */
9855 exception_section = data_section;
9856 }
9858 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9860 static section *
9861 pa_som_tm_clone_table_section (void)
9862 {
9863 return som_tm_clone_table_section;
9864 }
9866 /* On hpux10, the linker will give an error if we have a reference
9867 in the read-only data section to a symbol defined in a shared
9868 library. Therefore, expressions that might require a reloc
9869 cannot be placed in the read-only data section. */
9871 static section *
9872 pa_select_section (tree exp, int reloc,
9873 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9874 {
9875 if (TREE_CODE (exp) == VAR_DECL
9876 && TREE_READONLY (exp)
9877 && !TREE_THIS_VOLATILE (exp)
9878 && DECL_INITIAL (exp)
9879 && (DECL_INITIAL (exp) == error_mark_node
9880 || TREE_CONSTANT (DECL_INITIAL (exp)))
9881 && !reloc)
9882 {
9883 if (TARGET_SOM
9884 && DECL_ONE_ONLY (exp)
9885 && !DECL_WEAK (exp))
9886 return som_one_only_readonly_data_section;
9887 else
9888 return readonly_data_section;
9889 }
9890 else if (CONSTANT_CLASS_P (exp) && !reloc)
9891 return readonly_data_section;
9892 else if (TARGET_SOM
9893 && TREE_CODE (exp) == VAR_DECL
9894 && DECL_ONE_ONLY (exp)
9895 && !DECL_WEAK (exp))
9896 return som_one_only_data_section;
9897 else
9898 return data_section;
9899 }
9901 static void
9902 pa_globalize_label (FILE *stream, const char *name)
9903 {
9904 /* We only handle DATA objects here; functions are globalized in
9905 ASM_DECLARE_FUNCTION_NAME. */
9906 if (! FUNCTION_NAME_P (name))
9907 {
9908 fputs ("\t.EXPORT ", stream);
9909 assemble_name (stream, name);
9910 fputs (",DATA\n", stream);
9914 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9916 static rtx
9917 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9918 int incoming ATTRIBUTE_UNUSED)
9919 {
9920 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9921 }
9923 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9925 bool
9926 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9927 {
9928 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9929 PA64 ABI says that objects larger than 128 bits are returned in memory.
9930 Note, int_size_in_bytes can return -1 if the size of the object is
9931 variable or larger than the maximum value that can be expressed as
9932 a HOST_WIDE_INT. It can also return zero for an empty type. The
9933 simplest way to handle variable and empty types is to pass them in
9934 memory. This avoids problems in defining the boundaries of argument
9935 slots, allocating registers, etc. */
9936 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9937 || int_size_in_bytes (type) <= 0);
9938 }
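/* Worked examples (hypothetical types): a 12-byte struct is returned
   in memory on the 32-bit port (96 bits > 64) but in registers on
   PA64 (96 bits <= 128).  Variable-sized and empty types always go in
   memory since int_size_in_bytes yields -1 or 0 for them.  */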
9940 /* Structure to hold declaration and name of external symbols that are
9941 emitted by GCC. We generate a vector of these symbols and output them
9942 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9943 This avoids putting out names that are never really used. */
9945 typedef struct GTY(()) extern_symbol
9946 {
9947 tree decl;
9948 const char *name;
9949 } extern_symbol;
9951 /* Garbage-collected vector of extern_symbols. */
9954 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9956 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9957 /* Mark DECL (name NAME) as an external reference (assembler output
9958 file FILE). This saves the names to output at the end of the file
9959 if actually referenced. */
9961 void
9962 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9963 {
9964 gcc_assert (file == asm_out_file);
9965 extern_symbol p = {decl, name};
9966 vec_safe_push (extern_symbols, p);
9967 }
9969 /* Output text required at the end of an assembler file.
9970 This includes deferred plabels and .import directives for
9971 all external symbols that were actually referenced. */
9973 static void
9974 pa_hpux_file_end (void)
9975 {
9976 unsigned int i;
9977 extern_symbol *p;
9979 if (!NO_DEFERRED_PROFILE_COUNTERS)
9980 output_deferred_profile_counters ();
9982 output_deferred_plabels ();
9984 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9985 {
9986 tree decl = p->decl;
9988 if (!TREE_ASM_WRITTEN (decl)
9989 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9990 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9991 }
9993 vec_free (extern_symbols);
9994 }
9995 #endif
9997 /* Return true if a change from mode FROM to mode TO for a register
9998 in register class RCLASS is invalid. */
10000 bool
10001 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10002 enum reg_class rclass)
10003 {
10004 if (from == to)
10005 return false;
10007 /* Reject changes to/from complex and vector modes. */
10008 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10009 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10010 return true;
10012 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10013 return false;
10015 /* There is no way to load QImode or HImode values directly from
10016 memory. SImode loads to the FP registers are not zero extended.
10017 On the 64-bit target, this conflicts with the definition of
10018 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
10019 with different sizes in the floating-point registers. */
10020 if (MAYBE_FP_REG_CLASS_P (rclass))
10021 return true;
10023 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
10024 in specific sets of registers. Thus, we cannot allow changing
10025 to a larger mode when it's larger than a word. */
10026 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10027 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10028 return true;
10030 return false;
10031 }
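/* Illustrative queries (hypothetical, using the register-class names
   from pa.h): size-changing mode punning is rejected for anything that
   might land in an FP register, while a same-size change in the
   general registers is allowed.  */
#if 0
  bool bad = pa_cannot_change_mode_class (SFmode, DFmode, FP_REGS);
  /* true: the sizes differ in an FP class.  */
  bool ok = pa_cannot_change_mode_class (SImode, SFmode, GENERAL_REGS);
  /* false: same size, non-FP class.  */
#endif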
10033 /* Returns TRUE if it is a good idea to tie two pseudo registers
10034 when one has mode MODE1 and one has mode MODE2.
10035 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
10036 for any hard reg, then this must be FALSE for correct output.
10038 We should return FALSE for QImode and HImode because these modes
10039 are not ok in the floating-point registers. However, this prevents
10040 tying these modes to SImode and DImode in the general registers.
10041 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
10042 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
10043 in the floating-point registers. */
10045 bool
10046 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
10047 {
10048 /* Don't tie modes in different classes. */
10049 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10050 return false;
10052 return true;
10053 }
10056 /* Length in units of the trampoline instruction code. */
10058 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10061 /* Output assembler code for a block containing the constant parts
10062 of a trampoline, leaving space for the variable parts.
10064 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10065 and then branches to the specified routine.
10067 This code template is copied from text segment to stack location
10068 and then patched with pa_trampoline_init to contain valid values,
10069 and then entered as a subroutine.
10071 It is best to keep this as small as possible to avoid having to
10072 flush multiple lines in the cache. */
10074 static void
10075 pa_asm_trampoline_template (FILE *f)
10076 {
10077 if (!TARGET_64BIT)
10078 {
10079 fputs ("\tldw 36(%r22),%r21\n", f);
10080 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
10081 if (ASSEMBLER_DIALECT == 0)
10082 fputs ("\tdepi 0,31,2,%r21\n", f);
10083 else
10084 fputs ("\tdepwi 0,31,2,%r21\n", f);
10085 fputs ("\tldw 4(%r21),%r19\n", f);
10086 fputs ("\tldw 0(%r21),%r21\n", f);
10087 if (TARGET_PA_20)
10088 {
10089 fputs ("\tbve (%r21)\n", f);
10090 fputs ("\tldw 40(%r22),%r29\n", f);
10091 fputs ("\t.word 0\n", f);
10092 fputs ("\t.word 0\n", f);
10094 else
10096 fputs ("\tldsid (%r21),%r1\n", f);
10097 fputs ("\tmtsp %r1,%sr0\n", f);
10098 fputs ("\tbe 0(%sr0,%r21)\n", f);
10099 fputs ("\tldw 40(%r22),%r29\n", f);
10101 fputs ("\t.word 0\n", f);
10102 fputs ("\t.word 0\n", f);
10103 fputs ("\t.word 0\n", f);
10104 fputs ("\t.word 0\n", f);
10106 else
10108 fputs ("\t.dword 0\n", f);
10109 fputs ("\t.dword 0\n", f);
10110 fputs ("\t.dword 0\n", f);
10111 fputs ("\t.dword 0\n", f);
10112 fputs ("\tmfia %r31\n", f);
10113 fputs ("\tldd 24(%r31),%r1\n", f);
10114 fputs ("\tldd 24(%r1),%r27\n", f);
10115 fputs ("\tldd 16(%r1),%r1\n", f);
10116 fputs ("\tbve (%r1)\n", f);
10117 fputs ("\tldd 32(%r31),%r31\n", f);
10118 fputs ("\t.dword 0 ; fptr\n", f);
10119 fputs ("\t.dword 0 ; static link\n", f);
10123 /* Emit RTL insns to initialize the variable parts of a trampoline.
10124 FNADDR is an RTX for the address of the function's pure code.
10125 CXT is an RTX for the static chain value for the function.
10127 Move the function address to the trampoline template at offset 36.
10128 Move the static chain value to trampoline template at offset 40.
10129 Move the trampoline address to trampoline template at offset 44.
10130 Move r19 to trampoline template at offset 48. The latter two
10131 words create a plabel for the indirect call to the trampoline.
10133 A similar sequence is used for the 64-bit port but the plabel is
10134 at the beginning of the trampoline.
10136 Finally, the cache entries for the trampoline code are flushed.
10137 This is necessary to ensure that the trampoline instruction sequence
10138 is written to memory prior to any attempts at prefetching the code
10139 sequence. */
10141 static void
10142 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10143 {
10144 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10145 rtx start_addr = gen_reg_rtx (Pmode);
10146 rtx end_addr = gen_reg_rtx (Pmode);
10147 rtx line_length = gen_reg_rtx (Pmode);
10148 rtx r_tramp, tmp;
10150 emit_block_move (m_tramp, assemble_trampoline_template (),
10151 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10152 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10154 if (!TARGET_64BIT)
10155 {
10156 tmp = adjust_address (m_tramp, Pmode, 36);
10157 emit_move_insn (tmp, fnaddr);
10158 tmp = adjust_address (m_tramp, Pmode, 40);
10159 emit_move_insn (tmp, chain_value);
10161 /* Create a fat pointer for the trampoline. */
10162 tmp = adjust_address (m_tramp, Pmode, 44);
10163 emit_move_insn (tmp, r_tramp);
10164 tmp = adjust_address (m_tramp, Pmode, 48);
10165 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10167 /* fdc and fic only use registers for the address to flush,
10168 they do not accept integer displacements. We align the
10169 start and end addresses to the beginning of their respective
10170 cache lines to minimize the number of lines flushed. */
10171 emit_insn (gen_andsi3 (start_addr, r_tramp,
10172 GEN_INT (-MIN_CACHELINE_SIZE)));
10173 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10174 TRAMPOLINE_CODE_SIZE-1));
10175 emit_insn (gen_andsi3 (end_addr, tmp,
10176 GEN_INT (-MIN_CACHELINE_SIZE)));
10177 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10178 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10179 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10180 gen_reg_rtx (Pmode),
10181 gen_reg_rtx (Pmode)));
10182 }
10183 else
10184 {
10185 tmp = adjust_address (m_tramp, Pmode, 56);
10186 emit_move_insn (tmp, fnaddr);
10187 tmp = adjust_address (m_tramp, Pmode, 64);
10188 emit_move_insn (tmp, chain_value);
10190 /* Create a fat pointer for the trampoline. */
10191 tmp = adjust_address (m_tramp, Pmode, 16);
10192 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10193 r_tramp, 32)));
10194 tmp = adjust_address (m_tramp, Pmode, 24);
10195 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10197 /* fdc and fic only use registers for the address to flush,
10198 they do not accept integer displacements. We align the
10199 start and end addresses to the beginning of their respective
10200 cache lines to minimize the number of lines flushed. */
10201 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10202 emit_insn (gen_anddi3 (start_addr, tmp,
10203 GEN_INT (-MIN_CACHELINE_SIZE)));
10204 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10205 TRAMPOLINE_CODE_SIZE - 1));
10206 emit_insn (gen_anddi3 (end_addr, tmp,
10207 GEN_INT (-MIN_CACHELINE_SIZE)));
10208 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10209 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10210 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10211 gen_reg_rtx (Pmode),
10212 gen_reg_rtx (Pmode)));
10213 }
10214 }
10216 /* Perform any machine-specific adjustment in the address of the trampoline.
10217 ADDR contains the address that was passed to pa_trampoline_init.
10218 Adjust the trampoline address to point to the plabel at offset 44. */
10220 static rtx
10221 pa_trampoline_adjust_address (rtx addr)
10222 {
10223 if (!TARGET_64BIT)
10224 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10225 return addr;
10226 }
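/* Worked detail (inferred from the 32-bit template above): the plabel
   lives at offset 44, and the extra 2 in the 46 sets the low-order
   "plabel" bit that the template tests with "bb,>=,n %r21,30" and
   strips with "depwi 0,31,2" before using the address.  */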
10228 static rtx
10229 pa_delegitimize_address (rtx orig_x)
10230 {
10231 rtx x = delegitimize_mem_from_attrs (orig_x);
10233 if (GET_CODE (x) == LO_SUM
10234 && GET_CODE (XEXP (x, 1)) == UNSPEC
10235 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10236 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10237 return x;
10238 }
10240 static rtx
10241 pa_internal_arg_pointer (void)
10242 {
10243 /* The argument pointer and the hard frame pointer are the same in
10244 the 32-bit runtime, so we don't need a copy. */
10245 if (TARGET_64BIT)
10246 return copy_to_reg (virtual_incoming_args_rtx);
10247 else
10248 return virtual_incoming_args_rtx;
10249 }
10251 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10252 Frame pointer elimination is automatically handled. */
10254 static bool
10255 pa_can_eliminate (const int from, const int to)
10256 {
10257 /* The argument pointer cannot be eliminated in the 64-bit runtime. */
10258 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10259 return false;
10261 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10262 ? ! frame_pointer_needed
10263 : true);
10264 }
10266 /* Define the offset between two registers, FROM to be eliminated and its
10267 replacement TO, at the start of a routine. */
10268 HOST_WIDE_INT
10269 pa_initial_elimination_offset (int from, int to)
10270 {
10271 HOST_WIDE_INT offset;
10273 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10274 && to == STACK_POINTER_REGNUM)
10275 offset = -pa_compute_frame_size (get_frame_size (), 0);
10276 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10277 offset = 0;
10278 else
10279 gcc_unreachable ();
10281 return offset;
10282 }
10284 static void
10285 pa_conditional_register_usage (void)
10286 {
10287 int i;
10289 if (!TARGET_64BIT && !TARGET_PA_11)
10290 {
10291 for (i = 56; i <= FP_REG_LAST; i++)
10292 fixed_regs[i] = call_used_regs[i] = 1;
10293 for (i = 33; i < 56; i += 2)
10294 fixed_regs[i] = call_used_regs[i] = 1;
10295 }
10296 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10297 {
10298 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10299 fixed_regs[i] = call_used_regs[i] = 1;
10300 }
10301 if (flag_pic)
10302 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10303 }
10305 /* Target hook for c_mode_for_suffix. */
10307 static enum machine_mode
10308 pa_c_mode_for_suffix (char suffix)
10309 {
10310 if (HPUX_LONG_DOUBLE_LIBRARY)
10311 {
10312 if (suffix == 'q')
10313 return TFmode;
10314 }
10316 return VOIDmode;
10317 }
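/* Illustrative only: with the hook above and HPUX_LONG_DOUBLE_LIBRARY
   true, HP-UX code such as

     long double q = 1.5q;

   has its 'q' suffixed literal parsed as a 128-bit TFmode constant.  */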
10319 /* Target hook for function_section. */
10321 static section *
10322 pa_function_section (tree decl, enum node_frequency freq,
10323 bool startup, bool exit)
10324 {
10325 /* Put functions in text section if target doesn't have named sections. */
10326 if (!targetm_common.have_named_sections)
10327 return text_section;
10329 /* Force nested functions into the same section as the containing
10330 function. */
10331 if (decl
10332 && DECL_SECTION_NAME (decl) == NULL_TREE
10333 && DECL_CONTEXT (decl) != NULL_TREE
10334 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10335 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE)
10336 return function_section (DECL_CONTEXT (decl));
10338 /* Otherwise, use the default function section. */
10339 return default_function_section (decl, freq, startup, exit);
10340 }
10342 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10344 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10345 that need more than three instructions to load prior to reload. This
10346 limit is somewhat arbitrary. It takes three instructions to load a
10347 CONST_INT from memory but two are memory accesses. It may be better
10348 to increase the allowed range for CONST_INTS. We may also be able
10349 to handle CONST_DOUBLES. */
10351 static bool
10352 pa_legitimate_constant_p (enum machine_mode mode, rtx x)
10353 {
10354 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10355 return false;
10357 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10358 return false;
10360 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10361 legitimate constants. The other variants can't be handled by
10362 the move patterns after reload starts. */
10363 if (PA_SYMBOL_REF_TLS_P (x))
10364 return false;
10366 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10367 return false;
10369 if (TARGET_64BIT
10370 && HOST_BITS_PER_WIDE_INT > 32
10371 && GET_CODE (x) == CONST_INT
10372 && !reload_in_progress
10373 && !reload_completed
10374 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10375 && !pa_cint_ok_for_move (INTVAL (x)))
10376 return false;
10378 if (function_label_operand (x, mode))
10379 return false;
10381 return true;
10382 }
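/* Illustrative behavior (hypothetical values): small CONST_INTs such
   as GEN_INT (5) are always accepted, while on TARGET_64BIT any
   CONST_DOUBLE, and any CONST_INT that pa_cint_ok_for_move rejects
   before reload, is forced to memory instead.  */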
10384 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10386 static unsigned int
10387 pa_section_type_flags (tree decl, const char *name, int reloc)
10388 {
10389 unsigned int flags;
10391 flags = default_section_type_flags (decl, name, reloc);
10393 /* Function labels are placed in the constant pool. This can
10394 cause a section conflict if decls are put in ".data.rel.ro"
10395 or ".data.rel.ro.local" using the __attribute__ construct. */
10396 if (strcmp (name, ".data.rel.ro") == 0
10397 || strcmp (name, ".data.rel.ro.local") == 0)
10398 flags |= SECTION_WRITE | SECTION_RELRO;
10400 return flags;
10401 }
10403 #include "gt-pa.h"