/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2017 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
                            const_tree, bool);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
                                        reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, machine_mode,
                              const_tree, bool);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
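
/* Editorial usage note (not in the upstream source): for example,
   -mfixed-range=fr4-fr31 keeps the compiler from allocating fr4
   through fr31 (the kernel-mode case mentioned above), and several
   ranges may be given separated by commas, e.g.
   -mfixed-range=fr4-fr15,fr20-fr31.  When every register in
   FP_REG_FIRST..FP_REG_LAST ends up fixed, the loop above also sets
   MASK_DISABLE_FPREGS.  */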
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = const_double_from_real_value (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}
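
/* Editorial examples (not in the upstream source): 0x1fff (8191)
   fits in 14 bits, so ldo covers it; 0x12345800 has its low 11 bits
   clear, so ldil covers it; 0x1f0 is a sign-extended 5-bit field
   shifted into place, so zdepi covers it.  Each therefore satisfies
   pa_cint_ok_for_move.  */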
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
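
/* Editorial worked example (not in the upstream source): the mask
   above keeps bits 63..31 and the low 11 bits.  For 0x12345800 the
   masked value is 0 (low 11 bits clear, bit 31 and up clear), so ldil
   works.  For 0x80000000 the masked value is 0x80000000, neither 0
   nor all-ones from bit 31 up, so widening to 64 bits would change
   the sign and it is rejected; the sign-extended form
   0xffffffff80000000 is accepted instead.  */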
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
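
/* Editorial worked example (not in the upstream source): for
   x = 0x1f0 (the 5-bit field 0b11111 placed at bit 4), lsb_mask is
   0x10 and t = ((0x1f0 >> 4) + 0x10) & ~0xf = 0x20, a power of two,
   so zdepi applies.  For x = 0x111, t = 0x12, which is not a power
   of two, so zdepi cannot build it.  */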
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
     0....01....1
     1....10....0
     1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
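
/* Editorial examples (not in the upstream source): pa_ior_mask_p
   accepts a MASK whose set bits form a single contiguous run, e.g.
   0x3f0 passes (0x3f0 + 0x10 = 0x400, a power of two) while 0x101
   fails (0x101 + 0x1 = 0x102, and 0x102 & 0x101 != 0).
   pa_and_mask_p runs the same test on ~MASK, so a mask such as
   ~0xf0, which clears one contiguous field, is accepted.  */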
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          /* Make sure we have label and not a note.  */
          if (LABEL_P (orig))
            LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (Pmode, base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, arg, Pmode);

  return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
           || GET_CODE (x) == MULT)
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && ((GET_CODE (x) == ASHIFT
               && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
              || (GET_CODE (x) == MULT
                  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory (X + <large int>)

   into:

        if (<large int> & mask) >= (mask + 1) / 2
          Y = (<large int> & ~mask) + mask + 1      Round up.
        else
          Y = (<large int> & ~mask)                 Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (This allows more shadd insns and shifted
   indexed addressing modes to be used.)

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
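
/* Editorial worked example (not in the upstream source): for a
   MODE_INT reference memory (X + 0x4123), mask is 0x3fff and
   0x4123 & 0x3fff = 0x123, which is below (mask + 1) / 2 = 0x2000,
   so we round down: Y = 0x4000, Z = X + 0x4000, and the reference
   becomes memory (Z + 0x123), a displacement that fits in 14 bits.  */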
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg2,
                                                      GEN_INT (shift_val)),
                                      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_ASHIFT (Pmode,
                                              XEXP (XEXP (XEXP (x, 0), 0), 0),
                                              GEN_INT (shift_val)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));
          val /= (1 << shift_val);

          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_ASHIFT (Pmode, reg1,
                                                    GEN_INT (shift_val)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode,
                            gen_rtx_PLUS (Pmode,
                                          gen_rtx_ASHIFT (Pmode, reg2,
                                                          GEN_INT (shift_val)),
                                          reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg1,
                                                      GEN_INT (shift_val)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (mem_shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode,
                                                         reg2,
                                                         GEN_INT (shift_val)),
                                         reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode, regx2,
                                                         GEN_INT (shift_val)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}
/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
/* For the HPPA, REG, REG+CONST and LO_SUM addresses cost 1, HIGH
   costs 2, and all other addresses (symbolic and PIC references
   among them) cost 4, making PIC addresses the most expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
                   addr_space_t as ATTRIBUTE_UNUSED,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
                int opno ATTRIBUTE_UNUSED,
                int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = factor * factor * COSTS_N_INSNS (8);
      else
        *total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A size N times larger than UNITS_PER_WORD needs N times as
         many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor == 0)
        factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}
1618 /* Emit insns to move operands[1] into operands[0].
1620 Return 1 if we have written out everything that needs to be done to
1621 do the move. Otherwise, return 0 and the caller will emit the move
1622 normally.
1624 Note SCRATCH_REG may not be in the proper mode depending on how it
1625 will be used. This routine is responsible for creating a new copy
1626 of SCRATCH_REG in the proper mode. */
1629 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1631 register rtx operand0 = operands[0];
1632 register rtx operand1 = operands[1];
1633 register rtx tem;
1635 /* We can only handle indexed addresses in the destination operand
1636 of floating point stores. Thus, we need to break out indexed
1637 addresses from the destination operand. */
1638 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1640 gcc_assert (can_create_pseudo_p ());
1642 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1643 operand0 = replace_equiv_address (operand0, tem);
1646 /* On targets with non-equivalent space registers, break out unscaled
1647 indexed addresses from the source operand before the final CSE.
1648 We have to do this because the REG_POINTER flag is not correctly
1649 carried through various optimization passes and CSE may substitute
1650 a pseudo without the pointer set for one with the pointer set. As
1651 a result, we loose various opportunities to create insns with
1652 unscaled indexed addresses. */
1653 if (!TARGET_NO_SPACE_REGS
1654 && !cse_not_expected
1655 && GET_CODE (operand1) == MEM
1656 && GET_CODE (XEXP (operand1, 0)) == PLUS
1657 && REG_P (XEXP (XEXP (operand1, 0), 0))
1658 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1659 operand1
1660 = replace_equiv_address (operand1,
1661 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1663 if (scratch_reg
1664 && reload_in_progress && GET_CODE (operand0) == REG
1665 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1666 operand0 = reg_equiv_mem (REGNO (operand0));
1667 else if (scratch_reg
1668 && reload_in_progress && GET_CODE (operand0) == SUBREG
1669 && GET_CODE (SUBREG_REG (operand0)) == REG
1670 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1672 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1673 the code which tracks sets/uses for delete_output_reload. */
1674 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1675 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1676 SUBREG_BYTE (operand0));
1677 operand0 = alter_subreg (&temp, true);
1680 if (scratch_reg
1681 && reload_in_progress && GET_CODE (operand1) == REG
1682 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1683 operand1 = reg_equiv_mem (REGNO (operand1));
1684 else if (scratch_reg
1685 && reload_in_progress && GET_CODE (operand1) == SUBREG
1686 && GET_CODE (SUBREG_REG (operand1)) == REG
1687 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1689 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1690 the code which tracks sets/uses for delete_output_reload. */
1691 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1692 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1693 SUBREG_BYTE (operand1));
1694 operand1 = alter_subreg (&temp, true);
1697 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1698 && ((tem = find_replacement (&XEXP (operand0, 0)))
1699 != XEXP (operand0, 0)))
1700 operand0 = replace_equiv_address (operand0, tem);
1702 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1703 && ((tem = find_replacement (&XEXP (operand1, 0)))
1704 != XEXP (operand1, 0)))
1705 operand1 = replace_equiv_address (operand1, tem);
1707 /* Handle secondary reloads for loads/stores of FP registers from
1708 REG+D addresses where D does not fit in 5 or 14 bits, including
1709 (subreg (mem (addr))) cases, and reloads for other unsupported
1710 memory operands. */
1711 if (scratch_reg
1712 && FP_REG_P (operand0)
1713 && (MEM_P (operand1)
1714 || (GET_CODE (operand1) == SUBREG
1715 && MEM_P (XEXP (operand1, 0)))))
1717 rtx op1 = operand1;
1719 if (GET_CODE (op1) == SUBREG)
1720 op1 = XEXP (op1, 0);
1722 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1724 if (!(TARGET_PA_20
1725 && !TARGET_ELF32
1726 && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1727 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1729 /* SCRATCH_REG will hold an address and maybe the actual data.
1730 We want it in WORD_MODE regardless of what mode it was
1731 originally given to us. */
1732 scratch_reg = force_mode (word_mode, scratch_reg);
1734 /* D might not fit in 14 bits either; for such cases load D
1735 into scratch reg. */
1736 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1738 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1739 emit_move_insn (scratch_reg,
1740 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1741 Pmode,
1742 XEXP (XEXP (op1, 0), 0),
1743 scratch_reg));
1745 else
1746 emit_move_insn (scratch_reg, XEXP (op1, 0));
1747 emit_insn (gen_rtx_SET (operand0,
1748 replace_equiv_address (op1, scratch_reg)));
1749 return 1;
1752 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1753 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1754 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1756 /* Load memory address into SCRATCH_REG. */
1757 scratch_reg = force_mode (word_mode, scratch_reg);
1758 emit_move_insn (scratch_reg, XEXP (op1, 0));
1759 emit_insn (gen_rtx_SET (operand0,
1760 replace_equiv_address (op1, scratch_reg)));
1761 return 1;
1764 else if (scratch_reg
1765 && FP_REG_P (operand1)
1766 && (MEM_P (operand0)
1767 || (GET_CODE (operand0) == SUBREG
1768 && MEM_P (XEXP (operand0, 0)))))
1770 rtx op0 = operand0;
1772 if (GET_CODE (op0) == SUBREG)
1773 op0 = XEXP (op0, 0);
1775 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1777 if (!(TARGET_PA_20
1778 && !TARGET_ELF32
1779 && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1780 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1782 /* SCRATCH_REG will hold an address and maybe the actual data.
1783 We want it in WORD_MODE regardless of what mode it was
1784 originally given to us. */
1785 scratch_reg = force_mode (word_mode, scratch_reg);
1787 /* D might not fit in 14 bits either; for such cases load D
1788 into scratch reg. */
1789 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1791 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1792 emit_move_insn (scratch_reg,
1793 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1794 Pmode,
1795 XEXP (XEXP (op0, 0), 0),
1796 scratch_reg));
1798 else
1799 emit_move_insn (scratch_reg, XEXP (op0, 0));
1800 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1801 operand1));
1802 return 1;
1805 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1806 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1807 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1809 /* Load memory address into SCRATCH_REG. */
1810 scratch_reg = force_mode (word_mode, scratch_reg);
1811 emit_move_insn (scratch_reg, XEXP (op0, 0));
1812 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1813 operand1));
1814 return 1;
1817 /* Handle secondary reloads for loads of FP registers from constant
1818 expressions by forcing the constant into memory. For the most part,
1819 this is only necessary for SImode and DImode.
1821 Use scratch_reg to hold the address of the memory location. */
1822 else if (scratch_reg
1823 && CONSTANT_P (operand1)
1824 && FP_REG_P (operand0))
1826 rtx const_mem, xoperands[2];
1828 if (operand1 == CONST0_RTX (mode))
1830 emit_insn (gen_rtx_SET (operand0, operand1));
1831 return 1;
1834 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1835 it in WORD_MODE regardless of what mode it was originally given
1836 to us. */
1837 scratch_reg = force_mode (word_mode, scratch_reg);
1839 /* Force the constant into memory and put the address of the
1840 memory location into scratch_reg. */
1841 const_mem = force_const_mem (mode, operand1);
1842 xoperands[0] = scratch_reg;
1843 xoperands[1] = XEXP (const_mem, 0);
1844 pa_emit_move_sequence (xoperands, Pmode, 0);
1846 /* Now load the destination register. */
1847 emit_insn (gen_rtx_SET (operand0,
1848 replace_equiv_address (const_mem, scratch_reg)));
1849 return 1;
1851 /* Handle secondary reloads for SAR. These occur when trying to load
1852 the SAR from memory or a constant. */
1853 else if (scratch_reg
1854 && GET_CODE (operand0) == REG
1855 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1856 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1857 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1859 /* D might not fit in 14 bits either; for such cases load D into
1860 scratch reg. */
1861 if (GET_CODE (operand1) == MEM
1862 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1864 /* We are reloading the address into the scratch register, so we
1865 want to make sure the scratch register is a full register. */
1866 scratch_reg = force_mode (word_mode, scratch_reg);
1868 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1869 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1870 0)),
1871 Pmode,
1872 XEXP (XEXP (operand1, 0),
1873 0),
1874 scratch_reg));
1876 /* Now we are going to load the scratch register from memory,
1877 we want to load it in the same width as the original MEM,
1878 which must be the same as the width of the ultimate destination,
1879 OPERAND0. */
1880 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1882 emit_move_insn (scratch_reg,
1883 replace_equiv_address (operand1, scratch_reg));
1885 else
1887 /* We want to load the scratch register using the same mode as
1888 the ultimate destination. */
1889 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1891 emit_move_insn (scratch_reg, operand1);
1894 /* And emit the insn to set the ultimate destination. We know that
1895 the scratch register has the same mode as the destination at this
1896 point. */
1897 emit_move_insn (operand0, scratch_reg);
1898 return 1;
1901 /* Handle the most common case: storing into a register. */
1902 if (register_operand (operand0, mode))
1904 /* Legitimize TLS symbol references. This happens for references
1905 that aren't a legitimate constant. */
1906 if (PA_SYMBOL_REF_TLS_P (operand1))
1907 operand1 = legitimize_tls_address (operand1);
1909 if (register_operand (operand1, mode)
1910 || (GET_CODE (operand1) == CONST_INT
1911 && pa_cint_ok_for_move (UINTVAL (operand1)))
1912 || (operand1 == CONST0_RTX (mode))
1913 || (GET_CODE (operand1) == HIGH
1914 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1915 /* Only `general_operands' can come here, so MEM is ok. */
1916 || GET_CODE (operand1) == MEM)
1918 /* Various sets are created during RTL generation which don't
1919 have the REG_POINTER flag correctly set. After the CSE pass,
1920 instruction recognition can fail if we don't consistently
1921 set this flag when performing register copies. This should
1922 also improve the opportunities for creating insns that use
1923 unscaled indexing. */
1924 if (REG_P (operand0) && REG_P (operand1))
1926 if (REG_POINTER (operand1)
1927 && !REG_POINTER (operand0)
1928 && !HARD_REGISTER_P (operand0))
1929 copy_reg_pointer (operand0, operand1);
1932 /* When MEMs are broken out, the REG_POINTER flag doesn't
1933 get set. In some cases, we can set the REG_POINTER flag
1934 from the declaration for the MEM. */
1935 if (REG_P (operand0)
1936 && GET_CODE (operand1) == MEM
1937 && !REG_POINTER (operand0))
1939 tree decl = MEM_EXPR (operand1);
1941 /* Set the register pointer flag and register alignment
1942 if the declaration for this memory reference is a
1943 pointer type. */
1944 if (decl)
1946 tree type;
1948 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1949 tree operand 1. */
1950 if (TREE_CODE (decl) == COMPONENT_REF)
1951 decl = TREE_OPERAND (decl, 1);
1953 type = TREE_TYPE (decl);
1954 type = strip_array_types (type);
1956 if (POINTER_TYPE_P (type))
1957 mark_reg_pointer (operand0, BITS_PER_UNIT);
1961 emit_insn (gen_rtx_SET (operand0, operand1));
1962 return 1;
1965 else if (GET_CODE (operand0) == MEM)
1967 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1968 && !(reload_in_progress || reload_completed))
1970 rtx temp = gen_reg_rtx (DFmode);
1972 emit_insn (gen_rtx_SET (temp, operand1));
1973 emit_insn (gen_rtx_SET (operand0, temp));
1974 return 1;
1976 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1978 /* Run this case quickly. */
1979 emit_insn (gen_rtx_SET (operand0, operand1));
1980 return 1;
1982 if (! (reload_in_progress || reload_completed))
1984 operands[0] = validize_mem (operand0);
1985 operands[1] = operand1 = force_reg (mode, operand1);
1989 /* Simplify the source if we need to.
1990 Note we do have to handle function labels here, even though we do
1991 not consider them legitimate constants. Loop optimizations can
1992 call emit_move_xxx with one as a source. */
1993 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1994 || (GET_CODE (operand1) == HIGH
1995 && symbolic_operand (XEXP (operand1, 0), mode))
1996 || function_label_operand (operand1, VOIDmode)
1997 || tls_referenced_p (operand1))
1999 int ishighonly = 0;
2001 if (GET_CODE (operand1) == HIGH)
2003 ishighonly = 1;
2004 operand1 = XEXP (operand1, 0);
2006 if (symbolic_operand (operand1, mode))
2008 /* Argh. The assembler and linker can't handle arithmetic
2009 involving plabels.
2011 So we force the plabel into memory, load operand0 from
2012 the memory location, then add in the constant part. */
2013 if ((GET_CODE (operand1) == CONST
2014 && GET_CODE (XEXP (operand1, 0)) == PLUS
2015 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2016 VOIDmode))
2017 || function_label_operand (operand1, VOIDmode))
2019 rtx temp, const_part;
2021 /* Figure out what (if any) scratch register to use. */
2022 if (reload_in_progress || reload_completed)
2024 scratch_reg = scratch_reg ? scratch_reg : operand0;
2025 /* SCRATCH_REG will hold an address and maybe the actual
2026 data. We want it in WORD_MODE regardless of what mode it
2027 was originally given to us. */
2028 scratch_reg = force_mode (word_mode, scratch_reg);
2030 else if (flag_pic)
2031 scratch_reg = gen_reg_rtx (Pmode);
2033 if (GET_CODE (operand1) == CONST)
2035 /* Save away the constant part of the expression. */
2036 const_part = XEXP (XEXP (operand1, 0), 1);
2037 gcc_assert (GET_CODE (const_part) == CONST_INT);
2039 /* Force the function label into memory. */
2040 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2042 else
2044 /* No constant part. */
2045 const_part = NULL_RTX;
2047 /* Force the function label into memory. */
2048 temp = force_const_mem (mode, operand1);
2052 /* Get the address of the memory location. PIC-ify it if
2053 necessary. */
2054 temp = XEXP (temp, 0);
2055 if (flag_pic)
2056 temp = legitimize_pic_address (temp, mode, scratch_reg);
2058 /* Put the address of the memory location into our destination
2059 register. */
2060 operands[1] = temp;
2061 pa_emit_move_sequence (operands, mode, scratch_reg);
2063 /* Now load from the memory location into our destination
2064 register. */
2065 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2066 pa_emit_move_sequence (operands, mode, scratch_reg);
2068 /* And add back in the constant part. */
2069 if (const_part != NULL_RTX)
2070 expand_inc (operand0, const_part);
2072 return 1;
2075 if (flag_pic)
2077 rtx_insn *insn;
2078 rtx temp;
2080 if (reload_in_progress || reload_completed)
2082 temp = scratch_reg ? scratch_reg : operand0;
2083 /* TEMP will hold an address and maybe the actual
2084 data. We want it in WORD_MODE regardless of what mode it
2085 was originally given to us. */
2086 temp = force_mode (word_mode, temp);
2088 else
2089 temp = gen_reg_rtx (Pmode);
2091 /* Force (const (plus (symbol) (const_int))) to memory
2092 if the const_int will not fit in 14 bits. Although
2093 this requires a relocation, the instruction sequence
2094 needed to load the value is shorter. */
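/* Illustrative example (not from the source): a reference such as
   `sym + 100000' has an offset too large for a 14-bit displacement,
   so the whole value is loaded from the constant pool instead of
   being built up with additions after the PIC load. */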
2095 if (GET_CODE (operand1) == CONST
2096 && GET_CODE (XEXP (operand1, 0)) == PLUS
2097 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2098 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2100 rtx x, m = force_const_mem (mode, operand1);
2102 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2103 x = replace_equiv_address (m, x);
2104 insn = emit_move_insn (operand0, x);
2106 else
2108 operands[1] = legitimize_pic_address (operand1, mode, temp);
2109 if (REG_P (operand0) && REG_P (operands[1]))
2110 copy_reg_pointer (operand0, operands[1]);
2111 insn = emit_move_insn (operand0, operands[1]);
2114 /* Put a REG_EQUAL note on this insn. */
2115 set_unique_reg_note (insn, REG_EQUAL, operand1);
2117 /* On the HPPA, references to data space are supposed to use dp,
2118 register 27, but showing it in the RTL inhibits various cse
2119 and loop optimizations. */
2120 else
2122 rtx temp, set;
2124 if (reload_in_progress || reload_completed)
2126 temp = scratch_reg ? scratch_reg : operand0;
2127 /* TEMP will hold an address and maybe the actual
2128 data. We want it in WORD_MODE regardless of what mode it
2129 was originally given to us. */
2130 temp = force_mode (word_mode, temp);
2132 else
2133 temp = gen_reg_rtx (mode);
2135 /* Loading a SYMBOL_REF into a register makes that register
2136 safe to be used as the base in an indexed address.
2138 Don't mark hard registers though. That loses. */
2139 if (GET_CODE (operand0) == REG
2140 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2141 mark_reg_pointer (operand0, BITS_PER_UNIT);
2142 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2143 mark_reg_pointer (temp, BITS_PER_UNIT);
2145 if (ishighonly)
2146 set = gen_rtx_SET (operand0, temp);
2147 else
2148 set = gen_rtx_SET (operand0,
2149 gen_rtx_LO_SUM (mode, temp, operand1));
2151 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2152 emit_insn (set);
2155 return 1;
2157 else if (tls_referenced_p (operand1))
2159 rtx tmp = operand1;
2160 rtx addend = NULL;
2162 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2164 addend = XEXP (XEXP (tmp, 0), 1);
2165 tmp = XEXP (XEXP (tmp, 0), 0);
2168 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2169 tmp = legitimize_tls_address (tmp);
2170 if (addend)
2172 tmp = gen_rtx_PLUS (mode, tmp, addend);
2173 tmp = force_operand (tmp, operands[0]);
2175 operands[1] = tmp;
2177 else if (GET_CODE (operand1) != CONST_INT
2178 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2180 rtx temp;
2181 rtx_insn *insn;
2182 rtx op1 = operand1;
2183 HOST_WIDE_INT value = 0;
2184 HOST_WIDE_INT insv = 0;
2185 int insert = 0;
2187 if (GET_CODE (operand1) == CONST_INT)
2188 value = INTVAL (operand1);
2190 if (TARGET_64BIT
2191 && GET_CODE (operand1) == CONST_INT
2192 && HOST_BITS_PER_WIDE_INT > 32
2193 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2195 HOST_WIDE_INT nval;
2197 /* Extract the low order 32 bits of the value and sign extend.
2198 If the new value is the same as the original value, we can
2199 use the original value as-is. If the new value is
2200 different, we use it and insert the most-significant 32-bits
2201 of the original value into the final result. */
2202 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2203 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2204 if (value != nval)
2206 #if HOST_BITS_PER_WIDE_INT > 32
2207 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2208 #endif
2209 insert = 1;
2210 value = nval;
2211 operand1 = GEN_INT (nval);
2215 if (reload_in_progress || reload_completed)
2216 temp = scratch_reg ? scratch_reg : operand0;
2217 else
2218 temp = gen_reg_rtx (mode);
2220 /* We don't directly split DImode constants on 32-bit targets
2221 because PLUS uses an 11-bit immediate and the insn sequence
2222 generated is not as efficient as the one using HIGH/LO_SUM. */
2223 if (GET_CODE (operand1) == CONST_INT
2224 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2225 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2226 && !insert)
2228 /* Directly break constant into high and low parts. This
2229 provides better optimization opportunities because various
2230 passes recognize constants split with PLUS but not LO_SUM.
2231 We use a 14-bit signed low part except when the addition
2232 of 0x4000 to the high part might change the sign of the
2233 high part. */
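/* Worked example (illustrative, not from the source): for
   value = 0x1234abcd, low = 0x2bcd and high = 0x12348000. Since
   low >= 0x2000, high becomes 0x1234c000 and low = value - high
   = -0x1433, which fits in 14 signed bits; the result is roughly
   `ldil L'0x1234c000,%tmp' then `ldo -5171(%tmp),%dst', where
   %tmp and %dst stand for the temporary and destination regs. */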
2234 HOST_WIDE_INT low = value & 0x3fff;
2235 HOST_WIDE_INT high = value & ~ 0x3fff;
2237 if (low >= 0x2000)
2239 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2240 high += 0x2000;
2241 else
2242 high += 0x4000;
2245 low = value - high;
2247 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2248 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2250 else
2252 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2253 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2256 insn = emit_move_insn (operands[0], operands[1]);
2258 /* Now insert the most significant 32 bits of the value
2259 into the register. When we don't have a second register
2260 available, it could take up to nine instructions to load
2261 a 64-bit integer constant. Prior to reload, we force
2262 constants that would take more than three instructions
2263 to load to the constant pool. During and after reload,
2264 we have to handle all possible values. */
2265 if (insert)
2267 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2268 register and the value to be inserted is outside the
2269 range that can be loaded with three depdi instructions. */
2270 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2272 operand1 = GEN_INT (insv);
2274 emit_insn (gen_rtx_SET (temp,
2275 gen_rtx_HIGH (mode, operand1)));
2276 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2277 if (mode == DImode)
2278 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2279 const0_rtx, temp));
2280 else
2281 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2282 const0_rtx, temp));
2284 else
2286 int len = 5, pos = 27;
2288 /* Insert the bits using the depdi instruction. */
2289 while (pos >= 0)
2291 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2292 HOST_WIDE_INT sign = v5 < 0;
2294 /* Left extend the insertion. */
2295 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2296 while (pos > 0 && (insv & 1) == sign)
2298 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2299 len += 1;
2300 pos -= 1;
2303 if (mode == DImode)
2304 insn = emit_insn (gen_insvdi (operand0,
2305 GEN_INT (len),
2306 GEN_INT (pos),
2307 GEN_INT (v5)));
2308 else
2309 insn = emit_insn (gen_insvsi (operand0,
2310 GEN_INT (len),
2311 GEN_INT (pos),
2312 GEN_INT (v5)));
2314 len = pos > 0 && pos < 5 ? pos : 5;
2315 pos -= len;
2320 set_unique_reg_note (insn, REG_EQUAL, op1);
2322 return 1;
2325 /* Now have insn-emit do whatever it normally does. */
2326 return 0;
2329 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2330 it will need a link/runtime reloc). */
2332 int
2333 pa_reloc_needed (tree exp)
2335 int reloc = 0;
2337 switch (TREE_CODE (exp))
2339 case ADDR_EXPR:
2340 return 1;
2342 case POINTER_PLUS_EXPR:
2343 case PLUS_EXPR:
2344 case MINUS_EXPR:
2345 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2346 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2347 break;
2349 CASE_CONVERT:
2350 case NON_LVALUE_EXPR:
2351 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2352 break;
2354 case CONSTRUCTOR:
2356 tree value;
2357 unsigned HOST_WIDE_INT ix;
2359 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2360 if (value)
2361 reloc |= pa_reloc_needed (value);
2363 break;
2365 case ERROR_MARK:
2366 break;
2368 default:
2369 break;
2371 return reloc;
2375 /* Return the best assembler insn template
2376 for moving operands[1] into operands[0] as a fullword. */
2377 const char *
2378 pa_singlemove_string (rtx *operands)
2380 HOST_WIDE_INT intval;
2382 if (GET_CODE (operands[0]) == MEM)
2383 return "stw %r1,%0";
2384 if (GET_CODE (operands[1]) == MEM)
2385 return "ldw %1,%0";
2386 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2388 long i;
2390 gcc_assert (GET_MODE (operands[1]) == SFmode);
2392 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2393 bit pattern. */
2394 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2396 operands[1] = GEN_INT (i);
2397 /* Fall through to CONST_INT case. */
2399 if (GET_CODE (operands[1]) == CONST_INT)
2401 intval = INTVAL (operands[1]);
2403 if (VAL_14_BITS_P (intval))
2404 return "ldi %1,%0";
2405 else if ((intval & 0x7ff) == 0)
2406 return "ldil L'%1,%0";
2407 else if (pa_zdepi_cint_p (intval))
2408 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2409 else
2410 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2412 return "copy %1,%0";
2416 /* Compute position (in OP[1]) and width (in OP[2])
2417 useful for copying IMM to a register using the zdepi
2418 instructions. Store the immediate value to insert in OP[0]. */
2419 static void
2420 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2422 int lsb, len;
2424 /* Find the least significant set bit in IMM. */
2425 for (lsb = 0; lsb < 32; lsb++)
2427 if ((imm & 1) != 0)
2428 break;
2429 imm >>= 1;
2432 /* Choose variants based on *sign* of the 5-bit field. */
2433 if ((imm & 0x10) == 0)
2434 len = (lsb <= 28) ? 4 : 32 - lsb;
2435 else
2437 /* Find the width of the bitstring in IMM. */
2438 for (len = 5; len < 32 - lsb; len++)
2440 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2441 break;
2444 /* Sign extend IMM as a 5-bit value. */
2445 imm = (imm & 0xf) - 0x10;
2448 op[0] = imm;
2449 op[1] = 31 - lsb;
2450 op[2] = len;
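/* Worked example for the function above (illustrative, not from the
   source): for IMM = 0x00fff000 the least significant set bit is at
   position 12 and the bitstring is 12 bits wide, with bit 4 of the
   shifted value set, so op[0] = -1, op[1] = 31 - 12 = 19 and
   op[2] = 12, i.e. a `zdepi -1,19,12' that deposits twelve one
   bits. */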
2453 /* Compute position (in OP[1]) and width (in OP[2])
2454 useful for copying IMM to a register using the depdi,z
2455 instructions. Store the immediate value to insert in OP[0]. */
2457 static void
2458 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2460 int lsb, len, maxlen;
2462 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2464 /* Find the least significant set bit in IMM. */
2465 for (lsb = 0; lsb < maxlen; lsb++)
2467 if ((imm & 1) != 0)
2468 break;
2469 imm >>= 1;
2472 /* Choose variants based on *sign* of the 5-bit field. */
2473 if ((imm & 0x10) == 0)
2474 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2475 else
2477 /* Find the width of the bitstring in IMM. */
2478 for (len = 5; len < maxlen - lsb; len++)
2480 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2481 break;
2484 /* Extend length if host is narrow and IMM is negative. */
2485 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2486 len += 32;
2488 /* Sign extend IMM as a 5-bit value. */
2489 imm = (imm & 0xf) - 0x10;
2492 op[0] = imm;
2493 op[1] = 63 - lsb;
2494 op[2] = len;
2497 /* Output assembler code to perform a doubleword move insn
2498 with operands OPERANDS. */
2500 const char *
2501 pa_output_move_double (rtx *operands)
2503 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2504 rtx latehalf[2];
2505 rtx addreg0 = 0, addreg1 = 0;
2506 int highonly = 0;
2508 /* First classify both operands. */
2510 if (REG_P (operands[0]))
2511 optype0 = REGOP;
2512 else if (offsettable_memref_p (operands[0]))
2513 optype0 = OFFSOP;
2514 else if (GET_CODE (operands[0]) == MEM)
2515 optype0 = MEMOP;
2516 else
2517 optype0 = RNDOP;
2519 if (REG_P (operands[1]))
2520 optype1 = REGOP;
2521 else if (CONSTANT_P (operands[1]))
2522 optype1 = CNSTOP;
2523 else if (offsettable_memref_p (operands[1]))
2524 optype1 = OFFSOP;
2525 else if (GET_CODE (operands[1]) == MEM)
2526 optype1 = MEMOP;
2527 else
2528 optype1 = RNDOP;
2530 /* Check for cases that the operand constraints are not
2531 supposed to allow. */
2532 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2534 /* Handle copies between general and floating registers. */
2536 if (optype0 == REGOP && optype1 == REGOP
2537 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2539 if (FP_REG_P (operands[0]))
2541 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2542 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2543 return "{fldds|fldd} -16(%%sp),%0";
2545 else
2547 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2548 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2549 return "{ldws|ldw} -12(%%sp),%R0";
2553 /* Handle auto decrementing and incrementing loads and stores
2554 specifically, since the structure of the function doesn't work
2555 for them without major modification. Do this better once this
2556 port is taught about the general inc/dec addressing of the PA.
2557 (This was written by tege. Chide him if it doesn't work.) */
2559 if (optype0 == MEMOP)
2561 /* We have to output the address syntax ourselves, since print_operand
2562 doesn't deal with the addresses we want to use. Fix this later. */
2564 rtx addr = XEXP (operands[0], 0);
2565 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2567 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2569 operands[0] = XEXP (addr, 0);
2570 gcc_assert (GET_CODE (operands[1]) == REG
2571 && GET_CODE (operands[0]) == REG);
2573 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2575 /* No overlap between high target register and address
2576 register. (We do this in a non-obvious way to
2577 save a register file writeback) */
2578 if (GET_CODE (addr) == POST_INC)
2579 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2580 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2582 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2584 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2586 operands[0] = XEXP (addr, 0);
2587 gcc_assert (GET_CODE (operands[1]) == REG
2588 && GET_CODE (operands[0]) == REG);
2590 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2591 /* No overlap between high target register and address
2592 register. (We do this in a non-obvious way to save a
2593 register file writeback) */
2594 if (GET_CODE (addr) == PRE_INC)
2595 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2596 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2599 if (optype1 == MEMOP)
2601 /* We have to output the address syntax ourselves, since print_operand
2602 doesn't deal with the addresses we want to use. Fix this later. */
2604 rtx addr = XEXP (operands[1], 0);
2605 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2607 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2609 operands[1] = XEXP (addr, 0);
2610 gcc_assert (GET_CODE (operands[0]) == REG
2611 && GET_CODE (operands[1]) == REG);
2613 if (!reg_overlap_mentioned_p (high_reg, addr))
2615 /* No overlap between high target register and address
2616 register. (We do this in a non-obvious way to
2617 save a register file writeback) */
2618 if (GET_CODE (addr) == POST_INC)
2619 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2620 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2622 else
2624 /* This is an undefined situation. We should load into the
2625 address register *and* update that register. Probably
2626 we don't need to handle this at all. */
2627 if (GET_CODE (addr) == POST_INC)
2628 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2629 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2632 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2634 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2636 operands[1] = XEXP (addr, 0);
2637 gcc_assert (GET_CODE (operands[0]) == REG
2638 && GET_CODE (operands[1]) == REG);
2640 if (!reg_overlap_mentioned_p (high_reg, addr))
2642 /* No overlap between high target register and address
2643 register. (We do this in a non-obvious way to
2644 save a register file writeback) */
2645 if (GET_CODE (addr) == PRE_INC)
2646 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2647 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2649 else
2651 /* This is an undefined situation. We should load into the
2652 address register *and* update that register. Probably
2653 we don't need to handle this at all. */
2654 if (GET_CODE (addr) == PRE_INC)
2655 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2656 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2659 else if (GET_CODE (addr) == PLUS
2660 && GET_CODE (XEXP (addr, 0)) == MULT)
2662 rtx xoperands[4];
2664 /* Load address into left half of destination register. */
2665 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2666 xoperands[1] = XEXP (addr, 1);
2667 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2668 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2669 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2670 xoperands);
2671 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2673 else if (GET_CODE (addr) == PLUS
2674 && REG_P (XEXP (addr, 0))
2675 && REG_P (XEXP (addr, 1)))
2677 rtx xoperands[3];
2679 /* Load address into left half of destination register. */
2680 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2681 xoperands[1] = XEXP (addr, 0);
2682 xoperands[2] = XEXP (addr, 1);
2683 output_asm_insn ("{addl|add,l} %1,%2,%0",
2684 xoperands);
2685 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2689 /* If an operand is an unoffsettable memory ref, find a register
2690 we can increment temporarily to make it refer to the second word. */
2692 if (optype0 == MEMOP)
2693 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2695 if (optype1 == MEMOP)
2696 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2698 /* Ok, we can do one word at a time.
2699 Normally we do the low-numbered word first.
2701 In either case, set up in LATEHALF the operands to use
2702 for the high-numbered word and in some cases alter the
2703 operands in OPERANDS to be suitable for the low-numbered word. */
2705 if (optype0 == REGOP)
2706 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2707 else if (optype0 == OFFSOP)
2708 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2709 else
2710 latehalf[0] = operands[0];
2712 if (optype1 == REGOP)
2713 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2714 else if (optype1 == OFFSOP)
2715 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2716 else if (optype1 == CNSTOP)
2718 if (GET_CODE (operands[1]) == HIGH)
2720 operands[1] = XEXP (operands[1], 0);
2721 highonly = 1;
2723 split_double (operands[1], &operands[1], &latehalf[1]);
2725 else
2726 latehalf[1] = operands[1];
2728 /* If the first move would clobber the source of the second one,
2729 do them in the other order.
2731 This can happen in two cases:
2733 mem -> register where the first half of the destination register
2734 is the same register used in the memory's address. Reload
2735 can create such insns.
2737 mem in this case will be either register indirect or register
2738 indirect plus a valid offset.
2740 register -> register move where REGNO(dst) == REGNO(src + 1)
2741 someone (Tim/Tege?) claimed this can happen for parameter loads.
2743 Handle mem -> register case first. */
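/* Illustrative example (not from the source): loading a double word
   into %r4/%r5 from memory addressed by %r4 would clobber the base
   if done low word first, so we emit `ldw 4(%r4),%r5' and only then
   `ldw 0(%r4),%r4'. */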
2744 if (optype0 == REGOP
2745 && (optype1 == MEMOP || optype1 == OFFSOP)
2746 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2748 /* Do the late half first. */
2749 if (addreg1)
2750 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2751 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2753 /* Then clobber. */
2754 if (addreg1)
2755 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2756 return pa_singlemove_string (operands);
2759 /* Now handle register -> register case. */
2760 if (optype0 == REGOP && optype1 == REGOP
2761 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2763 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2764 return pa_singlemove_string (operands);
2767 /* Normal case: do the two words, low-numbered first. */
2769 output_asm_insn (pa_singlemove_string (operands), operands);
2771 /* Make any unoffsettable addresses point at high-numbered word. */
2772 if (addreg0)
2773 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2774 if (addreg1)
2775 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2777 /* Do high-numbered word. */
2778 if (highonly)
2779 output_asm_insn ("ldil L'%1,%0", latehalf);
2780 else
2781 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2783 /* Undo the adds we just did. */
2784 if (addreg0)
2785 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2786 if (addreg1)
2787 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2789 return "";
2792 const char *
2793 pa_output_fp_move_double (rtx *operands)
2795 if (FP_REG_P (operands[0]))
2797 if (FP_REG_P (operands[1])
2798 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2799 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2800 else
2801 output_asm_insn ("fldd%F1 %1,%0", operands);
2803 else if (FP_REG_P (operands[1]))
2805 output_asm_insn ("fstd%F0 %1,%0", operands);
2807 else
2809 rtx xoperands[2];
2811 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2813 /* This is a pain. You have to be prepared to deal with an
2814 arbitrary address here including pre/post increment/decrement.
2816 So avoid this in the MD. */
2817 gcc_assert (GET_CODE (operands[0]) == REG);
2819 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2820 xoperands[0] = operands[0];
2821 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2823 return "";
2826 /* Return a REG that occurs in ADDR with coefficient 1.
2827 ADDR can be effectively incremented by incrementing REG. */
2829 static rtx
2830 find_addr_reg (rtx addr)
2832 while (GET_CODE (addr) == PLUS)
2834 if (GET_CODE (XEXP (addr, 0)) == REG)
2835 addr = XEXP (addr, 0);
2836 else if (GET_CODE (XEXP (addr, 1)) == REG)
2837 addr = XEXP (addr, 1);
2838 else if (CONSTANT_P (XEXP (addr, 0)))
2839 addr = XEXP (addr, 1);
2840 else if (CONSTANT_P (XEXP (addr, 1)))
2841 addr = XEXP (addr, 0);
2842 else
2843 gcc_unreachable ();
2845 gcc_assert (GET_CODE (addr) == REG);
2846 return addr;
2849 /* Emit code to perform a block move.
2851 OPERANDS[0] is the destination pointer as a REG, clobbered.
2852 OPERANDS[1] is the source pointer as a REG, clobbered.
2853 OPERANDS[2] is a register for temporary storage.
2854 OPERANDS[3] is a register for temporary storage.
2855 OPERANDS[4] is the size as a CONST_INT
2856 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2857 OPERANDS[6] is another temporary register. */
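/* Sketch of the output (illustrative, not from the source): for
   n_bytes = 23 with 4-byte alignment, the counter is preloaded with
   15, the loop copies eight bytes per iteration, and the 7-byte
   residual is finished with one more word copy plus an ldw/stby,e
   pair for the last three bytes. */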
2859 const char *
2860 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2862 int align = INTVAL (operands[5]);
2863 unsigned long n_bytes = INTVAL (operands[4]);
2865 /* We can't move more than a word at a time because the PA
2866 has no integer move insns wider than a word. (Could use fp mem ops?) */
2867 if (align > (TARGET_64BIT ? 8 : 4))
2868 align = (TARGET_64BIT ? 8 : 4);
2870 /* Note that we know each loop below will execute at least twice
2871 (else we would have open-coded the copy). */
2872 switch (align)
2874 case 8:
2875 /* Pre-adjust the loop counter. */
2876 operands[4] = GEN_INT (n_bytes - 16);
2877 output_asm_insn ("ldi %4,%2", operands);
2879 /* Copying loop. */
2880 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2881 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2882 output_asm_insn ("std,ma %3,8(%0)", operands);
2883 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2884 output_asm_insn ("std,ma %6,8(%0)", operands);
2886 /* Handle the residual. There could be up to 7 bytes of
2887 residual to copy! */
2888 if (n_bytes % 16 != 0)
2890 operands[4] = GEN_INT (n_bytes % 8);
2891 if (n_bytes % 16 >= 8)
2892 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2893 if (n_bytes % 8 != 0)
2894 output_asm_insn ("ldd 0(%1),%6", operands);
2895 if (n_bytes % 16 >= 8)
2896 output_asm_insn ("std,ma %3,8(%0)", operands);
2897 if (n_bytes % 8 != 0)
2898 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2900 return "";
2902 case 4:
2903 /* Pre-adjust the loop counter. */
2904 operands[4] = GEN_INT (n_bytes - 8);
2905 output_asm_insn ("ldi %4,%2", operands);
2907 /* Copying loop. */
2908 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2909 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2910 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2911 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2912 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2914 /* Handle the residual. There could be up to 7 bytes of
2915 residual to copy! */
2916 if (n_bytes % 8 != 0)
2918 operands[4] = GEN_INT (n_bytes % 4);
2919 if (n_bytes % 8 >= 4)
2920 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2921 if (n_bytes % 4 != 0)
2922 output_asm_insn ("ldw 0(%1),%6", operands);
2923 if (n_bytes % 8 >= 4)
2924 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2925 if (n_bytes % 4 != 0)
2926 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2928 return "";
2930 case 2:
2931 /* Pre-adjust the loop counter. */
2932 operands[4] = GEN_INT (n_bytes - 4);
2933 output_asm_insn ("ldi %4,%2", operands);
2935 /* Copying loop. */
2936 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2937 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2938 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2939 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2940 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2942 /* Handle the residual. */
2943 if (n_bytes % 4 != 0)
2945 if (n_bytes % 4 >= 2)
2946 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2947 if (n_bytes % 2 != 0)
2948 output_asm_insn ("ldb 0(%1),%6", operands);
2949 if (n_bytes % 4 >= 2)
2950 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2951 if (n_bytes % 2 != 0)
2952 output_asm_insn ("stb %6,0(%0)", operands);
2954 return "";
2956 case 1:
2957 /* Pre-adjust the loop counter. */
2958 operands[4] = GEN_INT (n_bytes - 2);
2959 output_asm_insn ("ldi %4,%2", operands);
2961 /* Copying loop. */
2962 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2963 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2964 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2965 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2966 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2968 /* Handle the residual. */
2969 if (n_bytes % 2 != 0)
2971 output_asm_insn ("ldb 0(%1),%3", operands);
2972 output_asm_insn ("stb %3,0(%0)", operands);
2974 return "";
2976 default:
2977 gcc_unreachable ();
2981 /* Count the number of insns necessary to handle this block move.
2983 Basic structure is the same as emit_block_move, except that we
2984 count insns rather than emit them. */
2986 static int
2987 compute_movmem_length (rtx_insn *insn)
2989 rtx pat = PATTERN (insn);
2990 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2991 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2992 unsigned int n_insns = 0;
2994 /* We can't move more than a word at a time because the PA
2995 has no integer move insns wider than a word. (Could use fp mem ops?) */
2996 if (align > (TARGET_64BIT ? 8 : 4))
2997 align = (TARGET_64BIT ? 8 : 4);
2999 /* The basic copying loop. */
3000 n_insns = 6;
3002 /* Residuals. */
3003 if (n_bytes % (2 * align) != 0)
3005 if ((n_bytes % (2 * align)) >= align)
3006 n_insns += 2;
3008 if ((n_bytes % align) != 0)
3009 n_insns += 2;
3012 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3013 return n_insns * 4;
3016 /* Emit code to perform a block clear.
3018 OPERANDS[0] is the destination pointer as a REG, clobbered.
3019 OPERANDS[1] is a register for temporary storage.
3020 OPERANDS[2] is the size as a CONST_INT
3021 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3023 const char *
3024 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3026 int align = INTVAL (operands[3]);
3027 unsigned long n_bytes = INTVAL (operands[2]);
3029 /* We can't clear more than a word at a time because the PA
3030 has no integer move insns wider than a word. */
3031 if (align > (TARGET_64BIT ? 8 : 4))
3032 align = (TARGET_64BIT ? 8 : 4);
3034 /* Note that we know each loop below will execute at least twice
3035 (else we would have open-coded the copy). */
3036 switch (align)
3038 case 8:
3039 /* Pre-adjust the loop counter. */
3040 operands[2] = GEN_INT (n_bytes - 16);
3041 output_asm_insn ("ldi %2,%1", operands);
3043 /* Loop. */
3044 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3045 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3046 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3048 /* Handle the residual. There could be up to 7 bytes of
3049 residual to copy! */
3050 if (n_bytes % 16 != 0)
3052 operands[2] = GEN_INT (n_bytes % 8);
3053 if (n_bytes % 16 >= 8)
3054 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3055 if (n_bytes % 8 != 0)
3056 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3058 return "";
3060 case 4:
3061 /* Pre-adjust the loop counter. */
3062 operands[2] = GEN_INT (n_bytes - 8);
3063 output_asm_insn ("ldi %2,%1", operands);
3065 /* Loop. */
3066 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3067 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3068 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3070 /* Handle the residual. There could be up to 7 bytes of
3071 residual to copy! */
3072 if (n_bytes % 8 != 0)
3074 operands[2] = GEN_INT (n_bytes % 4);
3075 if (n_bytes % 8 >= 4)
3076 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3077 if (n_bytes % 4 != 0)
3078 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3080 return "";
3082 case 2:
3083 /* Pre-adjust the loop counter. */
3084 operands[2] = GEN_INT (n_bytes - 4);
3085 output_asm_insn ("ldi %2,%1", operands);
3087 /* Loop. */
3088 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3089 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3090 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3092 /* Handle the residual. */
3093 if (n_bytes % 4 != 0)
3095 if (n_bytes % 4 >= 2)
3096 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3097 if (n_bytes % 2 != 0)
3098 output_asm_insn ("stb %%r0,0(%0)", operands);
3100 return "";
3102 case 1:
3103 /* Pre-adjust the loop counter. */
3104 operands[2] = GEN_INT (n_bytes - 2);
3105 output_asm_insn ("ldi %2,%1", operands);
3107 /* Loop. */
3108 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3109 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3110 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3112 /* Handle the residual. */
3113 if (n_bytes % 2 != 0)
3114 output_asm_insn ("stb %%r0,0(%0)", operands);
3116 return "";
3118 default:
3119 gcc_unreachable ();
3123 /* Count the number of insns necessary to handle this block clear.
3125 Basic structure is the same as emit_block_move, except that we
3126 count insns rather than emit them. */
3128 static int
3129 compute_clrmem_length (rtx_insn *insn)
3131 rtx pat = PATTERN (insn);
3132 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3133 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3134 unsigned int n_insns = 0;
3136 /* We can't clear more than a word at a time because the PA
3137 has no integer move insns wider than a word. */
3138 if (align > (TARGET_64BIT ? 8 : 4))
3139 align = (TARGET_64BIT ? 8 : 4);
3141 /* The basic loop. */
3142 n_insns = 4;
3144 /* Residuals. */
3145 if (n_bytes % (2 * align) != 0)
3147 if ((n_bytes % (2 * align)) >= align)
3148 n_insns++;
3150 if ((n_bytes % align) != 0)
3151 n_insns++;
3154 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3155 return n_insns * 4;
3159 const char *
3160 pa_output_and (rtx *operands)
3162 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3164 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3165 int ls0, ls1, ms0, p, len;
3167 for (ls0 = 0; ls0 < 32; ls0++)
3168 if ((mask & (1 << ls0)) == 0)
3169 break;
3171 for (ls1 = ls0; ls1 < 32; ls1++)
3172 if ((mask & (1 << ls1)) != 0)
3173 break;
3175 for (ms0 = ls1; ms0 < 32; ms0++)
3176 if ((mask & (1 << ms0)) == 0)
3177 break;
3179 gcc_assert (ms0 == 32);
3181 if (ls1 == 32)
3183 len = ls0;
3185 gcc_assert (len);
3187 operands[2] = GEN_INT (len);
3188 return "{extru|extrw,u} %1,31,%2,%0";
3190 else
3192 /* We could use this `depi' for the case above as well, but `depi'
3193 requires one more register file access than an `extru'. */
3195 p = 31 - ls0;
3196 len = ls1 - ls0;
3198 operands[2] = GEN_INT (p);
3199 operands[3] = GEN_INT (len);
3200 return "{depi|depwi} 0,%2,%3,%0";
3203 else
3204 return "and %1,%2,%0";
3207 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3208 storing the result in operands[0]. */
3209 const char *
3210 pa_output_64bit_and (rtx *operands)
3212 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3214 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3215 int ls0, ls1, ms0, p, len;
3217 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3218 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3219 break;
3221 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3222 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3223 break;
3225 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3226 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3227 break;
3229 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3231 if (ls1 == HOST_BITS_PER_WIDE_INT)
3233 len = ls0;
3235 gcc_assert (len);
3237 operands[2] = GEN_INT (len);
3238 return "extrd,u %1,63,%2,%0";
3240 else
3242 /* We could use this `depdi' for the case above as well, but `depdi'
3243 requires one more register file access than an `extrd,u'. */
3245 p = 63 - ls0;
3246 len = ls1 - ls0;
3248 operands[2] = GEN_INT (p);
3249 operands[3] = GEN_INT (len);
3250 return "depdi 0,%2,%3,%0";
3253 else
3254 return "and %1,%2,%0";
3257 const char *
3258 pa_output_ior (rtx *operands)
3260 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3261 int bs0, bs1, p, len;
3263 if (INTVAL (operands[2]) == 0)
3264 return "copy %1,%0";
3266 for (bs0 = 0; bs0 < 32; bs0++)
3267 if ((mask & (1 << bs0)) != 0)
3268 break;
3270 for (bs1 = bs0; bs1 < 32; bs1++)
3271 if ((mask & (1 << bs1)) == 0)
3272 break;
3274 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3276 p = 31 - bs0;
3277 len = bs1 - bs0;
3279 operands[2] = GEN_INT (p);
3280 operands[3] = GEN_INT (len);
3281 return "{depi|depwi} -1,%2,%3,%0";
3284 /* Return a string to perform a bitwise-ior of operands[1] with operands[2]
3285 storing the result in operands[0]. */
3286 const char *
3287 pa_output_64bit_ior (rtx *operands)
3289 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3290 int bs0, bs1, p, len;
3292 if (INTVAL (operands[2]) == 0)
3293 return "copy %1,%0";
3295 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3296 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3297 break;
3299 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3300 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3301 break;
3303 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3304 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3306 p = 63 - bs0;
3307 len = bs1 - bs0;
3309 operands[2] = GEN_INT (p);
3310 operands[3] = GEN_INT (len);
3311 return "depdi -1,%2,%3,%0";
3314 /* Target hook for assembling integer objects. This code handles
3315 aligned SI and DI integers specially since function references
3316 must be preceded by P%. */
3318 static bool
3319 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3321 bool result;
3322 tree decl = NULL;
3324 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to
3325 call assemble_external and set the SYMBOL_REF_DECL to NULL before
3326 calling output_addr_const. Otherwise, it may call assemble_external
3327 in the midst of outputting the assembler code for the SYMBOL_REF.
3328 We restore the SYMBOL_REF_DECL after the output is done. */
3329 if (GET_CODE (x) == SYMBOL_REF)
3331 decl = SYMBOL_REF_DECL (x);
3332 if (decl)
3334 assemble_external (decl);
3335 SET_SYMBOL_REF_DECL (x, NULL);
3339 if (size == UNITS_PER_WORD
3340 && aligned_p
3341 && function_label_operand (x, VOIDmode))
3343 fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3345 /* We don't want an OPD when generating fast indirect calls. */
3346 if (!TARGET_FAST_INDIRECT_CALLS)
3347 fputs ("P%", asm_out_file);
3349 output_addr_const (asm_out_file, x);
3350 fputc ('\n', asm_out_file);
3351 result = true;
3353 else
3354 result = default_assemble_integer (x, size, aligned_p);
3356 if (decl)
3357 SET_SYMBOL_REF_DECL (x, decl);
3359 return result;
3362 /* Output an ascii string. */
3363 void
3364 pa_output_ascii (FILE *file, const char *p, int size)
3366 int i;
3367 int chars_output;
3368 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3370 /* The HP assembler can only take strings of 256 characters at one
3371 time. This is a limitation on input line length, *not* the
3372 length of the string. Sigh. Even worse, it seems that the
3373 restriction is in number of input characters (see \xnn &
3374 \whatever). So we have to do this very carefully. */
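/* For instance (illustrative), a long string is emitted as several
   .STRING directives; the loop below starts a new directive whenever
   the escaped output would push past 243 characters on one line. */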
3376 fputs ("\t.STRING \"", file);
3378 chars_output = 0;
3379 for (i = 0; i < size; i += 4)
3381 int co = 0;
3382 int io = 0;
3383 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3385 register unsigned int c = (unsigned char) p[i + io];
3387 if (c == '\"' || c == '\\')
3388 partial_output[co++] = '\\';
3389 if (c >= ' ' && c < 0177)
3390 partial_output[co++] = c;
3391 else
3393 unsigned int hexd;
3394 partial_output[co++] = '\\';
3395 partial_output[co++] = 'x';
3396 hexd = c / 16 - 0 + '0';
3397 if (hexd > '9')
3398 hexd -= '9' - 'a' + 1;
3399 partial_output[co++] = hexd;
3400 hexd = c % 16 - 0 + '0';
3401 if (hexd > '9')
3402 hexd -= '9' - 'a' + 1;
3403 partial_output[co++] = hexd;
3406 if (chars_output + co > 243)
3408 fputs ("\"\n\t.STRING \"", file);
3409 chars_output = 0;
3411 fwrite (partial_output, 1, (size_t) co, file);
3412 chars_output += co;
3413 co = 0;
3415 fputs ("\"\n", file);
3418 /* Try to rewrite floating point comparisons & branches to avoid
3419 useless add,tr insns.
3421 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3422 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3423 first attempt to remove useless add,tr insns. It is zero
3424 for the second pass as reorg sometimes leaves bogus REG_DEAD
3425 notes lying around.
3427 When CHECK_NOTES is zero we can only eliminate add,tr insns
3428 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3429 instructions. */
3430 static void
3431 remove_useless_addtr_insns (int check_notes)
3433 rtx_insn *insn;
3434 static int pass = 0;
3436 /* This is fairly cheap, so always run it when optimizing. */
3437 if (optimize > 0)
3439 int fcmp_count = 0;
3440 int fbranch_count = 0;
3442 /* Walk all the insns in this function looking for fcmp & fbranch
3443 instructions. Keep track of how many of each we find. */
3444 for (insn = get_insns (); insn; insn = next_insn (insn))
3446 rtx tmp;
3448 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3449 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3450 continue;
3452 tmp = PATTERN (insn);
3454 /* It must be a set. */
3455 if (GET_CODE (tmp) != SET)
3456 continue;
3458 /* If the destination is CCFP, then we've found an fcmp insn. */
3459 tmp = SET_DEST (tmp);
3460 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3462 fcmp_count++;
3463 continue;
3466 tmp = PATTERN (insn);
3467 /* If this is an fbranch instruction, bump the fbranch counter. */
3468 if (GET_CODE (tmp) == SET
3469 && SET_DEST (tmp) == pc_rtx
3470 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3471 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3472 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3473 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3475 fbranch_count++;
3476 continue;
3481 /* Find all floating point compare + branch insns. If possible,
3482 reverse the comparison & the branch to avoid add,tr insns. */
3483 for (insn = get_insns (); insn; insn = next_insn (insn))
3485 rtx tmp;
3486 rtx_insn *next;
3488 /* Ignore anything that isn't an INSN. */
3489 if (! NONJUMP_INSN_P (insn))
3490 continue;
3492 tmp = PATTERN (insn);
3494 /* It must be a set. */
3495 if (GET_CODE (tmp) != SET)
3496 continue;
3498 /* The destination must be CCFP, which is register zero. */
3499 tmp = SET_DEST (tmp);
3500 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3501 continue;
3503 /* INSN should be a set of CCFP.
3505 See if the result of this insn is used in a reversed FP
3506 conditional branch. If so, reverse our condition and
3507 the branch. Doing so avoids useless add,tr insns. */
3508 next = next_insn (insn);
3509 while (next)
3511 /* Jumps, calls and labels stop our search. */
3512 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3513 break;
3515 /* As does another fcmp insn. */
3516 if (NONJUMP_INSN_P (next)
3517 && GET_CODE (PATTERN (next)) == SET
3518 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3519 && REGNO (SET_DEST (PATTERN (next))) == 0)
3520 break;
3522 next = next_insn (next);
3525 /* Is NEXT_INSN a branch? */
3526 if (next && JUMP_P (next))
3528 rtx pattern = PATTERN (next);
3530 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3531 and CCFP dies, then reverse our conditional and the branch
3532 to avoid the add,tr. */
3533 if (GET_CODE (pattern) == SET
3534 && SET_DEST (pattern) == pc_rtx
3535 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3536 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3537 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3538 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3539 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3540 && (fcmp_count == fbranch_count
3541 || (check_notes
3542 && find_regno_note (next, REG_DEAD, 0))))
3544 /* Reverse the branch. */
3545 tmp = XEXP (SET_SRC (pattern), 1);
3546 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3547 XEXP (SET_SRC (pattern), 2) = tmp;
3548 INSN_CODE (next) = -1;
3550 /* Reverse our condition. */
3551 tmp = PATTERN (insn);
3552 PUT_CODE (XEXP (tmp, 1),
3553 (reverse_condition_maybe_unordered
3554 (GET_CODE (XEXP (tmp, 1)))));
3560 pass = !pass;
3564 /* You may have trouble believing this, but this is the 32-bit HP-PA
3565 stack layout. Wow.
3567 Offset Contents
3569 Variable arguments (optional; any number may be allocated)
3571 SP-(4*(N+9)) arg word N
3573 SP-56 arg word 5
3574 SP-52 arg word 4
3576 Fixed arguments (must be allocated; may remain unused)
3578 SP-48 arg word 3
3579 SP-44 arg word 2
3580 SP-40 arg word 1
3581 SP-36 arg word 0
3583 Frame Marker
3585 SP-32 External Data Pointer (DP)
3586 SP-28 External sr4
3587 SP-24 External/stub RP (RP')
3588 SP-20 Current RP
3589 SP-16 Static Link
3590 SP-12 Clean up
3591 SP-8 Calling Stub RP (RP'')
3592 SP-4 Previous SP
3594 Top of Frame
3596 SP-0 Stack Pointer (points to next available address)
3600 /* This function saves registers as follows. Registers marked with ' are
3601 this function's registers (as opposed to the previous function's).
3602 If a frame_pointer isn't needed, r4 is saved as a general register;
3603 the space for the frame pointer is still allocated, though, to keep
3604 things simple.
3607 Top of Frame
3609 SP (FP') Previous FP
3610 SP + 4 Alignment filler (sigh)
3611 SP + 8 Space for locals reserved here.
3615 SP + n All call saved registers used.
3619 SP + o All call saved fp registers used.
3623 SP + p (SP') points to next available address.
3627 /* Global variables set by output_function_prologue(). */
3628 /* Size of frame. Need to know this to emit return insns from
3629 leaf procedures. */
3630 static HOST_WIDE_INT actual_fsize, local_fsize;
3631 static int save_fregs;
3633 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3634 Handle case where DISP > 8k by using the add_high_const patterns.
3636 Note in DISP > 8k case, we will leave the high part of the address
3637 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
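/* Illustrative example (not from the source): storing a register at
   %r30+24576, a displacement that does not fit in 14 bits, emits
   roughly `addil L'24576,%r30' followed by `stw %reg,R'24576(%r1)',
   leaving the high part of the address in %r1 as noted above
   (%reg stands for the source register). */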
3639 static void
3640 store_reg (int reg, HOST_WIDE_INT disp, int base)
3642 rtx dest, src, basereg;
3643 rtx_insn *insn;
3645 src = gen_rtx_REG (word_mode, reg);
3646 basereg = gen_rtx_REG (Pmode, base);
3647 if (VAL_14_BITS_P (disp))
3649 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3650 insn = emit_move_insn (dest, src);
3652 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3654 rtx delta = GEN_INT (disp);
3655 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3657 emit_move_insn (tmpreg, delta);
3658 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3659 if (DO_FRAME_NOTES)
3661 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3662 gen_rtx_SET (tmpreg,
3663 gen_rtx_PLUS (Pmode, basereg, delta)));
3664 RTX_FRAME_RELATED_P (insn) = 1;
3666 dest = gen_rtx_MEM (word_mode, tmpreg);
3667 insn = emit_move_insn (dest, src);
3669 else
3671 rtx delta = GEN_INT (disp);
3672 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3673 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3675 emit_move_insn (tmpreg, high);
3676 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3677 insn = emit_move_insn (dest, src);
3678 if (DO_FRAME_NOTES)
3679 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3680 gen_rtx_SET (gen_rtx_MEM (word_mode,
3681 gen_rtx_PLUS (word_mode,
3682 basereg,
3683 delta)),
3684 src));
3687 if (DO_FRAME_NOTES)
3688 RTX_FRAME_RELATED_P (insn) = 1;
3691 /* Emit RTL to store REG at the memory location specified by BASE and then
3692 add MOD to BASE. MOD must be <= 8k. */
3694 static void
3695 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3697 rtx basereg, srcreg, delta;
3698 rtx_insn *insn;
3700 gcc_assert (VAL_14_BITS_P (mod));
3702 basereg = gen_rtx_REG (Pmode, base);
3703 srcreg = gen_rtx_REG (word_mode, reg);
3704 delta = GEN_INT (mod);
3706 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3707 if (DO_FRAME_NOTES)
3709 RTX_FRAME_RELATED_P (insn) = 1;
3711 /* RTX_FRAME_RELATED_P must be set on each frame related set
3712 in a parallel with more than one element. */
3713 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3714 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3718 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3719 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3720 whether to add a frame note or not.
3722 In the DISP > 8k case, we leave the high part of the address in %r1.
3723 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3725 static void
3726 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3728 rtx_insn *insn;
3730 if (VAL_14_BITS_P (disp))
3732 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3733 plus_constant (Pmode,
3734 gen_rtx_REG (Pmode, base), disp));
3736 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3738 rtx basereg = gen_rtx_REG (Pmode, base);
3739 rtx delta = GEN_INT (disp);
3740 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3742 emit_move_insn (tmpreg, delta);
3743 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3744 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3745 if (DO_FRAME_NOTES)
3746 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3747 gen_rtx_SET (tmpreg,
3748 gen_rtx_PLUS (Pmode, basereg, delta)));
3750 else
3752 rtx basereg = gen_rtx_REG (Pmode, base);
3753 rtx delta = GEN_INT (disp);
3754 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3756 emit_move_insn (tmpreg,
3757 gen_rtx_PLUS (Pmode, basereg,
3758 gen_rtx_HIGH (Pmode, delta)));
3759 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3760 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3763 if (DO_FRAME_NOTES && note)
3764 RTX_FRAME_RELATED_P (insn) = 1;
3767 HOST_WIDE_INT
3768 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3770 int freg_saved = 0;
3771 int i, j;
3773 /* The code in pa_expand_prologue and pa_expand_epilogue must
3774 be consistent with the rounding and size calculation done here.
3775 Change them at the same time. */
3777 /* We do our own stack alignment. First, round the size of the
3778 stack locals up to a word boundary. */
3779 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3781 /* Space for previous frame pointer + filler. If any frame is
3782 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3783 waste some space here for the sake of HP compatibility. The
3784 first slot is only used when the frame pointer is needed. */
3785 if (size || frame_pointer_needed)
3786 size += STARTING_FRAME_OFFSET;
3788 /* If the current function calls __builtin_eh_return, then we need
3789 to allocate stack space for registers that will hold data for
3790 the exception handler. */
3791 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3793 unsigned int i;
3795 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3796 continue;
3797 size += i * UNITS_PER_WORD;
3800 /* Account for space used by the callee general register saves. */
3801 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3802 if (df_regs_ever_live_p (i))
3803 size += UNITS_PER_WORD;
3805 /* Account for space used by the callee floating point register saves. */
3806 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3807 if (df_regs_ever_live_p (i)
3808 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3810 freg_saved = 1;
3812 /* We always save both halves of the FP register, so always
3813 increment the frame size by 8 bytes. */
3814 size += 8;
3817 /* If any of the floating registers are saved, account for the
3818 alignment needed for the floating point register save block. */
3819 if (freg_saved)
3821 size = (size + 7) & ~7;
3822 if (fregs_live)
3823 *fregs_live = 1;
3826 /* The various ABIs include space for the outgoing parameters in the
3827 size of the current function's stack frame. We don't need to align
3828 for the outgoing arguments as their alignment is set by the final
3829 rounding for the frame as a whole. */
3830 size += crtl->outgoing_args_size;
3832 /* Allocate space for the fixed frame marker. This space must be
3833 allocated for any function that makes calls or allocates
3834 stack space. */
3835 if (!crtl->is_leaf || size)
3836 size += TARGET_64BIT ? 48 : 32;
3838 /* Finally, round to the preferred stack boundary. */
3839 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3840 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
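/* An illustrative sketch, not part of the original file: the rounding
   idiom used repeatedly in this function, valid whenever ALIGN is a
   power of two.  For example, round_up (37, 8) == 40 and
   round_up (40, 8) == 40.  */
#if 0
static long
round_up (long size, long align)
{
  return (size + align - 1) & ~(align - 1);
}
#endif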
3843 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3844 of memory. If any fpu reg is used in the function, we allocate
3845 such a block here, at the bottom of the frame, just in case it's needed.
3847 If this function is a leaf procedure, then we may choose not
3848 to do a "save" insn. The decision about whether or not
3849 to do this is made in regclass.c. */
3851 static void
3852 pa_output_function_prologue (FILE *file)
3854 /* The function's label and associated .PROC must never be
3855 separated and must be output *after* any profiling declarations
3856 to avoid changing spaces/subspaces within a procedure. */
3857 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3858 fputs ("\t.PROC\n", file);
3860 /* pa_expand_prologue does the dirty work now. We just need
3861 to output the assembler directives which denote the start
3862 of a function. */
3863 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3864 if (crtl->is_leaf)
3865 fputs (",NO_CALLS", file);
3866 else
3867 fputs (",CALLS", file);
3868 if (rp_saved)
3869 fputs (",SAVE_RP", file);
3871 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3872 at the beginning of the frame and that it is used as the frame
3873 pointer for the frame. We do this because our current frame
3874 layout doesn't conform to that specified in the HP runtime
3875 documentation and we need a way to indicate to programs such as
3876 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3877 isn't used by HP compilers but is supported by the assembler.
3878 However, SAVE_SP is supposed to indicate that the previous stack
3879 pointer has been saved in the frame marker. */
3880 if (frame_pointer_needed)
3881 fputs (",SAVE_SP", file);
3883 /* Pass on information about the number of callee register saves
3884 performed in the prologue.
3886 The compiler is supposed to pass the highest register number
3887 saved, the assembler then has to adjust that number before
3888 entering it into the unwind descriptor (to account for any
3889 caller saved registers with lower register numbers than the
3890 first callee saved register). */
3891 if (gr_saved)
3892 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3894 if (fr_saved)
3895 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3897 fputs ("\n\t.ENTRY\n", file);
3899 remove_useless_addtr_insns (0);
3902 void
3903 pa_expand_prologue (void)
3905 int merge_sp_adjust_with_store = 0;
3906 HOST_WIDE_INT size = get_frame_size ();
3907 HOST_WIDE_INT offset;
3908 int i;
3909 rtx tmpreg;
3910 rtx_insn *insn;
3912 gr_saved = 0;
3913 fr_saved = 0;
3914 save_fregs = 0;
3916 /* Compute total size for frame pointer, filler, locals and rounding to
3917 the next word boundary. Similar code appears in pa_compute_frame_size
3918 and must be changed in tandem with this code. */
3919 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3920 if (local_fsize || frame_pointer_needed)
3921 local_fsize += STARTING_FRAME_OFFSET;
3923 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3924 if (flag_stack_usage_info)
3925 current_function_static_stack_size = actual_fsize;
3927 /* Compute a few things we will use often. */
3928 tmpreg = gen_rtx_REG (word_mode, 1);
3930 /* Save RP first. The calling conventions manual states RP will
3931 always be stored into the caller's frame at sp - 20 or sp - 16
3932 depending on which ABI is in use. */
3933 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3935 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3936 rp_saved = true;
3938 else
3939 rp_saved = false;
3941 /* Allocate the local frame and set up the frame pointer if needed. */
3942 if (actual_fsize != 0)
3944 if (frame_pointer_needed)
3946 /* Copy the old frame pointer temporarily into %r1. Set up the
3947 new stack pointer, then store away the saved old frame pointer
3948 into the stack at sp and at the same time update the stack
3949 pointer by actual_fsize bytes. There are two versions: the first
3950 handles small (<8k) frames, the second handles large (>=8k)
3951 frames. */
3952 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3953 if (DO_FRAME_NOTES)
3954 RTX_FRAME_RELATED_P (insn) = 1;
3956 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3957 if (DO_FRAME_NOTES)
3958 RTX_FRAME_RELATED_P (insn) = 1;
3960 if (VAL_14_BITS_P (actual_fsize))
3961 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3962 else
3964 /* It is incorrect to store the saved frame pointer at *sp,
3965 then increment sp (writes beyond the current stack boundary).
3967 So instead use stwm to store at *sp and post-increment the
3968 stack pointer as an atomic operation. Then increment sp to
3969 finish allocating the new frame. */
3970 HOST_WIDE_INT adjust1 = 8192 - 64;
3971 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3973 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3974 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3975 adjust2, 1);
3978 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3979 we need to store the previous stack pointer (frame pointer)
3980 into the frame marker on targets that use the HP unwind
3981 library. This allows the HP unwind library to be used to
3982 unwind GCC frames. However, we are not fully compatible
3983 with the HP library because our frame layout differs from
3984 that specified in the HP runtime specification.
3986 We don't want a frame note on this instruction as the frame
3987 marker moves during dynamic stack allocation.
3989 This instruction also serves as a blockage to prevent
3990 register spills from being scheduled before the stack
3991 pointer is raised. This is necessary as we store
3992 registers using the frame pointer as a base register,
3993 and the frame pointer is set before sp is raised. */
3994 if (TARGET_HPUX_UNWIND_LIBRARY)
3996 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3997 GEN_INT (TARGET_64BIT ? -8 : -4));
3999 emit_move_insn (gen_rtx_MEM (word_mode, addr),
4000 hard_frame_pointer_rtx);
4002 else
4003 emit_insn (gen_blockage ());
4005 /* No frame pointer needed. */
4006 else
4008 /* In some cases we can perform the first callee register save
4009 and allocating the stack frame at the same time. If so, just
4010 make a note of it and defer allocating the frame until saving
4011 the callee registers. */
4012 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4013 merge_sp_adjust_with_store = 1;
4014 /* Cannot optimize. Adjust the stack frame by actual_fsize
4015 bytes. */
4016 else
4017 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4018 actual_fsize, 1);
4022 /* Normal register save.
4024 Do not save the frame pointer in the frame_pointer_needed case. It
4025 was done earlier. */
4026 if (frame_pointer_needed)
4028 offset = local_fsize;
4030 /* Saving the EH return data registers in the frame is the simplest
4031 way to get the frame unwind information emitted. We put them
4032 just before the general registers. */
4033 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4035 unsigned int i, regno;
4037 for (i = 0; ; ++i)
4039 regno = EH_RETURN_DATA_REGNO (i);
4040 if (regno == INVALID_REGNUM)
4041 break;
4043 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4044 offset += UNITS_PER_WORD;
4048 for (i = 18; i >= 4; i--)
4049 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4051 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4052 offset += UNITS_PER_WORD;
4053 gr_saved++;
4055 /* Account for %r3 which is saved in a special place. */
4056 gr_saved++;
4058 /* No frame pointer needed. */
4059 else
4061 offset = local_fsize - actual_fsize;
4063 /* Saving the EH return data registers in the frame is the simplest
4064 way to get the frame unwind information emitted. */
4065 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4067 unsigned int i, regno;
4069 for (i = 0; ; ++i)
4071 regno = EH_RETURN_DATA_REGNO (i);
4072 if (regno == INVALID_REGNUM)
4073 break;
4075 /* If merge_sp_adjust_with_store is nonzero, then we can
4076 optimize the first save. */
4077 if (merge_sp_adjust_with_store)
4079 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4080 merge_sp_adjust_with_store = 0;
4082 else
4083 store_reg (regno, offset, STACK_POINTER_REGNUM);
4084 offset += UNITS_PER_WORD;
4088 for (i = 18; i >= 3; i--)
4089 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4091 /* If merge_sp_adjust_with_store is nonzero, then we can
4092 optimize the first GR save. */
4093 if (merge_sp_adjust_with_store)
4095 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4096 merge_sp_adjust_with_store = 0;
4098 else
4099 store_reg (i, offset, STACK_POINTER_REGNUM);
4100 offset += UNITS_PER_WORD;
4101 gr_saved++;
4104 /* If we wanted to merge the SP adjustment with a GR save, but we never
4105 did any GR saves, then just emit the adjustment here. */
4106 if (merge_sp_adjust_with_store)
4107 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4108 actual_fsize, 1);
4111 /* The hppa calling conventions say that %r19, the pic offset
4112 register, is saved at sp - 32 (in this function's frame)
4113 when generating PIC code. FIXME: What is the correct thing
4114 to do for functions which make no calls and allocate no
4115 frame? Do we need to allocate a frame, or can we just omit
4116 the save? For now we'll just omit the save.
4118 We don't want a note on this insn as the frame marker can
4119 move if there is a dynamic stack allocation. */
4120 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4122 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4124 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4128 /* Align pointer properly (doubleword boundary). */
4129 offset = (offset + 7) & ~7;
4131 /* Floating point register store. */
4132 if (save_fregs)
4134 rtx base;
4136 /* First, compute in %r1 the address of the start of the FP register
4137 save area, based off the frame or stack pointer. */
4138 if (frame_pointer_needed)
4140 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4141 base = hard_frame_pointer_rtx;
4143 else
4145 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4146 base = stack_pointer_rtx;
4149 /* Now actually save the FP registers. */
4150 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4152 if (df_regs_ever_live_p (i)
4153 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4155 rtx addr, reg;
4156 rtx_insn *insn;
4157 addr = gen_rtx_MEM (DFmode,
4158 gen_rtx_POST_INC (word_mode, tmpreg));
4159 reg = gen_rtx_REG (DFmode, i);
4160 insn = emit_move_insn (addr, reg);
4161 if (DO_FRAME_NOTES)
4163 RTX_FRAME_RELATED_P (insn) = 1;
4164 if (TARGET_64BIT)
4166 rtx mem = gen_rtx_MEM (DFmode,
4167 plus_constant (Pmode, base,
4168 offset));
4169 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4170 gen_rtx_SET (mem, reg));
4172 else
4174 rtx meml = gen_rtx_MEM (SFmode,
4175 plus_constant (Pmode, base,
4176 offset));
4177 rtx memr = gen_rtx_MEM (SFmode,
4178 plus_constant (Pmode, base,
4179 offset + 4));
4180 rtx regl = gen_rtx_REG (SFmode, i);
4181 rtx regr = gen_rtx_REG (SFmode, i + 1);
4182 rtx setl = gen_rtx_SET (meml, regl);
4183 rtx setr = gen_rtx_SET (memr, regr);
4184 rtvec vec;
4186 RTX_FRAME_RELATED_P (setl) = 1;
4187 RTX_FRAME_RELATED_P (setr) = 1;
4188 vec = gen_rtvec (2, setl, setr);
4189 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4190 gen_rtx_SEQUENCE (VOIDmode, vec));
4193 offset += GET_MODE_SIZE (DFmode);
4194 fr_saved++;
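/* An illustrative sketch, not part of the original file: the split used
   above for frames too large for a 14-bit displacement.  The first
   chunk is allocated by the post-modify store itself; the remainder is
   a plain stack pointer adjustment.  */
#if 0
static void
split_large_frame (long fsize, long *adjust1, long *adjust2)
{
  *adjust1 = 8192 - 64;		/* fits the post-modify store */
  *adjust2 = fsize - *adjust1;	/* fsize is >= 8k on this path */
}
#endif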
4200 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4201 Handle case where DISP > 8k by using the add_high_const patterns. */
4203 static void
4204 load_reg (int reg, HOST_WIDE_INT disp, int base)
4206 rtx dest = gen_rtx_REG (word_mode, reg);
4207 rtx basereg = gen_rtx_REG (Pmode, base);
4208 rtx src;
4210 if (VAL_14_BITS_P (disp))
4211 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4212 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4214 rtx delta = GEN_INT (disp);
4215 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4217 emit_move_insn (tmpreg, delta);
4218 if (TARGET_DISABLE_INDEXING)
4220 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4221 src = gen_rtx_MEM (word_mode, tmpreg);
4223 else
4224 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4226 else
4228 rtx delta = GEN_INT (disp);
4229 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4230 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4232 emit_move_insn (tmpreg, high);
4233 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4236 emit_move_insn (dest, src);
4239 /* Update the total code bytes output to the text section. */
4241 static void
4242 update_total_code_bytes (unsigned int nbytes)
4244 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4245 && !IN_NAMED_SECTION_P (cfun->decl))
4247 unsigned int old_total = total_code_bytes;
4249 total_code_bytes += nbytes;
4251 /* Be prepared to handle overflows. */
4252 if (old_total > total_code_bytes)
4253 total_code_bytes = UINT_MAX;
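/* An illustrative sketch, not part of the original file: the overflow
   handling above is a saturating add -- once the unsigned counter
   wraps, it sticks at UINT_MAX.  */
#if 0
#include <limits.h>

static unsigned int
saturating_add (unsigned int total, unsigned int nbytes)
{
  unsigned int sum = total + nbytes;
  return sum < total ? UINT_MAX : sum;	/* wrapped, so saturate */
}
#endif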
4257 /* This function generates the assembly code for function exit.
4258 Args are as for output_function_prologue ().
4260 The function epilogue should not depend on the current stack
4261 pointer! It should use the frame pointer only. This is mandatory
4262 because of alloca; we also take advantage of it to omit stack
4263 adjustments before returning. */
4265 static void
4266 pa_output_function_epilogue (FILE *file)
4268 rtx_insn *insn = get_last_insn ();
4269 bool extra_nop;
4271 /* pa_expand_epilogue does the dirty work now. We just need
4272 to output the assembler directives which denote the end
4273 of a function.
4275 To make debuggers happy, emit a nop if the epilogue was completely
4276 eliminated due to a volatile call as the last insn in the
4277 current function. That way the return address (in %r2) will
4278 always point to a valid instruction in the current function. */
4280 /* Get the last real insn. */
4281 if (NOTE_P (insn))
4282 insn = prev_real_insn (insn);
4284 /* If it is a sequence, then look inside. */
4285 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4286 insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4288 /* If insn is a CALL_INSN, then it must be a call to a volatile
4289 function (otherwise there would be epilogue insns). */
4290 if (insn && CALL_P (insn))
4292 fputs ("\tnop\n", file);
4293 extra_nop = true;
4295 else
4296 extra_nop = false;
4298 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4300 if (TARGET_SOM && TARGET_GAS)
4302 /* We are done with this subspace except possibly for some additional
4303 debug information. Forget that we are in this subspace to ensure
4304 that the next function is output in its own subspace. */
4305 in_section = NULL;
4306 cfun->machine->in_nsubspa = 2;
4309 /* Thunks do their own insn accounting. */
4310 if (cfun->is_thunk)
4311 return;
4313 if (INSN_ADDRESSES_SET_P ())
4315 last_address = extra_nop ? 4 : 0;
4316 insn = get_last_nonnote_insn ();
4317 if (insn)
4319 last_address += INSN_ADDRESSES (INSN_UID (insn));
4320 if (INSN_P (insn))
4321 last_address += insn_default_length (insn);
4323 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4324 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4326 else
4327 last_address = UINT_MAX;
4329 /* Finally, update the total number of code bytes output so far. */
4330 update_total_code_bytes (last_address);
4333 void
4334 pa_expand_epilogue (void)
4336 rtx tmpreg;
4337 HOST_WIDE_INT offset;
4338 HOST_WIDE_INT ret_off = 0;
4339 int i;
4340 int merge_sp_adjust_with_load = 0;
4342 /* We will use this often. */
4343 tmpreg = gen_rtx_REG (word_mode, 1);
4345 /* Try to restore RP early to avoid load/use interlocks when
4346 RP gets used in the return (bv) instruction. This appears to still
4347 be necessary even when we schedule the prologue and epilogue. */
4348 if (rp_saved)
4350 ret_off = TARGET_64BIT ? -16 : -20;
4351 if (frame_pointer_needed)
4353 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4354 ret_off = 0;
4356 else
4358 /* No frame pointer, and stack is smaller than 8k. */
4359 if (VAL_14_BITS_P (ret_off - actual_fsize))
4361 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4362 ret_off = 0;
4367 /* General register restores. */
4368 if (frame_pointer_needed)
4370 offset = local_fsize;
4372 /* If the current function calls __builtin_eh_return, then we need
4373 to restore the saved EH data registers. */
4374 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4376 unsigned int i, regno;
4378 for (i = 0; ; ++i)
4380 regno = EH_RETURN_DATA_REGNO (i);
4381 if (regno == INVALID_REGNUM)
4382 break;
4384 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4385 offset += UNITS_PER_WORD;
4389 for (i = 18; i >= 4; i--)
4390 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4392 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4393 offset += UNITS_PER_WORD;
4396 else
4398 offset = local_fsize - actual_fsize;
4400 /* If the current function calls __builtin_eh_return, then we need
4401 to restore the saved EH data registers. */
4402 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4404 unsigned int i, regno;
4406 for (i = 0; ; ++i)
4408 regno = EH_RETURN_DATA_REGNO (i);
4409 if (regno == INVALID_REGNUM)
4410 break;
4412 /* Only for the first load.
4413 merge_sp_adjust_with_load holds the register load
4414 with which we will merge the sp adjustment. */
4415 if (merge_sp_adjust_with_load == 0
4416 && local_fsize == 0
4417 && VAL_14_BITS_P (-actual_fsize))
4418 merge_sp_adjust_with_load = regno;
4419 else
4420 load_reg (regno, offset, STACK_POINTER_REGNUM);
4421 offset += UNITS_PER_WORD;
4425 for (i = 18; i >= 3; i--)
4427 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4429 /* Only for the first load.
4430 merge_sp_adjust_with_load holds the register load
4431 with which we will merge the sp adjustment. */
4432 if (merge_sp_adjust_with_load == 0
4433 && local_fsize == 0
4434 && VAL_14_BITS_P (-actual_fsize))
4435 merge_sp_adjust_with_load = i;
4436 else
4437 load_reg (i, offset, STACK_POINTER_REGNUM);
4438 offset += UNITS_PER_WORD;
4443 /* Align pointer properly (doubleword boundary). */
4444 offset = (offset + 7) & ~7;
4446 /* FP register restores. */
4447 if (save_fregs)
4449 /* Adjust the register to index off of. */
4450 if (frame_pointer_needed)
4451 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4452 else
4453 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4455 /* Actually do the restores now. */
4456 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4457 if (df_regs_ever_live_p (i)
4458 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4460 rtx src = gen_rtx_MEM (DFmode,
4461 gen_rtx_POST_INC (word_mode, tmpreg));
4462 rtx dest = gen_rtx_REG (DFmode, i);
4463 emit_move_insn (dest, src);
4467 /* Emit a blockage insn here to keep these insns from being moved to
4468 an earlier spot in the epilogue, or into the main instruction stream.
4470 This is necessary as we must not cut the stack back before all the
4471 restores are finished. */
4472 emit_insn (gen_blockage ());
4474 /* Reset stack pointer (and possibly frame pointer). The stack
4475 pointer is initially set to fp + 64 to avoid a race condition. */
4476 if (frame_pointer_needed)
4478 rtx delta = GEN_INT (-64);
4480 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4481 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4482 stack_pointer_rtx, delta));
4484 /* If we were deferring a callee register restore, do it now. */
4485 else if (merge_sp_adjust_with_load)
4487 rtx delta = GEN_INT (-actual_fsize);
4488 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4490 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4492 else if (actual_fsize != 0)
4493 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4494 - actual_fsize, 0);
4496 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4497 frame greater than 8k), do so now. */
4498 if (ret_off != 0)
4499 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4501 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4503 rtx sa = EH_RETURN_STACKADJ_RTX;
4505 emit_insn (gen_blockage ());
4506 emit_insn (TARGET_64BIT
4507 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4508 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4512 bool
4513 pa_can_use_return_insn (void)
4515 if (!reload_completed)
4516 return false;
4518 if (frame_pointer_needed)
4519 return false;
4521 if (df_regs_ever_live_p (2))
4522 return false;
4524 if (crtl->profile)
4525 return false;
4527 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4530 rtx
4531 hppa_pic_save_rtx (void)
4533 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4536 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4537 #define NO_DEFERRED_PROFILE_COUNTERS 0
4538 #endif
4541 /* Vector of funcdef numbers. */
4542 static vec<int> funcdef_nos;
4544 /* Output deferred profile counters. */
4545 static void
4546 output_deferred_profile_counters (void)
4548 unsigned int i;
4549 int align, n;
4551 if (funcdef_nos.is_empty ())
4552 return;
4554 switch_to_section (data_section);
4555 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4556 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4558 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4560 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4561 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4564 funcdef_nos.release ();
4567 void
4568 hppa_profile_hook (int label_no)
4570 /* We use SImode for the address of the function in both 32 and
4571 64-bit code to avoid having to provide DImode versions of the
4572 lcla2 and load_offset_label_address insn patterns. */
4573 rtx reg = gen_reg_rtx (SImode);
4574 rtx_code_label *label_rtx = gen_label_rtx ();
4575 rtx mcount = gen_rtx_MEM (Pmode, gen_rtx_SYMBOL_REF (Pmode, "_mcount"));
4576 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4577 rtx arg_bytes, begin_label_rtx;
4578 rtx_insn *call_insn;
4579 char begin_label_name[16];
4580 bool use_mcount_pcrel_call;
4582 /* If we can reach _mcount with a pc-relative call, we can optimize
4583 loading the address of the current function. This requires linker
4584 long branch stub support. */
4585 if (!TARGET_PORTABLE_RUNTIME
4586 && !TARGET_LONG_CALLS
4587 && (TARGET_SOM || flag_function_sections))
4588 use_mcount_pcrel_call = TRUE;
4589 else
4590 use_mcount_pcrel_call = FALSE;
4592 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4593 label_no);
4594 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4596 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4598 if (!use_mcount_pcrel_call)
4600 /* The address of the function is loaded into %r25 with an instruction-
4601 relative sequence that avoids the use of relocations. The sequence
4602 is split so that the load_offset_label_address instruction can
4603 occupy the delay slot of the call to _mcount. */
4604 if (TARGET_PA_20)
4605 emit_insn (gen_lcla2 (reg, label_rtx));
4606 else
4607 emit_insn (gen_lcla1 (reg, label_rtx));
4609 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4610 reg,
4611 begin_label_rtx,
4612 label_rtx));
4615 if (!NO_DEFERRED_PROFILE_COUNTERS)
4617 rtx count_label_rtx, addr, r24;
4618 char count_label_name[16];
4620 funcdef_nos.safe_push (label_no);
4621 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4622 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4623 ggc_strdup (count_label_name));
4625 addr = force_reg (Pmode, count_label_rtx);
4626 r24 = gen_rtx_REG (Pmode, 24);
4627 emit_move_insn (r24, addr);
4629 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4630 if (use_mcount_pcrel_call)
4631 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4632 begin_label_rtx));
4633 else
4634 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4636 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4638 else
4640 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4641 if (use_mcount_pcrel_call)
4642 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4643 begin_label_rtx));
4644 else
4645 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4648 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4649 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4651 /* Indicate the _mcount call cannot throw, nor will it execute a
4652 non-local goto. */
4653 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4655 /* Allocate space for fixed arguments. */
4656 if (reg_parm_stack_space > crtl->outgoing_args_size)
4657 crtl->outgoing_args_size = reg_parm_stack_space;
4660 /* Fetch the return address for the frame COUNT steps up from
4661 the current frame, after the prologue. FRAMEADDR is the
4662 frame pointer of the COUNT frame.
4664 We want to ignore any export stub remnants here. To handle this,
4665 we examine the code at the return address, and if it is an export
4666 stub, we return a memory rtx for the stub return address stored
4667 at frame-24.
4669 The value returned is used in two different ways:
4671 1. To find a function's caller.
4673 2. To change the return address for a function.
4675 This function handles most instances of case 1; however, it will
4676 fail if there are two levels of stubs to execute on the return
4677 path. The only way I believe that can happen is if the return value
4678 needs a parameter relocation, which never happens for C code.
4680 This function handles most instances of case 2; however, it will
4681 fail if we did not originally have stub code on the return path
4682 but will need stub code on the new return path. This can happen if
4683 the caller & callee are both in the main program, but the new
4684 return location is in a shared library. */
4686 rtx
4687 pa_return_addr_rtx (int count, rtx frameaddr)
4689 rtx label;
4690 rtx rp;
4691 rtx saved_rp;
4692 rtx ins;
4694 /* The instruction stream at the return address of a PA1.X export stub is:
4696 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4697 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4698 0x00011820 | stub+16: mtsp r1,sr0
4699 0xe0400002 | stub+20: be,n 0(sr0,rp)
4701 0xe0400002 must be specified as -532676606 so that it won't be
4702 rejected as an invalid immediate operand on 64-bit hosts.
4704 The instruction stream at the return address of a PA2.0 export stub is:
4706 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4707 0xe840d002 | stub+12: bve,n (rp)
4710 HOST_WIDE_INT insns[4];
4711 int i, len;
4713 if (count != 0)
4714 return NULL_RTX;
4716 rp = get_hard_reg_initial_val (Pmode, 2);
4718 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4719 return rp;
4721 /* If there is no export stub then just use the value saved from
4722 the return pointer register. */
4724 saved_rp = gen_reg_rtx (Pmode);
4725 emit_move_insn (saved_rp, rp);
4727 /* Get pointer to the instruction stream. We have to mask out the
4728 privilege level from the two low order bits of the return address
4729 pointer here so that ins will point to the start of the first
4730 instruction that would have been executed if we returned. */
4731 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4732 label = gen_label_rtx ();
4734 if (TARGET_PA_20)
4736 insns[0] = 0x4bc23fd1;
4737 insns[1] = -398405630; /* 0xe840d002 */
4738 len = 2;
4740 else
4742 insns[0] = 0x4bc23fd1;
4743 insns[1] = 0x004010a1;
4744 insns[2] = 0x00011820;
4745 insns[3] = -532676606;
4746 len = 4;
4749 /* Check the instruction stream at the normal return address for the
4750 export stub. If it is an export stub, then our return address is
4751 really in -24[frameaddr]. */
4753 for (i = 0; i < len; i++)
4755 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4756 rtx op1 = GEN_INT (insns[i]);
4757 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4760 /* Here we know that our return address points to an export
4761 stub. We don't want to return the address of the export stub,
4762 but rather the return address of the export stub. That return
4763 address is stored at -24[frameaddr]. */
4765 emit_move_insn (saved_rp,
4766 gen_rtx_MEM (Pmode,
4767 memory_address (Pmode,
4768 plus_constant (Pmode, frameaddr,
4769 -24))));
4771 emit_label (label);
4773 return saved_rp;
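/* An illustrative sketch, not part of the original file: the RTL
   emitted above amounts to this word-by-word comparison, where INS
   points at the first instruction executed on return, i.e. after the
   privilege-level bits have been masked off the raw return address.  */
#if 0
static int
points_at_export_stub (const unsigned int *ins, int pa20)
{
  static const unsigned int pa10_stub[4]
    = { 0x4bc23fd1, 0x004010a1, 0x00011820, 0xe0400002 };
  static const unsigned int pa20_stub[2]
    = { 0x4bc23fd1, 0xe840d002 };
  const unsigned int *stub = pa20 ? pa20_stub : pa10_stub;
  int i, len = pa20 ? 2 : 4;

  for (i = 0; i < len; i++)
    if (ins[i] != stub[i])
      return 0;
  return 1;
}
#endif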
4776 void
4777 pa_emit_bcond_fp (rtx operands[])
4779 enum rtx_code code = GET_CODE (operands[0]);
4780 rtx operand0 = operands[1];
4781 rtx operand1 = operands[2];
4782 rtx label = operands[3];
4784 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4785 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4787 emit_jump_insn (gen_rtx_SET (pc_rtx,
4788 gen_rtx_IF_THEN_ELSE (VOIDmode,
4789 gen_rtx_fmt_ee (NE,
4790 VOIDmode,
4791 gen_rtx_REG (CCFPmode, 0),
4792 const0_rtx),
4793 gen_rtx_LABEL_REF (VOIDmode, label),
4794 pc_rtx)));
4798 /* Adjust the cost of a scheduling dependency. Return the new cost of
4799 a dependency of type DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
4801 static int
4802 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4803 unsigned int)
4805 enum attr_type attr_type;
4807 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4808 true dependencies as they are described with bypasses now. */
4809 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4810 return cost;
4812 if (! recog_memoized (insn))
4813 return 0;
4815 attr_type = get_attr_type (insn);
4817 switch (dep_type)
4819 case REG_DEP_ANTI:
4820 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4821 cycles later. */
4823 if (attr_type == TYPE_FPLOAD)
4825 rtx pat = PATTERN (insn);
4826 rtx dep_pat = PATTERN (dep_insn);
4827 if (GET_CODE (pat) == PARALLEL)
4829 /* This happens for the fldXs,mb patterns. */
4830 pat = XVECEXP (pat, 0, 0);
4832 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4833 /* If this happens, we have to extend this to schedule
4834 optimally. Return 0 for now. */
4835 return 0;
4837 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4839 if (! recog_memoized (dep_insn))
4840 return 0;
4841 switch (get_attr_type (dep_insn))
4843 case TYPE_FPALU:
4844 case TYPE_FPMULSGL:
4845 case TYPE_FPMULDBL:
4846 case TYPE_FPDIVSGL:
4847 case TYPE_FPDIVDBL:
4848 case TYPE_FPSQRTSGL:
4849 case TYPE_FPSQRTDBL:
4850 /* A fpload can't be issued until one cycle before a
4851 preceding arithmetic operation has finished if
4852 the target of the fpload is any of the sources
4853 (or destination) of the arithmetic operation. */
4854 return insn_default_latency (dep_insn) - 1;
4856 default:
4857 return 0;
4861 else if (attr_type == TYPE_FPALU)
4863 rtx pat = PATTERN (insn);
4864 rtx dep_pat = PATTERN (dep_insn);
4865 if (GET_CODE (pat) == PARALLEL)
4867 /* This happens for the fldXs,mb patterns. */
4868 pat = XVECEXP (pat, 0, 0);
4870 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4871 /* If this happens, we have to extend this to schedule
4872 optimally. Return 0 for now. */
4873 return 0;
4875 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4877 if (! recog_memoized (dep_insn))
4878 return 0;
4879 switch (get_attr_type (dep_insn))
4881 case TYPE_FPDIVSGL:
4882 case TYPE_FPDIVDBL:
4883 case TYPE_FPSQRTSGL:
4884 case TYPE_FPSQRTDBL:
4885 /* An ALU flop can't be issued until two cycles before a
4886 preceding divide or sqrt operation has finished if
4887 the target of the ALU flop is any of the sources
4888 (or destination) of the divide or sqrt operation. */
4889 return insn_default_latency (dep_insn) - 2;
4891 default:
4892 return 0;
4897 /* For other anti dependencies, the cost is 0. */
4898 return 0;
4900 case REG_DEP_OUTPUT:
4901 /* Output dependency; DEP_INSN writes a register that INSN writes some
4902 cycles later. */
4903 if (attr_type == TYPE_FPLOAD)
4905 rtx pat = PATTERN (insn);
4906 rtx dep_pat = PATTERN (dep_insn);
4907 if (GET_CODE (pat) == PARALLEL)
4909 /* This happens for the fldXs,mb patterns. */
4910 pat = XVECEXP (pat, 0, 0);
4912 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4913 /* If this happens, we have to extend this to schedule
4914 optimally. Return 0 for now. */
4915 return 0;
4917 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4919 if (! recog_memoized (dep_insn))
4920 return 0;
4921 switch (get_attr_type (dep_insn))
4923 case TYPE_FPALU:
4924 case TYPE_FPMULSGL:
4925 case TYPE_FPMULDBL:
4926 case TYPE_FPDIVSGL:
4927 case TYPE_FPDIVDBL:
4928 case TYPE_FPSQRTSGL:
4929 case TYPE_FPSQRTDBL:
4930 /* A fpload can't be issued until one cycle before a
4931 preceding arithmetic operation has finished if
4932 the target of the fpload is the destination of the
4933 arithmetic operation.
4935 Exception: For PA7100LC, PA7200 and PA7300, the cost
4936 is 3 cycles, unless they bundle together. We also
4937 pay the penalty if the second insn is a fpload. */
4938 return insn_default_latency (dep_insn) - 1;
4940 default:
4941 return 0;
4945 else if (attr_type == TYPE_FPALU)
4947 rtx pat = PATTERN (insn);
4948 rtx dep_pat = PATTERN (dep_insn);
4949 if (GET_CODE (pat) == PARALLEL)
4951 /* This happens for the fldXs,mb patterns. */
4952 pat = XVECEXP (pat, 0, 0);
4954 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4955 /* If this happens, we have to extend this to schedule
4956 optimally. Return 0 for now. */
4957 return 0;
4959 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4961 if (! recog_memoized (dep_insn))
4962 return 0;
4963 switch (get_attr_type (dep_insn))
4965 case TYPE_FPDIVSGL:
4966 case TYPE_FPDIVDBL:
4967 case TYPE_FPSQRTSGL:
4968 case TYPE_FPSQRTDBL:
4969 /* An ALU flop can't be issued until two cycles before a
4970 preceding divide or sqrt operation has finished if
4971 the target of the ALU flop is also the target of
4972 the divide or sqrt operation. */
4973 return insn_default_latency (dep_insn) - 2;
4975 default:
4976 return 0;
4981 /* For other output dependencies, the cost is 0. */
4982 return 0;
4984 default:
4985 gcc_unreachable ();
4989 /* Adjust scheduling priorities. We use this to try and keep addil
4990 and the next use of %r1 close together. */
4991 static int
4992 pa_adjust_priority (rtx_insn *insn, int priority)
4994 rtx set = single_set (insn);
4995 rtx src, dest;
4996 if (set)
4998 src = SET_SRC (set);
4999 dest = SET_DEST (set);
5000 if (GET_CODE (src) == LO_SUM
5001 && symbolic_operand (XEXP (src, 1), VOIDmode)
5002 && ! read_only_operand (XEXP (src, 1), VOIDmode))
5003 priority >>= 3;
5005 else if (GET_CODE (src) == MEM
5006 && GET_CODE (XEXP (src, 0)) == LO_SUM
5007 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
5008 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
5009 priority >>= 1;
5011 else if (GET_CODE (dest) == MEM
5012 && GET_CODE (XEXP (dest, 0)) == LO_SUM
5013 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
5014 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
5015 priority >>= 3;
5017 return priority;
5020 /* The 700 can only issue a single insn at a time.
5021 The 7XXX processors can issue two insns at a time.
5022 The 8000 can issue four insns at a time. */
5023 static int
5024 pa_issue_rate (void)
5026 switch (pa_cpu)
5028 case PROCESSOR_700: return 1;
5029 case PROCESSOR_7100: return 2;
5030 case PROCESSOR_7100LC: return 2;
5031 case PROCESSOR_7200: return 2;
5032 case PROCESSOR_7300: return 2;
5033 case PROCESSOR_8000: return 4;
5035 default:
5036 gcc_unreachable ();
5042 /* Return any length plus adjustment needed by INSN which already has
5043 its length computed as LENGTH. Return LENGTH if no adjustment is
5044 necessary.
5046 Also compute the length of an inline block move here as it is too
5047 complicated to express as a length attribute in pa.md. */
5048 int
5049 pa_adjust_insn_length (rtx_insn *insn, int length)
5051 rtx pat = PATTERN (insn);
5053 /* If length is negative or undefined, provide initial length. */
5054 if ((unsigned int) length >= INT_MAX)
5056 if (GET_CODE (pat) == SEQUENCE)
5057 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5059 switch (get_attr_type (insn))
5061 case TYPE_MILLI:
5062 length = pa_attr_length_millicode_call (insn);
5063 break;
5064 case TYPE_CALL:
5065 length = pa_attr_length_call (insn, 0);
5066 break;
5067 case TYPE_SIBCALL:
5068 length = pa_attr_length_call (insn, 1);
5069 break;
5070 case TYPE_DYNCALL:
5071 length = pa_attr_length_indirect_call (insn);
5072 break;
5073 case TYPE_SH_FUNC_ADRS:
5074 length = pa_attr_length_millicode_call (insn) + 20;
5075 break;
5076 default:
5077 gcc_unreachable ();
5081 /* Block move pattern. */
5082 if (NONJUMP_INSN_P (insn)
5083 && GET_CODE (pat) == PARALLEL
5084 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5085 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5086 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5087 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5088 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5089 length += compute_movmem_length (insn) - 4;
5090 /* Block clear pattern. */
5091 else if (NONJUMP_INSN_P (insn)
5092 && GET_CODE (pat) == PARALLEL
5093 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5094 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5095 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5096 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5097 length += compute_clrmem_length (insn) - 4;
5098 /* Conditional branch with an unfilled delay slot. */
5099 else if (JUMP_P (insn) && ! simplejump_p (insn))
5101 /* Adjust a short backwards conditional with an unfilled delay slot. */
5102 if (GET_CODE (pat) == SET
5103 && length == 4
5104 && JUMP_LABEL (insn) != NULL_RTX
5105 && ! forward_branch_p (insn))
5106 length += 4;
5107 else if (GET_CODE (pat) == PARALLEL
5108 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5109 && length == 4)
5110 length += 4;
5111 /* Adjust dbra insn with short backwards conditional branch with
5112 unfilled delay slot -- only for the case where the counter is in a
5113 general register. */
5114 else if (GET_CODE (pat) == PARALLEL
5115 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5116 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5117 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5118 && length == 4
5119 && ! forward_branch_p (insn))
5120 length += 4;
5122 return length;
5125 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5127 static bool
5128 pa_print_operand_punct_valid_p (unsigned char code)
5130 if (code == '@'
5131 || code == '#'
5132 || code == '*'
5133 || code == '^')
5134 return true;
5136 return false;
5139 /* Print operand X (an rtx) in assembler syntax to file FILE.
5140 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5141 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5143 void
5144 pa_print_operand (FILE *file, rtx x, int code)
5146 switch (code)
5148 case '#':
5149 /* Output a 'nop' if there's nothing for the delay slot. */
5150 if (dbr_sequence_length () == 0)
5151 fputs ("\n\tnop", file);
5152 return;
5153 case '*':
5154 /* Output a nullification completer if there's nothing for the
5155 delay slot or nullification is requested. */
5156 if (dbr_sequence_length () == 0
5157 || (final_sequence
5158 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5159 fputs (",n", file);
5160 return;
5161 case 'R':
5162 /* Print out the second register name of a register pair.
5163 I.e., R (6) => 7. */
5164 fputs (reg_names[REGNO (x) + 1], file);
5165 return;
5166 case 'r':
5167 /* A register or zero. */
5168 if (x == const0_rtx
5169 || (x == CONST0_RTX (DFmode))
5170 || (x == CONST0_RTX (SFmode)))
5172 fputs ("%r0", file);
5173 return;
5175 else
5176 break;
5177 case 'f':
5178 /* A register or zero (floating point). */
5179 if (x == const0_rtx
5180 || (x == CONST0_RTX (DFmode))
5181 || (x == CONST0_RTX (SFmode)))
5183 fputs ("%fr0", file);
5184 return;
5186 else
5187 break;
5188 case 'A':
5190 rtx xoperands[2];
5192 xoperands[0] = XEXP (XEXP (x, 0), 0);
5193 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5194 pa_output_global_address (file, xoperands[1], 0);
5195 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5196 return;
5199 case 'C': /* Plain (C)ondition */
5200 case 'X':
5201 switch (GET_CODE (x))
5203 case EQ:
5204 fputs ("=", file); break;
5205 case NE:
5206 fputs ("<>", file); break;
5207 case GT:
5208 fputs (">", file); break;
5209 case GE:
5210 fputs (">=", file); break;
5211 case GEU:
5212 fputs (">>=", file); break;
5213 case GTU:
5214 fputs (">>", file); break;
5215 case LT:
5216 fputs ("<", file); break;
5217 case LE:
5218 fputs ("<=", file); break;
5219 case LEU:
5220 fputs ("<<=", file); break;
5221 case LTU:
5222 fputs ("<<", file); break;
5223 default:
5224 gcc_unreachable ();
5226 return;
5227 case 'N': /* Condition, (N)egated */
5228 switch (GET_CODE (x))
5230 case EQ:
5231 fputs ("<>", file); break;
5232 case NE:
5233 fputs ("=", file); break;
5234 case GT:
5235 fputs ("<=", file); break;
5236 case GE:
5237 fputs ("<", file); break;
5238 case GEU:
5239 fputs ("<<", file); break;
5240 case GTU:
5241 fputs ("<<=", file); break;
5242 case LT:
5243 fputs (">=", file); break;
5244 case LE:
5245 fputs (">", file); break;
5246 case LEU:
5247 fputs (">>", file); break;
5248 case LTU:
5249 fputs (">>=", file); break;
5250 default:
5251 gcc_unreachable ();
5253 return;
5254 /* For floating point comparisons. Note that the output
5255 predicates are the complement of the desired mode. The
5256 conditions for GT, GE, LT, LE and LTGT cause an invalid
5257 operation exception if the result is unordered and this
5258 exception is enabled in the floating-point status register. */
5259 case 'Y':
5260 switch (GET_CODE (x))
5262 case EQ:
5263 fputs ("!=", file); break;
5264 case NE:
5265 fputs ("=", file); break;
5266 case GT:
5267 fputs ("!>", file); break;
5268 case GE:
5269 fputs ("!>=", file); break;
5270 case LT:
5271 fputs ("!<", file); break;
5272 case LE:
5273 fputs ("!<=", file); break;
5274 case LTGT:
5275 fputs ("!<>", file); break;
5276 case UNLE:
5277 fputs ("!?<=", file); break;
5278 case UNLT:
5279 fputs ("!?<", file); break;
5280 case UNGE:
5281 fputs ("!?>=", file); break;
5282 case UNGT:
5283 fputs ("!?>", file); break;
5284 case UNEQ:
5285 fputs ("!?=", file); break;
5286 case UNORDERED:
5287 fputs ("!?", file); break;
5288 case ORDERED:
5289 fputs ("?", file); break;
5290 default:
5291 gcc_unreachable ();
5293 return;
5294 case 'S': /* Condition, operands are (S)wapped. */
5295 switch (GET_CODE (x))
5297 case EQ:
5298 fputs ("=", file); break;
5299 case NE:
5300 fputs ("<>", file); break;
5301 case GT:
5302 fputs ("<", file); break;
5303 case GE:
5304 fputs ("<=", file); break;
5305 case GEU:
5306 fputs ("<<=", file); break;
5307 case GTU:
5308 fputs ("<<", file); break;
5309 case LT:
5310 fputs (">", file); break;
5311 case LE:
5312 fputs (">=", file); break;
5313 case LEU:
5314 fputs (">>=", file); break;
5315 case LTU:
5316 fputs (">>", file); break;
5317 default:
5318 gcc_unreachable ();
5320 return;
5321 case 'B': /* Condition, (B)oth swapped and negate. */
5322 switch (GET_CODE (x))
5324 case EQ:
5325 fputs ("<>", file); break;
5326 case NE:
5327 fputs ("=", file); break;
5328 case GT:
5329 fputs (">=", file); break;
5330 case GE:
5331 fputs (">", file); break;
5332 case GEU:
5333 fputs (">>", file); break;
5334 case GTU:
5335 fputs (">>=", file); break;
5336 case LT:
5337 fputs ("<=", file); break;
5338 case LE:
5339 fputs ("<", file); break;
5340 case LEU:
5341 fputs ("<<", file); break;
5342 case LTU:
5343 fputs ("<<=", file); break;
5344 default:
5345 gcc_unreachable ();
5347 return;
5348 case 'k':
5349 gcc_assert (GET_CODE (x) == CONST_INT);
5350 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5351 return;
5352 case 'Q':
5353 gcc_assert (GET_CODE (x) == CONST_INT);
5354 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5355 return;
5356 case 'L':
5357 gcc_assert (GET_CODE (x) == CONST_INT);
5358 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5359 return;
5360 case 'o':
5361 gcc_assert (GET_CODE (x) == CONST_INT
5362 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5363 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5364 return;
5365 case 'O':
5366 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5367 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5368 return;
5369 case 'p':
5370 gcc_assert (GET_CODE (x) == CONST_INT);
5371 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5372 return;
5373 case 'P':
5374 gcc_assert (GET_CODE (x) == CONST_INT);
5375 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5376 return;
5377 case 'I':
5378 if (GET_CODE (x) == CONST_INT)
5379 fputs ("i", file);
5380 return;
5381 case 'M':
5382 case 'F':
5383 switch (GET_CODE (XEXP (x, 0)))
5385 case PRE_DEC:
5386 case PRE_INC:
5387 if (ASSEMBLER_DIALECT == 0)
5388 fputs ("s,mb", file);
5389 else
5390 fputs (",mb", file);
5391 break;
5392 case POST_DEC:
5393 case POST_INC:
5394 if (ASSEMBLER_DIALECT == 0)
5395 fputs ("s,ma", file);
5396 else
5397 fputs (",ma", file);
5398 break;
5399 case PLUS:
5400 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5401 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5403 if (ASSEMBLER_DIALECT == 0)
5404 fputs ("x", file);
5406 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5407 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5409 if (ASSEMBLER_DIALECT == 0)
5410 fputs ("x,s", file);
5411 else
5412 fputs (",s", file);
5414 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5415 fputs ("s", file);
5416 break;
5417 default:
5418 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5419 fputs ("s", file);
5420 break;
5422 return;
5423 case 'G':
5424 pa_output_global_address (file, x, 0);
5425 return;
5426 case 'H':
5427 pa_output_global_address (file, x, 1);
5428 return;
5429 case 0: /* Don't do anything special */
5430 break;
5431 case 'Z':
5433 unsigned op[3];
5434 compute_zdepwi_operands (INTVAL (x), op);
5435 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5436 return;
5438 case 'z':
5440 unsigned op[3];
5441 compute_zdepdi_operands (INTVAL (x), op);
5442 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5443 return;
5445 case 'c':
5446 /* We can get here from a .vtable_inherit due to our
5447 CONSTANT_ADDRESS_P rejecting perfectly good constant
5448 addresses. */
5449 break;
5450 default:
5451 gcc_unreachable ();
5453 if (GET_CODE (x) == REG)
5455 fputs (reg_names [REGNO (x)], file);
5456 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5458 fputs ("R", file);
5459 return;
5461 if (FP_REG_P (x)
5462 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5463 && (REGNO (x) & 1) == 0)
5464 fputs ("L", file);
5466 else if (GET_CODE (x) == MEM)
5468 int size = GET_MODE_SIZE (GET_MODE (x));
5469 rtx base = NULL_RTX;
5470 switch (GET_CODE (XEXP (x, 0)))
5472 case PRE_DEC:
5473 case POST_DEC:
5474 base = XEXP (XEXP (x, 0), 0);
5475 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5476 break;
5477 case PRE_INC:
5478 case POST_INC:
5479 base = XEXP (XEXP (x, 0), 0);
5480 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5481 break;
5482 case PLUS:
5483 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5484 fprintf (file, "%s(%s)",
5485 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5486 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5487 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5488 fprintf (file, "%s(%s)",
5489 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5490 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5491 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5492 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5494 /* Because the REG_POINTER flag can get lost during reload,
5495 pa_legitimate_address_p canonicalizes the order of the
5496 index and base registers in the combined move patterns. */
5497 rtx base = XEXP (XEXP (x, 0), 1);
5498 rtx index = XEXP (XEXP (x, 0), 0);
5500 fprintf (file, "%s(%s)",
5501 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5503 else
5504 output_address (GET_MODE (x), XEXP (x, 0));
5505 break;
5506 default:
5507 output_address (GET_MODE (x), XEXP (x, 0));
5508 break;
5511 else
5512 output_addr_const (file, x);
5515 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5517 void
5518 pa_output_global_address (FILE *file, rtx x, int round_constant)
5521 /* Imagine (high (const (plus ...))). */
5522 if (GET_CODE (x) == HIGH)
5523 x = XEXP (x, 0);
5525 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5526 output_addr_const (file, x);
5527 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5529 output_addr_const (file, x);
5530 fputs ("-$global$", file);
5532 else if (GET_CODE (x) == CONST)
5534 const char *sep = "";
5535 int offset = 0; /* assembler wants -$global$ at end */
5536 rtx base = NULL_RTX;
5538 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5540 case LABEL_REF:
5541 case SYMBOL_REF:
5542 base = XEXP (XEXP (x, 0), 0);
5543 output_addr_const (file, base);
5544 break;
5545 case CONST_INT:
5546 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5547 break;
5548 default:
5549 gcc_unreachable ();
5552 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5554 case LABEL_REF:
5555 case SYMBOL_REF:
5556 base = XEXP (XEXP (x, 0), 1);
5557 output_addr_const (file, base);
5558 break;
5559 case CONST_INT:
5560 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5561 break;
5562 default:
5563 gcc_unreachable ();
5566 /* How bogus. The compiler is apparently responsible for
5567 rounding the constant if it uses an LR field selector.
5569 The linker and/or assembler seem a better place since
5570 they have to do this kind of thing already.
5572 If we fail to do this, HP's optimizing linker may eliminate
5573 an addil, but not update the ldw/stw/ldo instruction that
5574 uses the result of the addil. */
5575 if (round_constant)
5576 offset = ((offset + 0x1000) & ~0x1fff);
5578 switch (GET_CODE (XEXP (x, 0)))
5580 case PLUS:
5581 if (offset < 0)
5583 offset = -offset;
5584 sep = "-";
5586 else
5587 sep = "+";
5588 break;
5590 case MINUS:
5591 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5592 sep = "-";
5593 break;
5595 default:
5596 gcc_unreachable ();
5599 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5600 fputs ("-$global$", file);
5601 if (offset)
5602 fprintf (file, "%s%d", sep, offset);
5604 else
5605 output_addr_const (file, x);
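/* An illustrative sketch, not part of the original file: the LR-field
   rounding above rounds OFFSET to the nearest multiple of 0x2000, with
   ties rounding up -- e.g. 0x0fff rounds to 0 and 0x1000 to 0x2000.  */
#if 0
static int
round_lr_offset (int offset)
{
  return (offset + 0x1000) & ~0x1fff;
}
#endif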
5608 /* Output boilerplate text to appear at the beginning of the file.
5609 There are several possible versions. */
5610 #define aputs(x) fputs(x, asm_out_file)
5611 static inline void
5612 pa_file_start_level (void)
5614 if (TARGET_64BIT)
5615 aputs ("\t.LEVEL 2.0w\n");
5616 else if (TARGET_PA_20)
5617 aputs ("\t.LEVEL 2.0\n");
5618 else if (TARGET_PA_11)
5619 aputs ("\t.LEVEL 1.1\n");
5620 else
5621 aputs ("\t.LEVEL 1.0\n");
5624 static inline void
5625 pa_file_start_space (int sortspace)
5627 aputs ("\t.SPACE $PRIVATE$");
5628 if (sortspace)
5629 aputs (",SORT=16");
5630 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5631 if (flag_tm)
5632 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5633 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5634 "\n\t.SPACE $TEXT$");
5635 if (sortspace)
5636 aputs (",SORT=8");
5637 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5638 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5641 static inline void
5642 pa_file_start_file (int want_version)
5644 if (write_symbols != NO_DEBUG)
5646 output_file_directive (asm_out_file, main_input_filename);
5647 if (want_version)
5648 aputs ("\t.version\t\"01.01\"\n");
5652 static inline void
5653 pa_file_start_mcount (const char *aswhat)
5655 if (profile_flag)
5656 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5659 static void
5660 pa_elf_file_start (void)
5662 pa_file_start_level ();
5663 pa_file_start_mcount ("ENTRY");
5664 pa_file_start_file (0);
5667 static void
5668 pa_som_file_start (void)
5670 pa_file_start_level ();
5671 pa_file_start_space (0);
5672 aputs ("\t.IMPORT $global$,DATA\n"
5673 "\t.IMPORT $$dyncall,MILLICODE\n");
5674 pa_file_start_mcount ("CODE");
5675 pa_file_start_file (0);
5678 static void
5679 pa_linux_file_start (void)
5681 pa_file_start_file (1);
5682 pa_file_start_level ();
5683 pa_file_start_mcount ("CODE");
5686 static void
5687 pa_hpux64_gas_file_start (void)
5689 pa_file_start_level ();
5690 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5691 if (profile_flag)
5692 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5693 #endif
5694 pa_file_start_file (1);
5697 static void
5698 pa_hpux64_hpas_file_start (void)
5700 pa_file_start_level ();
5701 pa_file_start_space (1);
5702 pa_file_start_mcount ("CODE");
5703 pa_file_start_file (0);
5705 #undef aputs
5707 /* Search the deferred plabel list for SYMBOL and return its internal
5708 label. If an entry for SYMBOL is not found, a new entry is created. */
5710 rtx
5711 pa_get_deferred_plabel (rtx symbol)
5713 const char *fname = XSTR (symbol, 0);
5714 size_t i;
5716 /* See if we have already put this function on the list of deferred
5717 plabels. This list is generally small, so a linear search is not
5718 too ugly. If it proves too slow, replace it with something faster. */
5719 for (i = 0; i < n_deferred_plabels; i++)
5720 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5721 break;
5723 /* If the deferred plabel list is empty, or this entry was not found
5724 on the list, create a new entry on the list. */
5725 if (deferred_plabels == NULL || i == n_deferred_plabels)
5727 tree id;
5729 if (deferred_plabels == 0)
5730 deferred_plabels = ggc_alloc<deferred_plabel> ();
5731 else
5732 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5733 deferred_plabels,
5734 n_deferred_plabels + 1);
5736 i = n_deferred_plabels++;
5737 deferred_plabels[i].internal_label = gen_label_rtx ();
5738 deferred_plabels[i].symbol = symbol;
5740 /* Gross. We have just implicitly taken the address of this
5741 function. Mark it in the same manner as assemble_name. */
5742 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5743 if (id)
5744 mark_referenced (id);
5747 return deferred_plabels[i].internal_label;
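/* An illustrative sketch, not part of the original file: the
   lookup-or-append shape used above, shown on a plain array.  Returns
   the index of NAME, appending it when it is not yet in the table.  */
#if 0
#include <string.h>

static size_t
lookup_or_append (const char **table, size_t *n, const char *name)
{
  size_t i;

  for (i = 0; i < *n; i++)
    if (strcmp (table[i], name) == 0)
      return i;
  table[(*n)++] = name;		/* caller guarantees capacity */
  return i;
}
#endif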
5750 static void
5751 output_deferred_plabels (void)
5753 size_t i;
5755 /* If we have some deferred plabels, then we need to switch into the
5756 data or readonly data section, and align it to a 4-byte (8-byte
5757 for TARGET_64BIT) boundary before outputting the deferred plabels. */
5758 if (n_deferred_plabels)
5760 switch_to_section (flag_pic ? data_section : readonly_data_section);
5761 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5764 /* Now output the deferred plabels. */
5765 for (i = 0; i < n_deferred_plabels; i++)
5767 targetm.asm_out.internal_label (asm_out_file, "L",
5768 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5769 assemble_integer (deferred_plabels[i].symbol,
5770 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5774 /* Initialize optabs to point to emulation routines. */
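/* For example, with the HP-UX long double library a TFmode addition
   such as a + b is emitted as a library call to _U_Qfadd rather than
   as inline code.  */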
5776 static void
5777 pa_init_libfuncs (void)
5779 if (HPUX_LONG_DOUBLE_LIBRARY)
5781 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5782 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5783 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5784 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5785 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5786 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5787 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5788 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5789 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5791 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5792 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5793 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5794 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5795 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5796 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5797 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5799 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5800 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5801 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5802 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5804 set_conv_libfunc (sfix_optab, SImode, TFmode,
5805 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5806 : "_U_Qfcnvfxt_quad_to_sgl");
5807 set_conv_libfunc (sfix_optab, DImode, TFmode,
5808 "_U_Qfcnvfxt_quad_to_dbl");
5809 set_conv_libfunc (ufix_optab, SImode, TFmode,
5810 "_U_Qfcnvfxt_quad_to_usgl");
5811 set_conv_libfunc (ufix_optab, DImode, TFmode,
5812 "_U_Qfcnvfxt_quad_to_udbl");
5814 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5815 "_U_Qfcnvxf_sgl_to_quad");
5816 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5817 "_U_Qfcnvxf_dbl_to_quad");
5818 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5819 "_U_Qfcnvxf_usgl_to_quad");
5820 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5821 "_U_Qfcnvxf_udbl_to_quad");
5824 if (TARGET_SYNC_LIBCALL)
5825 init_sync_libfuncs (8);
5828 /* HP's millicode routines mean something special to the assembler.
5829 Keep track of which ones we have used. */
5831 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5832 static void import_milli (enum millicodes);
5833 static char imported[(int) end1000];
5834 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5835 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5836 #define MILLI_START 10
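/* import_milli overwrites the "$$...." placeholder in import_string,
   starting at column MILLI_START, with the routine name.  Importing
   mulI, for instance, emits

	.IMPORT $$mulI,MILLICODE

   and each routine is imported at most once per output file.  */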
5838 static void
5839 import_milli (enum millicodes code)
5841 char str[sizeof (import_string)];
5843 if (!imported[(int) code])
5845 imported[(int) code] = 1;
5846 strcpy (str, import_string);
5847 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5848 output_asm_insn (str, 0);
5852 /* The register constraints have put the operands and return value in
5853 the proper registers. */
5855 const char *
5856 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5858 import_milli (mulI);
5859 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5862 /* Emit the rtl for doing a division by a constant. */
5864 /* Do magic division millicodes exist for this value? */
5865 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5867 /* We'll use an array to keep track of the magic millicodes and
5868 whether or not we've used them already. [n][0] is signed, [n][1] is
5869 unsigned. */
5871 static int div_milli[16][2];
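/* For example, a signed division by 3 has pa_magic_milli[3] nonzero,
   so it is emitted as a call to the magic millicode routine $$divI_3;
   the matching .IMPORT directive is output only the first time the
   routine is used, as recorded in div_milli.  */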
5874 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5876 if (GET_CODE (operands[2]) == CONST_INT
5877 && INTVAL (operands[2]) > 0
5878 && INTVAL (operands[2]) < 16
5879 && pa_magic_milli[INTVAL (operands[2])])
5881 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5883 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5884 emit
5885 (gen_rtx_PARALLEL
5886 (VOIDmode,
5887 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5888 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5889 SImode,
5890 gen_rtx_REG (SImode, 26),
5891 operands[2])),
5892 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5893 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5894 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5895 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5896 gen_rtx_CLOBBER (VOIDmode, ret))));
5897 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5898 return 1;
5900 return 0;
5903 const char *
5904 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5906 int divisor;
5908 /* If the divisor is a constant, try to use one of the special
5909 opcodes. */
5910 if (GET_CODE (operands[0]) == CONST_INT)
5912 static char buf[100];
5913 divisor = INTVAL (operands[0]);
5914 if (!div_milli[divisor][unsignedp])
5916 div_milli[divisor][unsignedp] = 1;
5917 if (unsignedp)
5918 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5919 else
5920 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5922 if (unsignedp)
5924 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5925 INTVAL (operands[0]));
5926 return pa_output_millicode_call (insn,
5927 gen_rtx_SYMBOL_REF (SImode, buf));
5929 else
5931 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5932 INTVAL (operands[0]));
5933 return pa_output_millicode_call (insn,
5934 gen_rtx_SYMBOL_REF (SImode, buf));
5937 /* Divisor isn't a special constant. */
5938 else
5940 if (unsignedp)
5942 import_milli (divU);
5943 return pa_output_millicode_call (insn,
5944 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5946 else
5948 import_milli (divI);
5949 return pa_output_millicode_call (insn,
5950 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5955 /* Output a $$rem millicode to do mod. */
5957 const char *
5958 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5960 if (unsignedp)
5962 import_milli (remU);
5963 return pa_output_millicode_call (insn,
5964 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5966 else
5968 import_milli (remI);
5969 return pa_output_millicode_call (insn,
5970 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5974 void
5975 pa_output_arg_descriptor (rtx_insn *call_insn)
5977 const char *arg_regs[4];
5978 machine_mode arg_mode;
5979 rtx link;
5980 int i, output_flag = 0;
5981 int regno;
5983 /* We neither need nor want argument location descriptors for the
5984 64-bit runtime environment or the ELF32 environment. */
5985 if (TARGET_64BIT || TARGET_ELF32)
5986 return;
5988 for (i = 0; i < 4; i++)
5989 arg_regs[i] = 0;
5991 /* Specify explicitly that no argument relocations should take place
5992 if using the portable runtime calling conventions. */
5993 if (TARGET_PORTABLE_RUNTIME)
5995 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5996 asm_out_file);
5997 return;
6000 gcc_assert (CALL_P (call_insn));
6001 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6002 link; link = XEXP (link, 1))
6004 rtx use = XEXP (link, 0);
6006 if (! (GET_CODE (use) == USE
6007 && GET_CODE (XEXP (use, 0)) == REG
6008 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6009 continue;
6011 arg_mode = GET_MODE (XEXP (use, 0));
6012 regno = REGNO (XEXP (use, 0));
6013 if (regno >= 23 && regno <= 26)
6015 arg_regs[26 - regno] = "GR";
6016 if (arg_mode == DImode)
6017 arg_regs[25 - regno] = "GR";
6019 else if (regno >= 32 && regno <= 39)
6021 if (arg_mode == SFmode)
6022 arg_regs[(regno - 32) / 2] = "FR";
6023 else
6025 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6026 arg_regs[(regno - 34) / 2] = "FR";
6027 arg_regs[(regno - 34) / 2 + 1] = "FU";
6028 #else
6029 arg_regs[(regno - 34) / 2] = "FU";
6030 arg_regs[(regno - 34) / 2 + 1] = "FR";
6031 #endif
6035 fputs ("\t.CALL ", asm_out_file);
6036 for (i = 0; i < 4; i++)
6038 if (arg_regs[i])
6040 if (output_flag++)
6041 fputc (',', asm_out_file);
6042 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6045 fputc ('\n', asm_out_file);
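/* As an illustration, a call passing an int in %r26 and a double in
   the second 8-byte aligned argument slot would get a descriptor
   along the lines of

	.CALL ARGW0=GR,ARGW2=FR,ARGW3=FU

   with FR and FU swapped when HP_FP_ARG_DESCRIPTOR_REVERSED is
   defined.  */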
6048 /* Inform reload about cases where moving X with a mode MODE to or from
6049 a register in RCLASS requires an extra scratch or immediate register.
6050 Return the class needed for the immediate register. */
6052 static reg_class_t
6053 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6054 machine_mode mode, secondary_reload_info *sri)
6056 int regno;
6057 enum reg_class rclass = (enum reg_class) rclass_i;
6059 /* Handle the easy stuff first. */
6060 if (rclass == R1_REGS)
6061 return NO_REGS;
6063 if (REG_P (x))
6065 regno = REGNO (x);
6066 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6067 return NO_REGS;
6069 else
6070 regno = -1;
6072 /* If we have something like (mem (mem (...)), we can safely assume the
6073 inner MEM will end up in a general register after reloading, so there's
6074 no need for a secondary reload. */
6075 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6076 return NO_REGS;
6078 /* Trying to load a constant into an FP register during PIC code
6079 generation requires %r1 as a scratch register. For float modes,
6080 the only legitimate constant is CONST0_RTX. However, there are
6081 a few patterns that accept constant double operands. */
6082 if (flag_pic
6083 && FP_REG_CLASS_P (rclass)
6084 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6086 switch (mode)
6088 case E_SImode:
6089 sri->icode = CODE_FOR_reload_insi_r1;
6090 break;
6092 case E_DImode:
6093 sri->icode = CODE_FOR_reload_indi_r1;
6094 break;
6096 case E_SFmode:
6097 sri->icode = CODE_FOR_reload_insf_r1;
6098 break;
6100 case E_DFmode:
6101 sri->icode = CODE_FOR_reload_indf_r1;
6102 break;
6104 default:
6105 gcc_unreachable ();
6107 return NO_REGS;
6110 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6111 register when we're generating PIC code or when the operand isn't
6112 readonly. */
6113 if (pa_symbolic_expression_p (x))
6115 if (GET_CODE (x) == HIGH)
6116 x = XEXP (x, 0);
6118 if (flag_pic || !read_only_operand (x, VOIDmode))
6120 switch (mode)
6122 case E_SImode:
6123 sri->icode = CODE_FOR_reload_insi_r1;
6124 break;
6126 case E_DImode:
6127 sri->icode = CODE_FOR_reload_indi_r1;
6128 break;
6130 default:
6131 gcc_unreachable ();
6133 return NO_REGS;
6137 /* Profiling showed the PA port spends about 1.3% of its compilation
6138 time in true_regnum from calls inside pa_secondary_reload_class. */
6139 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6140 regno = true_regnum (x);
6142 /* Handle reloads for floating point loads and stores. */
6143 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6144 && FP_REG_CLASS_P (rclass))
6146 if (MEM_P (x))
6148 x = XEXP (x, 0);
6150 /* We don't need a secondary reload for indexed memory addresses.
6152 When INT14_OK_STRICT is true, it might appear that we could
6153 directly allow register indirect memory addresses. However,
6154 this doesn't work because we don't support SUBREGs in
6155 floating-point register copies and reload doesn't tell us
6156 when it's going to use a SUBREG. */
6157 if (IS_INDEX_ADDR_P (x))
6158 return NO_REGS;
6161 /* Request a secondary reload with a general scratch register
6162 for everything else. ??? Could symbolic operands be handled
6163 directly when generating non-pic PA 2.0 code? */
6164 sri->icode = (in_p
6165 ? direct_optab_handler (reload_in_optab, mode)
6166 : direct_optab_handler (reload_out_optab, mode));
6167 return NO_REGS;
6170 /* A SAR<->FP register copy requires an intermediate general register
6171 and secondary memory. We need a secondary reload with a general
6172 scratch register for spills. */
6173 if (rclass == SHIFT_REGS)
6175 /* Handle spill. */
6176 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6178 sri->icode = (in_p
6179 ? direct_optab_handler (reload_in_optab, mode)
6180 : direct_optab_handler (reload_out_optab, mode));
6181 return NO_REGS;
6184 /* Handle FP copy. */
6185 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6186 return GENERAL_REGS;
6189 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6190 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6191 && FP_REG_CLASS_P (rclass))
6192 return GENERAL_REGS;
6194 return NO_REGS;
6197 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
6199 static bool
6200 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6201 reg_class_t class1 ATTRIBUTE_UNUSED,
6202 reg_class_t class2 ATTRIBUTE_UNUSED)
6204 #ifdef PA_SECONDARY_MEMORY_NEEDED
6205 return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6206 #else
6207 return false;
6208 #endif
6211 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6212 is only marked as live on entry by df-scan when it is a fixed
6213 register. It isn't a fixed register in the 64-bit runtime,
6214 so we need to mark it here. */
6216 static void
6217 pa_extra_live_on_entry (bitmap regs)
6219 if (TARGET_64BIT)
6220 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6223 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6224 to prevent it from being deleted. */
6227 pa_eh_return_handler_rtx (void)
6229 rtx tmp;
6231 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6232 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6233 tmp = gen_rtx_MEM (word_mode, tmp);
6234 tmp->volatil = 1;
6235 return tmp;
6238 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6239 by invisible reference. As a GCC extension, we also pass anything
6240 with a zero or variable size by reference.
6242 The 64-bit runtime does not describe passing any types by invisible
6243 reference. The internals of GCC can't currently handle passing
6244 empty structures, and zero or variable length arrays when they are
6245 not passed entirely on the stack or by reference. Thus, as a GCC
6246 extension, we pass these types by reference. The HP compiler doesn't
6247 support these types, so hopefully there shouldn't be any compatibility
6248 issues. This may have to be revisited when HP releases a C99 compiler
6249 or updates the ABI. */
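/* Thus, in the 32-bit runtime a 12-byte struct or a variable length
   array is passed by reference while an 8-byte double is passed by
   value; in the 64-bit runtime only zero and variable sized types
   are passed by reference.  */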
6251 static bool
6252 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6253 machine_mode mode, const_tree type,
6254 bool named ATTRIBUTE_UNUSED)
6256 HOST_WIDE_INT size;
6258 if (type)
6259 size = int_size_in_bytes (type);
6260 else
6261 size = GET_MODE_SIZE (mode);
6263 if (TARGET_64BIT)
6264 return size <= 0;
6265 else
6266 return size <= 0 || size > 8;
6269 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6271 static pad_direction
6272 pa_function_arg_padding (machine_mode mode, const_tree type)
6274 if (mode == BLKmode
6275 || (TARGET_64BIT
6276 && type
6277 && (AGGREGATE_TYPE_P (type)
6278 || TREE_CODE (type) == COMPLEX_TYPE
6279 || TREE_CODE (type) == VECTOR_TYPE)))
6281 /* Return PAD_NONE if justification is not required. */
6282 if (type
6283 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6284 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6285 return PAD_NONE;
6287 /* The directions set here are ignored when a BLKmode argument larger
6288 than a word is placed in a register. Different code is used for
6289 the stack and registers. This makes it difficult to have a
6290 consistent data representation for both the stack and registers.
6291 For both runtimes, the justification and padding for arguments on
6292 the stack and in registers should be identical. */
6293 if (TARGET_64BIT)
6294 /* The 64-bit runtime specifies left justification for aggregates. */
6295 return PAD_UPWARD;
6296 else
6297 /* The 32-bit runtime architecture specifies right justification.
6298 When the argument is passed on the stack, the argument is padded
6299 with garbage on the left. The HP compiler pads with zeros. */
6300 return PAD_DOWNWARD;
6303 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6304 return PAD_DOWNWARD;
6305 else
6306 return PAD_NONE;
6310 /* Do what is necessary for `va_start'. We look at the current function
6311 to determine if stdargs or varargs is used and fill in an initial
6312 va_list. A pointer to this constructor is returned. */
6314 static rtx
6315 hppa_builtin_saveregs (void)
6317 rtx offset, dest;
6318 tree fntype = TREE_TYPE (current_function_decl);
6319 int argadj = ((!stdarg_p (fntype))
6320 ? UNITS_PER_WORD : 0);
6322 if (argadj)
6323 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6324 else
6325 offset = crtl->args.arg_offset_rtx;
6327 if (TARGET_64BIT)
6329 int i, off;
6331 /* Adjust for varargs/stdarg differences. */
6332 if (argadj)
6333 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6334 else
6335 offset = crtl->args.arg_offset_rtx;
6337 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6338 from the incoming arg pointer and growing to larger addresses. */
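/* The resulting layout is %r26 at AP-64, %r25 at AP-56, ...,
   %r19 at AP-8, where AP is the incoming arg pointer.  */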
6339 for (i = 26, off = -64; i >= 19; i--, off += 8)
6340 emit_move_insn (gen_rtx_MEM (word_mode,
6341 plus_constant (Pmode,
6342 arg_pointer_rtx, off)),
6343 gen_rtx_REG (word_mode, i));
6345 /* The incoming args pointer points just beyond the flushback area;
6346 normally this is not a serious concern. However, when we are doing
6347 varargs/stdargs we want to make the arg pointer point to the start
6348 of the incoming argument area. */
6349 emit_move_insn (virtual_incoming_args_rtx,
6350 plus_constant (Pmode, arg_pointer_rtx, -64));
6352 /* Now return a pointer to the first anonymous argument. */
6353 return copy_to_reg (expand_binop (Pmode, add_optab,
6354 virtual_incoming_args_rtx,
6355 offset, 0, 0, OPTAB_LIB_WIDEN));
6358 /* Store general registers on the stack. */
6359 dest = gen_rtx_MEM (BLKmode,
6360 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6361 -16));
6362 set_mem_alias_set (dest, get_varargs_alias_set ());
6363 set_mem_align (dest, BITS_PER_WORD);
6364 move_block_from_reg (23, dest, 4);
6366 /* move_block_from_reg will emit code to store the argument registers
6367 individually as scalar stores.
6369 However, other insns may later load from the same addresses for
6370 a structure load (passing a struct to a varargs routine).
6372 The alias code assumes that such aliasing can never happen, so we
6373 have to keep memory referencing insns from moving up beyond the
6374 last argument register store. So we emit a blockage insn here. */
6375 emit_insn (gen_blockage ());
6377 return copy_to_reg (expand_binop (Pmode, add_optab,
6378 crtl->args.internal_arg_pointer,
6379 offset, 0, 0, OPTAB_LIB_WIDEN));
6382 static void
6383 hppa_va_start (tree valist, rtx nextarg)
6385 nextarg = expand_builtin_saveregs ();
6386 std_expand_builtin_va_start (valist, nextarg);
6389 static tree
6390 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6391 gimple_seq *post_p)
6393 if (TARGET_64BIT)
6395 /* Args grow upward. We can use the generic routines. */
6396 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6398 else /* !TARGET_64BIT */
6400 tree ptr = build_pointer_type (type);
6401 tree valist_type;
6402 tree t, u;
6403 unsigned int size, ofs;
6404 bool indirect;
6406 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6407 if (indirect)
6409 type = ptr;
6410 ptr = build_pointer_type (type);
6412 size = int_size_in_bytes (type);
6413 valist_type = TREE_TYPE (valist);
6415 /* Args grow down. Not handled by generic routines. */
6417 u = fold_convert (sizetype, size_in_bytes (type));
6418 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6419 t = fold_build_pointer_plus (valist, u);
6421 /* Align to 4 or 8 byte boundary depending on argument size. */
6423 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6424 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6425 t = fold_convert (valist_type, t);
6427 t = build2 (MODIFY_EXPR, valist_type, valist, t);
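/* Arguments smaller than a word are right justified in their slot.
   A 1-byte char, for example, gives ofs = (8 - 1) % 4 = 3, so the
   pointer is advanced three bytes to address the byte itself rather
   than the start of its word.  */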
6429 ofs = (8 - size) % 4;
6430 if (ofs != 0)
6431 t = fold_build_pointer_plus_hwi (t, ofs);
6433 t = fold_convert (ptr, t);
6434 t = build_va_arg_indirect_ref (t);
6436 if (indirect)
6437 t = build_va_arg_indirect_ref (t);
6439 return t;
6443 /* True if MODE is valid for the target. By "valid", we mean able to
6444 be manipulated in non-trivial ways. In particular, this means all
6445 the arithmetic is supported.
6447 Currently, TImode is not valid as the HP 64-bit runtime documentation
6448 doesn't document the alignment and calling conventions for this type.
6449 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6450 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6452 static bool
6453 pa_scalar_mode_supported_p (scalar_mode mode)
6455 int precision = GET_MODE_PRECISION (mode);
6457 switch (GET_MODE_CLASS (mode))
6459 case MODE_PARTIAL_INT:
6460 case MODE_INT:
6461 if (precision == CHAR_TYPE_SIZE)
6462 return true;
6463 if (precision == SHORT_TYPE_SIZE)
6464 return true;
6465 if (precision == INT_TYPE_SIZE)
6466 return true;
6467 if (precision == LONG_TYPE_SIZE)
6468 return true;
6469 if (precision == LONG_LONG_TYPE_SIZE)
6470 return true;
6471 return false;
6473 case MODE_FLOAT:
6474 if (precision == FLOAT_TYPE_SIZE)
6475 return true;
6476 if (precision == DOUBLE_TYPE_SIZE)
6477 return true;
6478 if (precision == LONG_DOUBLE_TYPE_SIZE)
6479 return true;
6480 return false;
6482 case MODE_DECIMAL_FLOAT:
6483 return false;
6485 default:
6486 gcc_unreachable ();
6490 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6491 it branches into the delay slot. Otherwise, return FALSE. */
6493 static bool
6494 branch_to_delay_slot_p (rtx_insn *insn)
6496 rtx_insn *jump_insn;
6498 if (dbr_sequence_length ())
6499 return FALSE;
6501 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6502 while (insn)
6504 insn = next_active_insn (insn);
6505 if (jump_insn == insn)
6506 return TRUE;
6508 /* We can't rely on the length of asms. So, we return FALSE when
6509 the branch is followed by an asm. */
6510 if (!insn
6511 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6512 || asm_noperands (PATTERN (insn)) >= 0
6513 || get_attr_length (insn) > 0)
6514 break;
6517 return FALSE;
6520 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6522 This occurs when INSN has an unfilled delay slot and is followed
6523 by an asm. Disaster can occur if the asm is empty and the jump
6524 branches into the delay slot. So, we add a nop in the delay slot
6525 when this occurs. */
6527 static bool
6528 branch_needs_nop_p (rtx_insn *insn)
6530 rtx_insn *jump_insn;
6532 if (dbr_sequence_length ())
6533 return FALSE;
6535 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6536 while (insn)
6538 insn = next_active_insn (insn);
6539 if (!insn || jump_insn == insn)
6540 return TRUE;
6542 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6543 || asm_noperands (PATTERN (insn)) >= 0)
6544 && get_attr_length (insn) > 0)
6545 break;
6548 return FALSE;
6551 /* Return TRUE if INSN, a forward jump insn, can use nullification
6552 to skip the following instruction. This avoids an extra cycle due
6553 to a mis-predicted branch when we fall through. */
6555 static bool
6556 use_skip_p (rtx_insn *insn)
6558 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6560 while (insn)
6562 insn = next_active_insn (insn);
6564 /* We can't rely on the length of asms, so we can't skip asms. */
6565 if (!insn
6566 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6567 || asm_noperands (PATTERN (insn)) >= 0)
6568 break;
6569 if (get_attr_length (insn) == 4
6570 && jump_insn == next_active_insn (insn))
6571 return TRUE;
6572 if (get_attr_length (insn) > 0)
6573 break;
6576 return FALSE;
6579 /* This routine handles all the normal conditional branch sequences we
6580 might need to generate. It handles compare immediate vs compare
6581 register, nullification of delay slots, varying length branches,
6582 negated branches, and all combinations of the above. It returns the
6583 output template appropriate for emitting the branch described by the
6584 given parameters. */
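/* As a sketch, a short nullified forward branch comparing two
   registers might come out as

	cmpb,=,n %r19,%r26,L$0002

   where the {com...|cmp...} alternations in the templates below
   select between the older com* and the PA 2.0 cmp* mnemonics.  */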
6586 const char *
6587 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6589 static char buf[100];
6590 bool useskip;
6591 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6592 int length = get_attr_length (insn);
6593 int xdelay;
6595 /* A conditional branch to the following instruction (e.g. the delay slot)
6596 is asking for a disaster. This can happen when not optimizing and
6597 when jump optimization fails.
6599 While it is usually safe to emit nothing, this can fail if the
6600 preceding instruction is a nullified branch with an empty delay
6601 slot and the same branch target as this branch. We could check
6602 for this but jump optimization should eliminate nop jumps. It
6603 is always safe to emit a nop. */
6604 if (branch_to_delay_slot_p (insn))
6605 return "nop";
6607 /* The doubleword form of the cmpib instruction doesn't have the LEU
6608 and GTU conditions while the cmpb instruction does. Since we accept
6609 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6610 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6611 operands[2] = gen_rtx_REG (DImode, 0);
6612 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6613 operands[1] = gen_rtx_REG (DImode, 0);
6615 /* If this is a long branch with its delay slot unfilled, set `nullify'
6616 as it can nullify the delay slot and save a nop. */
6617 if (length == 8 && dbr_sequence_length () == 0)
6618 nullify = 1;
6620 /* If this is a short forward conditional branch which did not get
6621 its delay slot filled, the delay slot can still be nullified. */
6622 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6623 nullify = forward_branch_p (insn);
6625 /* A forward branch over a single nullified insn can be done with a
6626 comclr instruction. This avoids a single cycle penalty due to
6627 mis-predicted branch if we fall through (branch not taken). */
6628 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6630 switch (length)
6632 /* All short conditional branches except backwards with an unfilled
6633 delay slot. */
6634 case 4:
6635 if (useskip)
6636 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6637 else
6638 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6639 if (GET_MODE (operands[1]) == DImode)
6640 strcat (buf, "*");
6641 if (negated)
6642 strcat (buf, "%B3");
6643 else
6644 strcat (buf, "%S3");
6645 if (useskip)
6646 strcat (buf, " %2,%r1,%%r0");
6647 else if (nullify)
6649 if (branch_needs_nop_p (insn))
6650 strcat (buf, ",n %2,%r1,%0%#");
6651 else
6652 strcat (buf, ",n %2,%r1,%0");
6654 else
6655 strcat (buf, " %2,%r1,%0");
6656 break;
6658 /* All long conditionals. Note a short backward branch with an
6659 unfilled delay slot is treated just like a long backward branch
6660 with an unfilled delay slot. */
6661 case 8:
6662 /* Handle weird backwards branch with a filled delay slot
6663 which is nullified. */
6664 if (dbr_sequence_length () != 0
6665 && ! forward_branch_p (insn)
6666 && nullify)
6668 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6669 if (GET_MODE (operands[1]) == DImode)
6670 strcat (buf, "*");
6671 if (negated)
6672 strcat (buf, "%S3");
6673 else
6674 strcat (buf, "%B3");
6675 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6677 /* Handle short backwards branch with an unfilled delay slot.
6678 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6679 taken and untaken branches. */
6680 else if (dbr_sequence_length () == 0
6681 && ! forward_branch_p (insn)
6682 && INSN_ADDRESSES_SET_P ()
6683 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6684 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6686 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6687 if (GET_MODE (operands[1]) == DImode)
6688 strcat (buf, "*");
6689 if (negated)
6690 strcat (buf, "%B3 %2,%r1,%0%#");
6691 else
6692 strcat (buf, "%S3 %2,%r1,%0%#");
6694 else
6696 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6697 if (GET_MODE (operands[1]) == DImode)
6698 strcat (buf, "*");
6699 if (negated)
6700 strcat (buf, "%S3");
6701 else
6702 strcat (buf, "%B3");
6703 if (nullify)
6704 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6705 else
6706 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6708 break;
6710 default:
6711 /* The reversed conditional branch must branch over one additional
6712 instruction if the delay slot is filled and needs to be extracted
6713 by pa_output_lbranch. If the delay slot is empty or this is a
6714 nullified forward branch, the instruction after the reversed
6715 condition branch must be nullified. */
6716 if (dbr_sequence_length () == 0
6717 || (nullify && forward_branch_p (insn)))
6719 nullify = 1;
6720 xdelay = 0;
6721 operands[4] = GEN_INT (length);
6723 else
6725 xdelay = 1;
6726 operands[4] = GEN_INT (length + 4);
6729 /* Create a reversed conditional branch which branches around
6730 the following insns. */
6731 if (GET_MODE (operands[1]) != DImode)
6733 if (nullify)
6735 if (negated)
6736 strcpy (buf,
6737 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6738 else
6739 strcpy (buf,
6740 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6742 else
6744 if (negated)
6745 strcpy (buf,
6746 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6747 else
6748 strcpy (buf,
6749 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6752 else
6754 if (nullify)
6756 if (negated)
6757 strcpy (buf,
6758 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6759 else
6760 strcpy (buf,
6761 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6763 else
6765 if (negated)
6766 strcpy (buf,
6767 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6768 else
6769 strcpy (buf,
6770 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6774 output_asm_insn (buf, operands);
6775 return pa_output_lbranch (operands[0], insn, xdelay);
6777 return buf;
6780 /* Output a PIC pc-relative instruction sequence to load the address of
6781 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6782 or a code label. OPERANDS[1] specifies the register to use to load
6783 the program counter. OPERANDS[3] may be used for label generation.
6784 The sequence is always three instructions in length. The program
6785 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6786 Register %r1 is clobbered. */
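/* With PA 2.0 and GAS, for instance, the emitted sequence is

	mfia %r1
	addil L'sym-$PIC_pcrel$0+12,%r1
	ldo R'sym-$PIC_pcrel$0+16(%r1),%r2

   assuming OPERANDS[0] is sym, OPERANDS[1] is %r1 and OPERANDS[2]
   is %r2.  */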
6788 static void
6789 pa_output_pic_pcrel_sequence (rtx *operands)
6791 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6792 if (TARGET_PA_20)
6794 /* We can use mfia to determine the current program counter. */
6795 if (TARGET_SOM || !TARGET_GAS)
6797 operands[3] = gen_label_rtx ();
6798 targetm.asm_out.internal_label (asm_out_file, "L",
6799 CODE_LABEL_NUMBER (operands[3]));
6800 output_asm_insn ("mfia %1", operands);
6801 output_asm_insn ("addil L'%0-%l3,%1", operands);
6802 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6804 else
6806 output_asm_insn ("mfia %1", operands);
6807 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6808 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6811 else
6813 /* We need to use a branch to determine the current program counter. */
6814 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6815 if (TARGET_SOM || !TARGET_GAS)
6817 operands[3] = gen_label_rtx ();
6818 output_asm_insn ("addil L'%0-%l3,%1", operands);
6819 targetm.asm_out.internal_label (asm_out_file, "L",
6820 CODE_LABEL_NUMBER (operands[3]));
6821 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6823 else
6825 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6826 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6831 /* This routine handles output of long unconditional branches that
6832 exceed the maximum range of a simple branch instruction. Since
6833 we don't have a register available for the branch, we save register
6834 %r1 in the frame marker, load the branch destination DEST into %r1,
6835 execute the branch, and restore %r1 in the delay slot of the branch.
6837 Since long branches may have an insn in the delay slot and the
6838 delay slot is used to restore %r1, we in general need to extract
6839 this insn and execute it before the branch. However, to facilitate
6840 use of this function by conditional branches, we also provide an
6841 option to not extract the delay insn so that it will be emitted
6842 after the long branch. So, if there is an insn in the delay slot,
6843 it is extracted if XDELAY is nonzero.
6845 The lengths of the various long-branch sequences are 20, 16 and 24
6846 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
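/* For instance, the 16 byte non-PIC sequence for a 32-bit target
   with no frame of its own is

	stw %r1,-20(%r30)
	ldil L'dest,%r1
	be R'dest(%sr4,%r1)
	ldw -20(%r30),%r1

   with the final restore of %r1 sitting in the delay slot of the
   be instruction.  */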
6848 const char *
6849 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6851 rtx xoperands[4];
6853 xoperands[0] = dest;
6855 /* First, free up the delay slot. */
6856 if (xdelay && dbr_sequence_length () != 0)
6858 /* We can't handle a jump in the delay slot. */
6859 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6861 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6862 optimize, 0, NULL);
6864 /* Now delete the delay insn. */
6865 SET_INSN_DELETED (NEXT_INSN (insn));
6868 /* Output an insn to save %r1. The runtime documentation doesn't
6869 specify whether the "Clean Up" slot in the caller's frame can
6870 be clobbered by the callee. It isn't copied by HP's builtin
6871 alloca, so this suggests that it can be clobbered if necessary.
6872 The "Static Link" location is copied by HP builtin alloca, so
6873 we avoid using it. Using the cleanup slot might be a problem
6874 if we have to interoperate with languages that pass cleanup
6875 information. However, it should be possible to handle these
6876 situations with GCC's asm feature.
6878 The "Current RP" slot is reserved for the called procedure, so
6879 we try to use it when we don't have a frame of our own. It's
6880 rather unlikely that we won't have a frame when we need to emit
6881 a very long branch.
6883 Really the way to go long term is a register scavenger; goto
6884 the target of the jump and find a register which we can use
6885 as a scratch to hold the value in %r1. Then, we wouldn't have
6886 to free up the delay slot or clobber a slot that may be needed
6887 for other purposes. */
6888 if (TARGET_64BIT)
6890 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6891 /* Use the return pointer slot in the frame marker. */
6892 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6893 else
6894 /* Use the slot at -40 in the frame marker since HP builtin
6895 alloca doesn't copy it. */
6896 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6898 else
6900 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6901 /* Use the return pointer slot in the frame marker. */
6902 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6903 else
6904 /* Use the "Clean Up" slot in the frame marker. In GCC,
6905 the only other use of this location is for copying a
6906 floating point double argument from a floating-point
6907 register to two general registers. The copy is done
6908 as an "atomic" operation when outputting a call, so it
6909 won't interfere with our using the location here. */
6910 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6913 if (TARGET_PORTABLE_RUNTIME)
6915 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6916 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6917 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6919 else if (flag_pic)
6921 xoperands[1] = gen_rtx_REG (Pmode, 1);
6922 xoperands[2] = xoperands[1];
6923 pa_output_pic_pcrel_sequence (xoperands);
6924 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6926 else
6927 /* Now output a very long branch to the original target. */
6928 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6930 /* Now restore the value of %r1 in the delay slot. */
6931 if (TARGET_64BIT)
6933 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6934 return "ldd -16(%%r30),%%r1";
6935 else
6936 return "ldd -40(%%r30),%%r1";
6938 else
6940 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6941 return "ldw -20(%%r30),%%r1";
6942 else
6943 return "ldw -12(%%r30),%%r1";
6947 /* This routine handles all the branch-on-bit conditional branch sequences we
6948 might need to generate. It handles nullification of delay slots,
6949 varying length branches, negated branches and all combinations of the
6950 above. It returns the appropriate output template to emit the branch. */
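/* For example, a branch on bit 31 of %r26 might come out as

	bb,< %r26,31,L$0004

   (a sketch; the condition, nullification and operands vary as
   described below).  */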
6952 const char *
6953 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6955 static char buf[100];
6956 bool useskip;
6957 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6958 int length = get_attr_length (insn);
6959 int xdelay;
6961 /* A conditional branch to the following instruction (e.g. the delay slot) is
6962 asking for a disaster. I do not think this can happen as this pattern
6963 is only used when optimizing; jump optimization should eliminate the
6964 jump. But be prepared just in case. */
6966 if (branch_to_delay_slot_p (insn))
6967 return "nop";
6969 /* If this is a long branch with its delay slot unfilled, set `nullify'
6970 as it can nullify the delay slot and save a nop. */
6971 if (length == 8 && dbr_sequence_length () == 0)
6972 nullify = 1;
6974 /* If this is a short forward conditional branch which did not get
6975 its delay slot filled, the delay slot can still be nullified. */
6976 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6977 nullify = forward_branch_p (insn);
6979 /* A forward branch over a single nullified insn can be done with an
6980 extrs instruction. This avoids a single cycle penalty due to
6981 mis-predicted branch if we fall through (branch not taken). */
6982 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6984 switch (length)
6987 /* All short conditional branches except backwards with an unfilled
6988 delay slot. */
6989 case 4:
6990 if (useskip)
6991 strcpy (buf, "{extrs,|extrw,s,}");
6992 else
6993 strcpy (buf, "bb,");
6994 if (useskip && GET_MODE (operands[0]) == DImode)
6995 strcpy (buf, "extrd,s,*");
6996 else if (GET_MODE (operands[0]) == DImode)
6997 strcpy (buf, "bb,*");
6998 if ((which == 0 && negated)
6999 || (which == 1 && ! negated))
7000 strcat (buf, ">=");
7001 else
7002 strcat (buf, "<");
7003 if (useskip)
7004 strcat (buf, " %0,%1,1,%%r0");
7005 else if (nullify && negated)
7007 if (branch_needs_nop_p (insn))
7008 strcat (buf, ",n %0,%1,%3%#");
7009 else
7010 strcat (buf, ",n %0,%1,%3");
7012 else if (nullify && ! negated)
7014 if (branch_needs_nop_p (insn))
7015 strcat (buf, ",n %0,%1,%2%#");
7016 else
7017 strcat (buf, ",n %0,%1,%2");
7019 else if (! nullify && negated)
7020 strcat (buf, " %0,%1,%3");
7021 else if (! nullify && ! negated)
7022 strcat (buf, " %0,%1,%2");
7023 break;
7025 /* All long conditionals. Note a short backward branch with an
7026 unfilled delay slot is treated just like a long backward branch
7027 with an unfilled delay slot. */
7028 case 8:
7029 /* Handle weird backwards branch with a filled delay slot
7030 which is nullified. */
7031 if (dbr_sequence_length () != 0
7032 && ! forward_branch_p (insn)
7033 && nullify)
7035 strcpy (buf, "bb,");
7036 if (GET_MODE (operands[0]) == DImode)
7037 strcat (buf, "*");
7038 if ((which == 0 && negated)
7039 || (which == 1 && ! negated))
7040 strcat (buf, "<");
7041 else
7042 strcat (buf, ">=");
7043 if (negated)
7044 strcat (buf, ",n %0,%1,.+12\n\tb %3");
7045 else
7046 strcat (buf, ",n %0,%1,.+12\n\tb %2");
7048 /* Handle short backwards branch with an unfilled delay slot.
7049 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7050 taken and untaken branches. */
7051 else if (dbr_sequence_length () == 0
7052 && ! forward_branch_p (insn)
7053 && INSN_ADDRESSES_SET_P ()
7054 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7055 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7057 strcpy (buf, "bb,");
7058 if (GET_MODE (operands[0]) == DImode)
7059 strcat (buf, "*");
7060 if ((which == 0 && negated)
7061 || (which == 1 && ! negated))
7062 strcat (buf, ">=");
7063 else
7064 strcat (buf, "<");
7065 if (negated)
7066 strcat (buf, " %0,%1,%3%#");
7067 else
7068 strcat (buf, " %0,%1,%2%#");
7070 else
7072 if (GET_MODE (operands[0]) == DImode)
7073 strcpy (buf, "extrd,s,*");
7074 else
7075 strcpy (buf, "{extrs,|extrw,s,}");
7076 if ((which == 0 && negated)
7077 || (which == 1 && ! negated))
7078 strcat (buf, "<");
7079 else
7080 strcat (buf, ">=");
7081 if (nullify && negated)
7082 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7083 else if (nullify && ! negated)
7084 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7085 else if (negated)
7086 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7087 else
7088 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7090 break;
7092 default:
7093 /* The reversed conditional branch must branch over one additional
7094 instruction if the delay slot is filled and needs to be extracted
7095 by pa_output_lbranch. If the delay slot is empty or this is a
7096 nullified forward branch, the instruction after the reversed
7097 condition branch must be nullified. */
7098 if (dbr_sequence_length () == 0
7099 || (nullify && forward_branch_p (insn)))
7101 nullify = 1;
7102 xdelay = 0;
7103 operands[4] = GEN_INT (length);
7105 else
7107 xdelay = 1;
7108 operands[4] = GEN_INT (length + 4);
7111 if (GET_MODE (operands[0]) == DImode)
7112 strcpy (buf, "bb,*");
7113 else
7114 strcpy (buf, "bb,");
7115 if ((which == 0 && negated)
7116 || (which == 1 && !negated))
7117 strcat (buf, "<");
7118 else
7119 strcat (buf, ">=");
7120 if (nullify)
7121 strcat (buf, ",n %0,%1,.+%4");
7122 else
7123 strcat (buf, " %0,%1,.+%4");
7124 output_asm_insn (buf, operands);
7125 return pa_output_lbranch (negated ? operands[3] : operands[2],
7126 insn, xdelay);
7128 return buf;
7131 /* This routine handles all the branch-on-variable-bit conditional branch
7132 sequences we might need to generate. It handles nullification of delay
7133 slots, varying length branches, negated branches and all combinations
7134 of the above. It returns the appropriate output template to emit the
7135 branch. */
7137 const char *
7138 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7139 int which)
7141 static char buf[100];
7142 bool useskip;
7143 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7144 int length = get_attr_length (insn);
7145 int xdelay;
7147 /* A conditional branch to the following instruction (e.g. the delay slot) is
7148 asking for a disaster. I do not think this can happen as this pattern
7149 is only used when optimizing; jump optimization should eliminate the
7150 jump. But be prepared just in case. */
7152 if (branch_to_delay_slot_p (insn))
7153 return "nop";
7155 /* If this is a long branch with its delay slot unfilled, set `nullify'
7156 as it can nullify the delay slot and save a nop. */
7157 if (length == 8 && dbr_sequence_length () == 0)
7158 nullify = 1;
7160 /* If this is a short forward conditional branch which did not get
7161 its delay slot filled, the delay slot can still be nullified. */
7162 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7163 nullify = forward_branch_p (insn);
7165 /* A forward branch over a single nullified insn can be done with an
7166 extrs instruction. This avoids a single cycle penalty due to
7167 mis-predicted branch if we fall through (branch not taken). */
7168 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7170 switch (length)
7173 /* All short conditional branches except backwards with an unfilled
7174 delay slot. */
7175 case 4:
7176 if (useskip)
7177 strcpy (buf, "{vextrs,|extrw,s,}");
7178 else
7179 strcpy (buf, "{bvb,|bb,}");
7180 if (useskip && GET_MODE (operands[0]) == DImode)
7181 strcpy (buf, "extrd,s,*");
7182 else if (GET_MODE (operands[0]) == DImode)
7183 strcpy (buf, "bb,*");
7184 if ((which == 0 && negated)
7185 || (which == 1 && ! negated))
7186 strcat (buf, ">=");
7187 else
7188 strcat (buf, "<");
7189 if (useskip)
7190 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7191 else if (nullify && negated)
7193 if (branch_needs_nop_p (insn))
7194 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7195 else
7196 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7198 else if (nullify && ! negated)
7200 if (branch_needs_nop_p (insn))
7201 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7202 else
7203 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7205 else if (! nullify && negated)
7206 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7207 else if (! nullify && ! negated)
7208 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7209 break;
7211 /* All long conditionals. Note a short backward branch with an
7212 unfilled delay slot is treated just like a long backward branch
7213 with an unfilled delay slot. */
7214 case 8:
7215 /* Handle weird backwards branch with a filled delay slot
7216 which is nullified. */
7217 if (dbr_sequence_length () != 0
7218 && ! forward_branch_p (insn)
7219 && nullify)
7221 strcpy (buf, "{bvb,|bb,}");
7222 if (GET_MODE (operands[0]) == DImode)
7223 strcat (buf, "*");
7224 if ((which == 0 && negated)
7225 || (which == 1 && ! negated))
7226 strcat (buf, "<");
7227 else
7228 strcat (buf, ">=");
7229 if (negated)
7230 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7231 else
7232 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7234 /* Handle short backwards branch with an unfilled delay slot.
7235 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7236 taken and untaken branches. */
7237 else if (dbr_sequence_length () == 0
7238 && ! forward_branch_p (insn)
7239 && INSN_ADDRESSES_SET_P ()
7240 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7241 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7243 strcpy (buf, "{bvb,|bb,}");
7244 if (GET_MODE (operands[0]) == DImode)
7245 strcat (buf, "*");
7246 if ((which == 0 && negated)
7247 || (which == 1 && ! negated))
7248 strcat (buf, ">=");
7249 else
7250 strcat (buf, "<");
7251 if (negated)
7252 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7253 else
7254 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7256 else
7258 strcpy (buf, "{vextrs,|extrw,s,}");
7259 if (GET_MODE (operands[0]) == DImode)
7260 strcpy (buf, "extrd,s,*");
7261 if ((which == 0 && negated)
7262 || (which == 1 && ! negated))
7263 strcat (buf, "<");
7264 else
7265 strcat (buf, ">=");
7266 if (nullify && negated)
7267 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7268 else if (nullify && ! negated)
7269 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7270 else if (negated)
7271 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7272 else
7273 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7275 break;
7277 default:
7278 /* The reversed conditional branch must branch over one additional
7279 instruction if the delay slot is filled and needs to be extracted
7280 by pa_output_lbranch. If the delay slot is empty or this is a
7281 nullified forward branch, the instruction after the reversed
7282 condition branch must be nullified. */
7283 if (dbr_sequence_length () == 0
7284 || (nullify && forward_branch_p (insn)))
7286 nullify = 1;
7287 xdelay = 0;
7288 operands[4] = GEN_INT (length);
7290 else
7292 xdelay = 1;
7293 operands[4] = GEN_INT (length + 4);
7296 if (GET_MODE (operands[0]) == DImode)
7297 strcpy (buf, "bb,*");
7298 else
7299 strcpy (buf, "{bvb,|bb,}");
7300 if ((which == 0 && negated)
7301 || (which == 1 && !negated))
7302 strcat (buf, "<");
7303 else
7304 strcat (buf, ">=");
7305 if (nullify)
7306 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7307 else
7308 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7309 output_asm_insn (buf, operands);
7310 return pa_output_lbranch (negated ? operands[3] : operands[2],
7311 insn, xdelay);
7313 return buf;
7316 /* Return the output template for emitting a dbra type insn.
7318 Note it may perform some output operations on its own before
7319 returning the final output string. */
7320 const char *
7321 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7323 int length = get_attr_length (insn);
7325 /* A conditional branch to the following instruction (e.g. the delay slot) is
7326 asking for a disaster. Be prepared! */
7328 if (branch_to_delay_slot_p (insn))
7330 if (which_alternative == 0)
7331 return "ldo %1(%0),%0";
7332 else if (which_alternative == 1)
7334 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7335 output_asm_insn ("ldw -16(%%r30),%4", operands);
7336 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7337 return "{fldws|fldw} -16(%%r30),%0";
7339 else
7341 output_asm_insn ("ldw %0,%4", operands);
7342 return "ldo %1(%4),%4\n\tstw %4,%0";
7346 if (which_alternative == 0)
7348 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7349 int xdelay;
7351 /* If this is a long branch with its delay slot unfilled, set `nullify'
7352 as it can nullify the delay slot and save a nop. */
7353 if (length == 8 && dbr_sequence_length () == 0)
7354 nullify = 1;
7356 /* If this is a short forward conditional branch which did not get
7357 its delay slot filled, the delay slot can still be nullified. */
7358 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7359 nullify = forward_branch_p (insn);
7361 switch (length)
7363 case 4:
7364 if (nullify)
7366 if (branch_needs_nop_p (insn))
7367 return "addib,%C2,n %1,%0,%3%#";
7368 else
7369 return "addib,%C2,n %1,%0,%3";
7371 else
7372 return "addib,%C2 %1,%0,%3";
7374 case 8:
7375 /* Handle weird backwards branch with a filled delay slot
7376 which is nullified. */
7377 if (dbr_sequence_length () != 0
7378 && ! forward_branch_p (insn)
7379 && nullify)
7380 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7381 /* Handle short backwards branch with an unfilled delay slot.
7382 Using a addb;nop rather than addi;bl saves 1 cycle for both
7383 taken and untaken branches. */
7384 else if (dbr_sequence_length () == 0
7385 && ! forward_branch_p (insn)
7386 && INSN_ADDRESSES_SET_P ()
7387 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7388 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7389 return "addib,%C2 %1,%0,%3%#";
7391 /* Handle normal cases. */
7392 if (nullify)
7393 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7394 else
7395 return "addi,%N2 %1,%0,%0\n\tb %3";
7397 default:
7398 /* The reversed conditional branch must branch over one additional
7399 instruction if the delay slot is filled and needs to be extracted
7400 by pa_output_lbranch. If the delay slot is empty or this is a
7401 nullified forward branch, the instruction after the reversed
7402 condition branch must be nullified. */
7403 if (dbr_sequence_length () == 0
7404 || (nullify && forward_branch_p (insn)))
7406 nullify = 1;
7407 xdelay = 0;
7408 operands[4] = GEN_INT (length);
7410 else
7412 xdelay = 1;
7413 operands[4] = GEN_INT (length + 4);
7416 if (nullify)
7417 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7418 else
7419 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7421 return pa_output_lbranch (operands[3], insn, xdelay);
7425 /* Deal with gross reload from FP register case. */
7426 else if (which_alternative == 1)
7428 /* Move loop counter from FP register to MEM then into a GR,
7429 increment the GR, store the GR into MEM, and finally reload
7430 the FP register from MEM from within the branch's delay slot. */
7431 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7432 operands);
7433 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7434 if (length == 24)
7435 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7436 else if (length == 28)
7437 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7438 else
7440 operands[5] = GEN_INT (length - 16);
7441 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7442 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7443 return pa_output_lbranch (operands[3], insn, 0);
7446 /* Deal with gross reload from memory case. */
7447 else
7449 /* Reload loop counter from memory, the store back to memory
7450 happens in the branch's delay slot. */
7451 output_asm_insn ("ldw %0,%4", operands);
7452 if (length == 12)
7453 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7454 else if (length == 16)
7455 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7456 else
7458 operands[5] = GEN_INT (length - 4);
7459 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7460 return pa_output_lbranch (operands[3], insn, 0);
7465 /* Return the output template for emitting a movb type insn.
7467 Note it may perform some output operations on its own before
7468 returning the final output string. */
7469 const char *
7470 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7471 int reverse_comparison)
7473 int length = get_attr_length (insn);
7475 /* A conditional branch to the following instruction (e.g. the delay slot) is
7476 asking for a disaster. Be prepared! */
7478 if (branch_to_delay_slot_p (insn))
7480 if (which_alternative == 0)
7481 return "copy %1,%0";
7482 else if (which_alternative == 1)
7484 output_asm_insn ("stw %1,-16(%%r30)", operands);
7485 return "{fldws|fldw} -16(%%r30),%0";
7487 else if (which_alternative == 2)
7488 return "stw %1,%0";
7489 else
7490 return "mtsar %r1";
7493 /* Support the second variant. */
7494 if (reverse_comparison)
7495 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7497 if (which_alternative == 0)
7499 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7500 int xdelay;
7502 /* If this is a long branch with its delay slot unfilled, set `nullify'
7503 as it can nullify the delay slot and save a nop. */
7504 if (length == 8 && dbr_sequence_length () == 0)
7505 nullify = 1;
7507 /* If this is a short forward conditional branch which did not get
7508 its delay slot filled, the delay slot can still be nullified. */
7509 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7510 nullify = forward_branch_p (insn);
7512 switch (length)
7514 case 4:
7515 if (nullify)
7517 if (branch_needs_nop_p (insn))
7518 return "movb,%C2,n %1,%0,%3%#";
7519 else
7520 return "movb,%C2,n %1,%0,%3";
7522 else
7523 return "movb,%C2 %1,%0,%3";
7525 case 8:
7526 /* Handle weird backwards branch with a filled delay slot
7527 which is nullified. */
7528 if (dbr_sequence_length () != 0
7529 && ! forward_branch_p (insn)
7530 && nullify)
7531 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7533 /* Handle short backwards branch with an unfilled delay slot.
7534 Using a movb;nop rather than or;bl saves 1 cycle for both
7535 taken and untaken branches. */
7536 else if (dbr_sequence_length () == 0
7537 && ! forward_branch_p (insn)
7538 && INSN_ADDRESSES_SET_P ()
7539 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7540 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7541 return "movb,%C2 %1,%0,%3%#";
7542 /* Handle normal cases. */
7543 if (nullify)
7544 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7545 else
7546 return "or,%N2 %1,%%r0,%0\n\tb %3";
7548 default:
7549 /* The reversed conditional branch must branch over one additional
7550 instruction if the delay slot is filled and needs to be extracted
7551 by pa_output_lbranch. If the delay slot is empty or this is a
7552 nullified forward branch, the instruction after the reversed
7553 condition branch must be nullified. */
7554 if (dbr_sequence_length () == 0
7555 || (nullify && forward_branch_p (insn)))
7557 nullify = 1;
7558 xdelay = 0;
7559 operands[4] = GEN_INT (length);
7561 else
7563 xdelay = 1;
7564 operands[4] = GEN_INT (length + 4);
7567 if (nullify)
7568 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7569 else
7570 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7572 return pa_output_lbranch (operands[3], insn, xdelay);
7575 /* Deal with gross reload for FP destination register case. */
7576 else if (which_alternative == 1)
7578 /* Move source register to MEM, perform the branch test, then
7579 finally load the FP register from MEM from within the branch's
7580 delay slot. */
7581 output_asm_insn ("stw %1,-16(%%r30)", operands);
7582 if (length == 12)
7583 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7584 else if (length == 16)
7585 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7586 else
7588 operands[4] = GEN_INT (length - 4);
7589 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7590 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7591 return pa_output_lbranch (operands[3], insn, 0);
7594 /* Deal with gross reload from memory case. */
7595 else if (which_alternative == 2)
7597 /* Reload loop counter from memory, the store back to memory
7598 happens in the branch's delay slot. */
7599 if (length == 8)
7600 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7601 else if (length == 12)
7602 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7603 else
7605 operands[4] = GEN_INT (length);
7606 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7607 operands);
7608 return pa_output_lbranch (operands[3], insn, 0);
7611 /* Handle SAR as a destination. */
7612 else
7614 if (length == 8)
7615 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7616 else if (length == 12)
7617 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7618 else
7620 operands[4] = GEN_INT (length);
7621 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7622 operands);
7623 return pa_output_lbranch (operands[3], insn, 0);
7628 /* Copy any FP arguments in INSN into integer registers. */
7629 static void
7630 copy_fp_args (rtx_insn *insn)
7632 rtx link;
7633 rtx xoperands[2];
7635 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7637 int arg_mode, regno;
7638 rtx use = XEXP (link, 0);
7640 if (! (GET_CODE (use) == USE
7641 && GET_CODE (XEXP (use, 0)) == REG
7642 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7643 continue;
7645 arg_mode = GET_MODE (XEXP (use, 0));
7646 regno = REGNO (XEXP (use, 0));
7648 /* Is it a floating point register? */
7649 if (regno >= 32 && regno <= 39)
7651 /* Copy the FP register into an integer register via memory. */
7652 if (arg_mode == SFmode)
7654 xoperands[0] = XEXP (use, 0);
7655 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7656 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7657 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7659 else
7661 xoperands[0] = XEXP (use, 0);
7662 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7663 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7664 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7665 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7671 /* Compute length of the FP argument copy sequence for INSN. */
7672 static int
7673 length_fp_args (rtx_insn *insn)
7675 int length = 0;
7676 rtx link;
7678 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7680 int arg_mode, regno;
7681 rtx use = XEXP (link, 0);
7683 if (! (GET_CODE (use) == USE
7684 && GET_CODE (XEXP (use, 0)) == REG
7685 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7686 continue;
7688 arg_mode = GET_MODE (XEXP (use, 0));
7689 regno = REGNO (XEXP (use, 0));
7691 /* Is it a floating point register? */
7692 if (regno >= 32 && regno <= 39)
7694 if (arg_mode == SFmode)
7695 length += 8;
7696 else
7697 length += 12;
7701 return length;
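/* Editorial sketch (not part of the original source): the byte counts
   above follow the sequences emitted by copy_fp_args -- an SFmode
   argument needs a 4-byte fstw plus a 4-byte ldw (8 bytes), while a
   DFmode argument needs an fstd plus two ldw's (12 bytes).  A minimal
   standalone model, assuming only SF/DF floating-point arguments:  */

static int
length_fp_args_model (int n_sf_args, int n_df_args)
{
  /* 8 bytes per SFmode copy, 12 bytes per DFmode copy.  */
  return n_sf_args * 8 + n_df_args * 12;
}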
7704 /* Return the attribute length for the millicode call instruction INSN.
7705 The length must match the code generated by pa_output_millicode_call.
7706 We include the delay slot in the returned length as it is better to
7707 overestimate the length than to underestimate it. */
7709 int
7710 pa_attr_length_millicode_call (rtx_insn *insn)
7712 unsigned long distance = -1;
7713 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7715 if (INSN_ADDRESSES_SET_P ())
7717 distance = (total + insn_current_reference_address (insn));
7718 if (distance < total)
7719 distance = -1;
7722 if (TARGET_64BIT)
7724 if (!TARGET_LONG_CALLS && distance < 7600000)
7725 return 8;
7727 return 20;
7729 else if (TARGET_PORTABLE_RUNTIME)
7730 return 24;
7731 else
7733 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7734 return 8;
7736 if (!flag_pic)
7737 return 12;
7739 return 24;
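/* Editorial note (illustrative): the DISTANCE computation above is done
   in unsigned arithmetic on purpose -- if the sum wraps (e.g. when
   TOTAL already holds the (unsigned) -1 "unknown" sentinel), it
   compares below TOTAL and DISTANCE is reset to -1, forcing the
   conservative long-sequence lengths.  A standalone sketch of the test:  */

static unsigned long
branch_distance_model (unsigned long total, unsigned long ref_address)
{
  unsigned long distance = total + ref_address;
  if (distance < total)   /* The sum wrapped around.  */
    distance = -1;        /* Treat the target as out of reach.  */
  return distance;
}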
7743 /* INSN is a function call.
7745 CALL_DEST is the routine we are calling. */
7747 const char *
7748 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7750 int attr_length = get_attr_length (insn);
7751 int seq_length = dbr_sequence_length ();
7752 rtx xoperands[4];
7754 xoperands[0] = call_dest;
7756 /* Handle the common case where we are sure that the branch will
7757 reach the beginning of the $CODE$ subspace. The within reach
7758 form of the $$sh_func_adrs call has a length of 28. Because it
7759 has an attribute type of sh_func_adrs, it never has a nonzero
7760 sequence length (i.e., the delay slot is never filled). */
7761 if (!TARGET_LONG_CALLS
7762 && (attr_length == 8
7763 || (attr_length == 28
7764 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7766 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7767 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7769 else
7771 if (TARGET_64BIT)
7773 /* It might seem that one insn could be saved by accessing
7774 the millicode function using the linkage table. However,
7775 this doesn't work in shared libraries and other dynamically
7776 loaded objects. Using a pc-relative sequence also avoids
7777 problems related to the implicit use of the gp register. */
7778 xoperands[1] = gen_rtx_REG (Pmode, 1);
7779 xoperands[2] = xoperands[1];
7780 pa_output_pic_pcrel_sequence (xoperands);
7781 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7783 else if (TARGET_PORTABLE_RUNTIME)
7785 /* Pure portable runtime doesn't allow be/ble; we also don't
7786 have PIC support in the assembler/linker, so this sequence
7787 is needed. */
7789 /* Get the address of our target into %r1. */
7790 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7791 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7793 /* Get our return address into %r31. */
7794 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7795 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7797 /* Jump to our target address in %r1. */
7798 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7800 else if (!flag_pic)
7802 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7803 if (TARGET_PA_20)
7804 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7805 else
7806 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7808 else
7810 xoperands[1] = gen_rtx_REG (Pmode, 31);
7811 xoperands[2] = gen_rtx_REG (Pmode, 1);
7812 pa_output_pic_pcrel_sequence (xoperands);
7814 /* Adjust return address. */
7815 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7817 /* Jump to our target address in %r1. */
7818 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7822 if (seq_length == 0)
7823 output_asm_insn ("nop", xoperands);
7825 return "";
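/* Editorial example (illustrative only): for a 32-bit millicode call
   that is known to be in reach, the common case above emits a single
   pc-relative branch and link with %r31 as the millicode return
   pointer, e.g.

       bl $$mulI,%r31

   and a trailing nop is added only when the delay slot is unfilled.  */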
7828 /* Return the attribute length of the call instruction INSN. The SIBCALL
7829 flag indicates whether INSN is a regular call or a sibling call. The
7830 length returned must be at least as long as the code actually generated
7831 by pa_output_call. Since branch shortening is done before delay branch
7832 sequencing, there is no way to determine whether or not the delay
7833 slot will be filled during branch shortening. Even when the delay
7834 slot is filled, we may have to add a nop if the delay slot contains
7835 a branch that can't reach its target. Thus, we always have to include
7836 the delay slot in the length estimate. This used to be done in
7837 pa_adjust_insn_length but we do it here now as some sequences always
7838 fill the delay slot and we can save four bytes in the estimate for
7839 these sequences. */
7841 int
7842 pa_attr_length_call (rtx_insn *insn, int sibcall)
7844 int local_call;
7845 rtx call, call_dest;
7846 tree call_decl;
7847 int length = 0;
7848 rtx pat = PATTERN (insn);
7849 unsigned long distance = -1;
7851 gcc_assert (CALL_P (insn));
7853 if (INSN_ADDRESSES_SET_P ())
7855 unsigned long total;
7857 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7858 distance = (total + insn_current_reference_address (insn));
7859 if (distance < total)
7860 distance = -1;
7863 gcc_assert (GET_CODE (pat) == PARALLEL);
7865 /* Get the call rtx. */
7866 call = XVECEXP (pat, 0, 0);
7867 if (GET_CODE (call) == SET)
7868 call = SET_SRC (call);
7870 gcc_assert (GET_CODE (call) == CALL);
7872 /* Determine if this is a local call. */
7873 call_dest = XEXP (XEXP (call, 0), 0);
7874 call_decl = SYMBOL_REF_DECL (call_dest);
7875 local_call = call_decl && targetm.binds_local_p (call_decl);
7877 /* pc-relative branch. */
7878 if (!TARGET_LONG_CALLS
7879 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7880 || distance < MAX_PCREL17F_OFFSET))
7881 length += 8;
7883 /* 64-bit plabel sequence. */
7884 else if (TARGET_64BIT && !local_call)
7885 length += sibcall ? 28 : 24;
7887 /* non-pic long absolute branch sequence. */
7888 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7889 length += 12;
7891 /* long pc-relative branch sequence. */
7892 else if (TARGET_LONG_PIC_SDIFF_CALL
7893 || (TARGET_GAS && !TARGET_SOM && local_call))
7895 length += 20;
7897 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7898 length += 8;
7901 /* 32-bit plabel sequence. */
7902 else
7904 length += 32;
7906 if (TARGET_SOM)
7907 length += length_fp_args (insn);
7909 if (flag_pic)
7910 length += 4;
7912 if (!TARGET_PA_20)
7914 if (!sibcall)
7915 length += 8;
7917 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7918 length += 8;
7922 return length;
7925 /* INSN is a function call.
7927 CALL_DEST is the routine we are calling. */
7929 const char *
7930 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7932 int seq_length = dbr_sequence_length ();
7933 tree call_decl = SYMBOL_REF_DECL (call_dest);
7934 int local_call = call_decl && targetm.binds_local_p (call_decl);
7935 rtx xoperands[4];
7937 xoperands[0] = call_dest;
7939 /* Handle the common case where we're sure that the branch will reach
7940 the beginning of the "$CODE$" subspace. This is the beginning of
7941 the current function if we are in a named section. */
7942 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7944 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7945 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7947 else
7949 if (TARGET_64BIT && !local_call)
7951 /* ??? As far as I can tell, the HP linker doesn't support the
7952 long pc-relative sequence described in the 64-bit runtime
7953 architecture. So, we use a slightly longer indirect call. */
7954 xoperands[0] = pa_get_deferred_plabel (call_dest);
7955 xoperands[1] = gen_label_rtx ();
7957 /* If this isn't a sibcall, we put the load of %r27 into the
7958 delay slot. We can't do this in a sibcall as we don't
7959 have a second call-clobbered scratch register available.
7960 We don't need to do anything when generating fast indirect
7961 calls. */
7962 if (seq_length != 0 && !sibcall)
7964 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7965 optimize, 0, NULL);
7967 /* Now delete the delay insn. */
7968 SET_INSN_DELETED (NEXT_INSN (insn));
7969 seq_length = 0;
7972 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7973 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7974 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7976 if (sibcall)
7978 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7979 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7980 output_asm_insn ("bve (%%r1)", xoperands);
7982 else
7984 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7985 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7986 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7987 seq_length = 1;
7990 else
7992 int indirect_call = 0;
7994 /* Emit a long call. There are several different sequences
7995 of increasing length and complexity. In most cases,
7996 they don't allow an instruction in the delay slot. */
7997 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7998 && !TARGET_LONG_PIC_SDIFF_CALL
7999 && !(TARGET_GAS && !TARGET_SOM && local_call)
8000 && !TARGET_64BIT)
8001 indirect_call = 1;
8003 if (seq_length != 0
8004 && !sibcall
8005 && (!TARGET_PA_20
8006 || indirect_call
8007 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
8009 /* A non-jump insn in the delay slot. By definition we can
8010 emit this insn before the call (and in fact before argument
8011 relocation). */
8012 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
8013 NULL);
8015 /* Now delete the delay insn. */
8016 SET_INSN_DELETED (NEXT_INSN (insn));
8017 seq_length = 0;
8020 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8022 /* This is the best sequence for making long calls in
8023 non-pic code. Unfortunately, GNU ld doesn't provide
8024 the stub needed for external calls, and GAS's support
8025 for this with the SOM linker is buggy. It is safe
8026 to use this for local calls. */
8027 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8028 if (sibcall)
8029 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8030 else
8032 if (TARGET_PA_20)
8033 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8034 xoperands);
8035 else
8036 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8038 output_asm_insn ("copy %%r31,%%r2", xoperands);
8039 seq_length = 1;
8042 else
8044 /* The HP assembler and linker can handle relocations for
8045 the difference of two symbols. The HP assembler
8046 recognizes the sequence as a pc-relative call and
8047 the linker provides stubs when needed. */
8049 /* GAS currently can't generate the relocations that
8050 are needed for the SOM linker under HP-UX using this
8051 sequence. The GNU linker doesn't generate the stubs
8052 that are needed for external calls on TARGET_ELF32
8053 with this sequence. For now, we have to use a longer
8054 plabel sequence when using GAS for non local calls. */
8055 if (TARGET_LONG_PIC_SDIFF_CALL
8056 || (TARGET_GAS && !TARGET_SOM && local_call))
8058 xoperands[1] = gen_rtx_REG (Pmode, 1);
8059 xoperands[2] = xoperands[1];
8060 pa_output_pic_pcrel_sequence (xoperands);
8062 else
8064 /* Emit a long plabel-based call sequence. This is
8065 essentially an inline implementation of $$dyncall.
8066 We don't actually try to call $$dyncall as this is
8067 as difficult as calling the function itself. */
8068 xoperands[0] = pa_get_deferred_plabel (call_dest);
8069 xoperands[1] = gen_label_rtx ();
8071 /* Since the call is indirect, FP arguments in registers
8072 need to be copied to the general registers. Then, the
8073 argument relocation stub will copy them back. */
8074 if (TARGET_SOM)
8075 copy_fp_args (insn);
8077 if (flag_pic)
8079 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8080 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8081 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
8083 else
8085 output_asm_insn ("addil LR'%0-$global$,%%r27",
8086 xoperands);
8087 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8088 xoperands);
8091 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
8092 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8093 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8094 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
8096 if (!sibcall && !TARGET_PA_20)
8098 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8099 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8100 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8101 else
8102 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8106 if (TARGET_PA_20)
8108 if (sibcall)
8109 output_asm_insn ("bve (%%r1)", xoperands);
8110 else
8112 if (indirect_call)
8114 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8115 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8116 seq_length = 1;
8118 else
8119 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8122 else
8124 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8125 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8126 xoperands);
8128 if (sibcall)
8130 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8131 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8132 else
8133 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8135 else
8137 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8138 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8139 else
8140 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8142 if (indirect_call)
8143 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8144 else
8145 output_asm_insn ("copy %%r31,%%r2", xoperands);
8146 seq_length = 1;
8153 if (seq_length == 0)
8154 output_asm_insn ("nop", xoperands);
8156 return "";
8159 /* Return the attribute length of the indirect call instruction INSN.
8160 The length must match the code generated by pa_output_indirect_call.
8161 The returned length includes the delay slot. Currently, the delay
8162 slot of an indirect call sequence is not exposed and it is used by
8163 the sequence itself. */
8165 int
8166 pa_attr_length_indirect_call (rtx_insn *insn)
8168 unsigned long distance = -1;
8169 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8171 if (INSN_ADDRESSES_SET_P ())
8173 distance = (total + insn_current_reference_address (insn));
8174 if (distance < total)
8175 distance = -1;
8178 if (TARGET_64BIT)
8179 return 12;
8181 if (TARGET_FAST_INDIRECT_CALLS)
8182 return 8;
8184 if (TARGET_PORTABLE_RUNTIME)
8185 return 16;
8187 /* Inline version of $$dyncall. */
8188 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8189 return 20;
8191 if (!TARGET_LONG_CALLS
8192 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8193 || distance < MAX_PCREL17F_OFFSET))
8194 return 8;
8196 /* Out of reach, can use ble. */
8197 if (!flag_pic)
8198 return 12;
8200 /* Inline version of $$dyncall. */
8201 if (TARGET_NO_SPACE_REGS || TARGET_PA_20)
8202 return 20;
8204 if (!optimize_size)
8205 return 36;
8207 /* Long PIC pc-relative call. */
8208 return 20;
8211 const char *
8212 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8214 rtx xoperands[4];
8215 int length;
8217 if (TARGET_64BIT)
8219 xoperands[0] = call_dest;
8220 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8221 "bve,l (%%r2),%%r2\n\t"
8222 "ldd 24(%0),%%r27", xoperands);
8223 return "";
8226 /* First the special case for kernels, level 0 systems, etc. */
8227 if (TARGET_FAST_INDIRECT_CALLS)
8229 pa_output_arg_descriptor (insn);
8230 if (TARGET_PA_20)
8231 return "bve,l,n (%%r22),%%r2\n\tnop";
8232 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8235 if (TARGET_PORTABLE_RUNTIME)
8237 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8238 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8239 pa_output_arg_descriptor (insn);
8240 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8243 /* Maybe emit a fast inline version of $$dyncall. */
8244 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8246 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8247 "ldw 2(%%r22),%%r19\n\t"
8248 "ldw -2(%%r22),%%r22", xoperands);
8249 pa_output_arg_descriptor (insn);
8250 if (TARGET_NO_SPACE_REGS)
8252 if (TARGET_PA_20)
8253 return "bve,l,n (%%r22),%%r2\n\tnop";
8254 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8256 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8259 /* Now the normal case -- we can reach $$dyncall directly or
8260 we're sure that we can get there via a long-branch stub.
8262 No need to check target flags as the length uniquely identifies
8263 the remaining cases. */
8264 length = pa_attr_length_indirect_call (insn);
8265 if (length == 8)
8267 pa_output_arg_descriptor (insn);
8269 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8270 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8271 variant of the B,L instruction can't be used on the SOM target. */
8272 if (TARGET_PA_20 && !TARGET_SOM)
8273 return "b,l,n $$dyncall,%%r2\n\tnop";
8274 else
8275 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8278 /* Long millicode call, but we are not generating PIC or portable runtime
8279 code. */
8280 if (length == 12)
8282 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8283 pa_output_arg_descriptor (insn);
8284 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8287 /* Maybe emit a fast inline version of $$dyncall. The long PIC
8288 pc-relative call sequence is five instructions. The inline PA 2.0
8289 version of $$dyncall is also five instructions. The PA 1.X versions
8290 are longer but still an overall win. */
8291 if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size)
8293 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8294 "ldw 2(%%r22),%%r19\n\t"
8295 "ldw -2(%%r22),%%r22", xoperands);
8296 if (TARGET_NO_SPACE_REGS)
8298 pa_output_arg_descriptor (insn);
8299 if (TARGET_PA_20)
8300 return "bve,l,n (%%r22),%%r2\n\tnop";
8301 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8303 if (TARGET_PA_20)
8305 pa_output_arg_descriptor (insn);
8306 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8308 output_asm_insn ("bl .+8,%%r2\n\t"
8309 "ldo 16(%%r2),%%r2\n\t"
8310 "ldsid (%%r22),%%r1\n\t"
8311 "mtsp %%r1,%%sr0", xoperands);
8312 pa_output_arg_descriptor (insn);
8313 return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)";
8316 /* We need a long PIC call to $$dyncall. */
8317 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8318 xoperands[1] = gen_rtx_REG (Pmode, 2);
8319 xoperands[2] = gen_rtx_REG (Pmode, 1);
8320 pa_output_pic_pcrel_sequence (xoperands);
8321 pa_output_arg_descriptor (insn);
8322 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8325 /* In HPUX 8.0's shared library scheme, special relocations are needed
8326 for function labels if they might be passed to a function
8327 in a shared library (because shared libraries don't live in code
8328 space), and special magic is needed to construct their address. */
8330 void
8331 pa_encode_label (rtx sym)
8333 const char *str = XSTR (sym, 0);
8334 int len = strlen (str) + 1;
8335 char *newstr, *p;
8337 p = newstr = XALLOCAVEC (char, len + 1);
8338 *p++ = '@';
8339 strcpy (p, str);
8341 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8344 static void
8345 pa_encode_section_info (tree decl, rtx rtl, int first)
8347 int old_referenced = 0;
8349 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8350 old_referenced
8351 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8353 default_encode_section_info (decl, rtl, first);
8355 if (first && TEXT_SPACE_P (decl))
8357 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8358 if (TREE_CODE (decl) == FUNCTION_DECL)
8359 pa_encode_label (XEXP (rtl, 0));
8361 else if (old_referenced)
8362 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8365 /* This is sort of the inverse of pa_encode_section_info. */
8367 static const char *
8368 pa_strip_name_encoding (const char *str)
8370 str += (*str == '@');
8371 str += (*str == '*');
8372 return str;
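/* Editorial sketch (standalone model, not the GCC API): the '@' added
   by pa_encode_label is undone by pa_strip_name_encoding, which also
   drops a leading '*' (GCC's marker for names that bypass the user
   label prefix).  */

#include <stdio.h>

static const char *
strip_name_encoding_model (const char *str)
{
  str += (*str == '@');   /* '@' added by pa_encode_label.  */
  str += (*str == '*');   /* '*' marks a verbatim asm name.  */
  return str;
}

int
main (void)
{
  printf ("%s\n", strip_name_encoding_model ("@foo"));  /* prints foo */
  printf ("%s\n", strip_name_encoding_model ("*bar"));  /* prints bar */
  return 0;
}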
8375 /* Returns 1 if OP is a function label involved in a simple addition
8376 with a constant. Used to keep certain patterns from matching
8377 during instruction combination. */
8378 int
8379 pa_is_function_label_plus_const (rtx op)
8381 /* Strip off any CONST. */
8382 if (GET_CODE (op) == CONST)
8383 op = XEXP (op, 0);
8385 return (GET_CODE (op) == PLUS
8386 && function_label_operand (XEXP (op, 0), VOIDmode)
8387 && GET_CODE (XEXP (op, 1)) == CONST_INT);
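/* Editorial example: the predicate above matches forms such as

       (const (plus (symbol_ref "@foo") (const_int 4)))

   where the symbol is an encoded function label; md patterns test it
   so that such sums do not match during instruction combination.  */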
8390 /* Output assembly code for a thunk to FUNCTION. */
8392 static void
8393 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8394 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8395 tree function)
8397 static unsigned int current_thunk_number;
8398 int val_14 = VAL_14_BITS_P (delta);
8399 unsigned int old_last_address = last_address, nbytes = 0;
8400 char label[17];
8401 rtx xoperands[4];
8403 xoperands[0] = XEXP (DECL_RTL (function), 0);
8404 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8405 xoperands[2] = GEN_INT (delta);
8407 final_start_function (emit_barrier (), file, 1);
8409 /* Output the thunk. We know that the function is in the same
8410 translation unit (i.e., the same space) as the thunk, and that
8411 thunks are output after their method. Thus, we don't need an
8412 external branch to reach the function. With SOM and GAS,
8413 functions and thunks are effectively in different sections.
8414 Thus, we can always use an IA-relative branch and the linker
8415 will add a long branch stub if necessary.
8417 However, we have to be careful when generating PIC code on the
8418 SOM port to ensure that the sequence does not transfer to an
8419 import stub for the target function as this could clobber the
8420 return value saved at SP-24. This would also apply to the
8421 32-bit Linux port if the multi-space model is implemented. */
8422 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8423 && !(flag_pic && TREE_PUBLIC (function))
8424 && (TARGET_GAS || last_address < 262132))
8425 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8426 && ((targetm_common.have_named_sections
8427 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8428 /* The GNU 64-bit linker has rather poor stub management.
8429 So, we use a long branch from thunks that aren't in
8430 the same section as the target function. */
8431 && ((!TARGET_64BIT
8432 && (DECL_SECTION_NAME (thunk_fndecl)
8433 != DECL_SECTION_NAME (function)))
8434 || ((DECL_SECTION_NAME (thunk_fndecl)
8435 == DECL_SECTION_NAME (function))
8436 && last_address < 262132)))
8437 /* In this case, we need to be able to reach the start of
8438 the stub table even though the function is likely closer
8439 and can be jumped to directly. */
8440 || (targetm_common.have_named_sections
8441 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8442 && DECL_SECTION_NAME (function) == NULL
8443 && total_code_bytes < MAX_PCREL17F_OFFSET)
8444 /* Likewise. */
8445 || (!targetm_common.have_named_sections
8446 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8448 if (!val_14)
8449 output_asm_insn ("addil L'%2,%%r26", xoperands);
8451 output_asm_insn ("b %0", xoperands);
8453 if (val_14)
8455 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8456 nbytes += 8;
8458 else
8460 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8461 nbytes += 12;
8464 else if (TARGET_64BIT)
8466 rtx xop[4];
8468 /* We only have one call-clobbered scratch register, so we can't
8469 make use of the delay slot if delta doesn't fit in 14 bits. */
8470 if (!val_14)
8472 output_asm_insn ("addil L'%2,%%r26", xoperands);
8473 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8476 /* Load function address into %r1. */
8477 xop[0] = xoperands[0];
8478 xop[1] = gen_rtx_REG (Pmode, 1);
8479 xop[2] = xop[1];
8480 pa_output_pic_pcrel_sequence (xop);
8482 if (val_14)
8484 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8485 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8486 nbytes += 20;
8488 else
8490 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8491 nbytes += 24;
8494 else if (TARGET_PORTABLE_RUNTIME)
8496 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8497 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8499 if (!val_14)
8500 output_asm_insn ("ldil L'%2,%%r26", xoperands);
8502 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8504 if (val_14)
8506 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8507 nbytes += 16;
8509 else
8511 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8512 nbytes += 20;
8515 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8517 /* The function is accessible from outside this module. The only
8518 way to avoid an import stub between the thunk and function is to
8519 call the function directly with an indirect sequence similar to
8520 that used by $$dyncall. This is possible because $$dyncall acts
8521 as the import stub in an indirect call. */
8522 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8523 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8524 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8525 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8526 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8527 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8528 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8529 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8530 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8532 if (!val_14)
8534 output_asm_insn ("addil L'%2,%%r26", xoperands);
8535 nbytes += 4;
8538 if (TARGET_PA_20)
8540 output_asm_insn ("bve (%%r22)", xoperands);
8541 nbytes += 36;
8543 else if (TARGET_NO_SPACE_REGS)
8545 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8546 nbytes += 36;
8548 else
8550 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8551 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8552 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8553 nbytes += 44;
8556 if (val_14)
8557 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8558 else
8559 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8561 else if (flag_pic)
8563 rtx xop[4];
8565 /* Load function address into %r22. */
8566 xop[0] = xoperands[0];
8567 xop[1] = gen_rtx_REG (Pmode, 1);
8568 xop[2] = gen_rtx_REG (Pmode, 22);
8569 pa_output_pic_pcrel_sequence (xop);
8571 if (!val_14)
8572 output_asm_insn ("addil L'%2,%%r26", xoperands);
8574 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8576 if (val_14)
8578 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8579 nbytes += 20;
8581 else
8583 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8584 nbytes += 24;
8587 else
8589 if (!val_14)
8590 output_asm_insn ("addil L'%2,%%r26", xoperands);
8592 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8593 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8595 if (val_14)
8597 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8598 nbytes += 12;
8600 else
8602 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8603 nbytes += 16;
8607 final_end_function ();
8609 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8611 switch_to_section (data_section);
8612 output_asm_insn (".align 4", xoperands);
8613 ASM_OUTPUT_LABEL (file, label);
8614 output_asm_insn (".word P'%0", xoperands);
8617 current_thunk_number++;
8618 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8619 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8620 last_address += nbytes;
8621 if (old_last_address > last_address)
8622 last_address = UINT_MAX;
8623 update_total_code_bytes (nbytes);
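/* Editorial sketch: the size bookkeeping above rounds NBYTES up to the
   function alignment boundary; a standalone model, assuming the
   boundary in bytes is a power of two (as FUNCTION_BOUNDARY is):  */

static unsigned int
round_to_boundary_model (unsigned int nbytes, unsigned int boundary_bytes)
{
  /* Round NBYTES up to the next multiple of BOUNDARY_BYTES.  */
  return (nbytes + boundary_bytes - 1) & ~(boundary_bytes - 1);
}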
8626 /* Only direct calls to static functions are allowed to be sibling (tail)
8627 call optimized.
8629 This restriction is necessary because some linker-generated stubs
8630 will, in some cases, store return pointers into rp', which might
8631 clobber a live value already in rp'.
8633 In a sibcall the current function and the target function share stack
8634 space. Thus if the path to the current function and the path to the
8635 target function save a value in rp', they save the value into the
8636 same stack slot, which has undesirable consequences.
8638 Because of the deferred binding nature of shared libraries any function
8639 with external scope could be in a different load module and thus require
8640 rp' to be saved when calling that function. So sibcall optimizations
8641 can only be safe for static functions.
8643 Note that GCC never needs return value relocations, so we don't have to
8644 worry about static calls with return value relocations (which require
8645 saving rp').
8647 It is safe to perform a sibcall optimization when the target function
8648 will never return. */
8649 static bool
8650 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8652 if (TARGET_PORTABLE_RUNTIME)
8653 return false;
8655 /* Sibcalls are not ok because the arg pointer register is not a fixed
8656 register. This prevents the sibcall optimization from occurring. In
8657 addition, there are problems with stub placement using GNU ld. This
8658 is because a normal sibcall branch uses a 17-bit relocation while
8659 a regular call branch uses a 22-bit relocation. As a result, more
8660 care needs to be taken in the placement of long-branch stubs. */
8661 if (TARGET_64BIT)
8662 return false;
8664 /* Sibcalls are only ok within a translation unit. */
8665 return (decl && !TREE_PUBLIC (decl));
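/* Editorial example: under the rules above, only the call to F below is
   a sibcall candidate on 32-bit targets:

       static int f (int);   -- local, not public: sibcall OK
       extern int g (int);   -- public: may bind to another load
                                module and need rp' saved  */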
8668 /* ??? Addition is not commutative on the PA due to the weird implicit
8669 space register selection rules for memory addresses. Therefore, we
8670 don't consider a + b == b + a, as this might be inside a MEM. */
8671 static bool
8672 pa_commutative_p (const_rtx x, int outer_code)
8674 return (COMMUTATIVE_P (x)
8675 && (TARGET_NO_SPACE_REGS
8676 || (outer_code != UNKNOWN && outer_code != MEM)
8677 || GET_CODE (x) != PLUS));
8680 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8681 use in fmpyadd instructions. */
8682 int
8683 pa_fmpyaddoperands (rtx *operands)
8685 machine_mode mode = GET_MODE (operands[0]);
8687 /* Must be a floating point mode. */
8688 if (mode != SFmode && mode != DFmode)
8689 return 0;
8691 /* All modes must be the same. */
8692 if (! (mode == GET_MODE (operands[1])
8693 && mode == GET_MODE (operands[2])
8694 && mode == GET_MODE (operands[3])
8695 && mode == GET_MODE (operands[4])
8696 && mode == GET_MODE (operands[5])))
8697 return 0;
8699 /* All operands must be registers. */
8700 if (! (GET_CODE (operands[1]) == REG
8701 && GET_CODE (operands[2]) == REG
8702 && GET_CODE (operands[3]) == REG
8703 && GET_CODE (operands[4]) == REG
8704 && GET_CODE (operands[5]) == REG))
8705 return 0;
8707 /* Only 2 real operands to the addition. One of the input operands must
8708 be the same as the output operand. */
8709 if (! rtx_equal_p (operands[3], operands[4])
8710 && ! rtx_equal_p (operands[3], operands[5]))
8711 return 0;
8713 /* Inout operand of add cannot conflict with any operands from multiply. */
8714 if (rtx_equal_p (operands[3], operands[0])
8715 || rtx_equal_p (operands[3], operands[1])
8716 || rtx_equal_p (operands[3], operands[2]))
8717 return 0;
8719 /* The multiply result cannot feed into the addition operands. */
8720 if (rtx_equal_p (operands[4], operands[0])
8721 || rtx_equal_p (operands[5], operands[0]))
8722 return 0;
8724 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8725 if (mode == SFmode
8726 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8727 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8728 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8729 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8730 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8731 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8732 return 0;
8734 /* Passed. Operands are suitable for fmpyadd. */
8735 return 1;
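/* Editorial example: the six operands checked above encode the pair

       op0 = op1 * op2      (the fmpy half)
       op3 = op4 + op5      (the fadd half, op3 == op4 or op3 == op5)

   and the pair is accepted only when the add's in/out register is
   disjoint from every multiply operand and the multiply result feeds
   neither add input, so the two halves are truly independent.  */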
8738 #if !defined(USE_COLLECT2)
8739 static void
8740 pa_asm_out_constructor (rtx symbol, int priority)
8742 if (!function_label_operand (symbol, VOIDmode))
8743 pa_encode_label (symbol);
8745 #ifdef CTORS_SECTION_ASM_OP
8746 default_ctor_section_asm_out_constructor (symbol, priority);
8747 #else
8748 # ifdef TARGET_ASM_NAMED_SECTION
8749 default_named_section_asm_out_constructor (symbol, priority);
8750 # else
8751 default_stabs_asm_out_constructor (symbol, priority);
8752 # endif
8753 #endif
8756 static void
8757 pa_asm_out_destructor (rtx symbol, int priority)
8759 if (!function_label_operand (symbol, VOIDmode))
8760 pa_encode_label (symbol);
8762 #ifdef DTORS_SECTION_ASM_OP
8763 default_dtor_section_asm_out_destructor (symbol, priority);
8764 #else
8765 # ifdef TARGET_ASM_NAMED_SECTION
8766 default_named_section_asm_out_destructor (symbol, priority);
8767 # else
8768 default_stabs_asm_out_destructor (symbol, priority);
8769 # endif
8770 #endif
8772 #endif
8774 /* This function places uninitialized global data in the bss section.
8775 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8776 function on the SOM port to prevent uninitialized global data from
8777 being placed in the data section. */
8779 void
8780 pa_asm_output_aligned_bss (FILE *stream,
8781 const char *name,
8782 unsigned HOST_WIDE_INT size,
8783 unsigned int align)
8785 switch_to_section (bss_section);
8786 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8788 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8789 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8790 #endif
8792 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8793 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8794 #endif
8796 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8797 ASM_OUTPUT_LABEL (stream, name);
8798 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8801 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8802 that doesn't allow the alignment of global common storage to be directly
8803 specified. The SOM linker aligns common storage based on the rounded
8804 value of the NUM_BYTES parameter in the .comm directive. It's not
8805 possible to use the .align directive as it doesn't affect the alignment
8806 of the label associated with a .comm directive. */
8808 void
8809 pa_asm_output_aligned_common (FILE *stream,
8810 const char *name,
8811 unsigned HOST_WIDE_INT size,
8812 unsigned int align)
8814 unsigned int max_common_align;
8816 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8817 if (align > max_common_align)
8819 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8820 "for global common data. Using %u",
8821 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8822 align = max_common_align;
8825 switch_to_section (bss_section);
8827 assemble_name (stream, name);
8828 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8829 MAX (size, align / BITS_PER_UNIT));
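/* Editorial example (illustrative): for NAME "buf", SIZE 2 and an
   8-byte (64-bit) alignment request, the code above emits

       buf	.comm 8

   -- SIZE is bumped to ALIGN / BITS_PER_UNIT because the SOM linker
   derives the common symbol's alignment from the rounded NUM_BYTES
   value rather than from any .align directive.  */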
8832 /* We can't use .comm for local common storage as the SOM linker effectively
8833 treats the symbol as universal and uses the same storage for local symbols
8834 with the same name in different object files. The .block directive
8835 reserves an uninitialized block of storage. However, it's not common
8836 storage. Fortunately, GCC never requests common storage with the same
8837 name in any given translation unit. */
8839 void
8840 pa_asm_output_aligned_local (FILE *stream,
8841 const char *name,
8842 unsigned HOST_WIDE_INT size,
8843 unsigned int align)
8845 switch_to_section (bss_section);
8846 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8848 #ifdef LOCAL_ASM_OP
8849 fprintf (stream, "%s", LOCAL_ASM_OP);
8850 assemble_name (stream, name);
8851 fprintf (stream, "\n");
8852 #endif
8854 ASM_OUTPUT_LABEL (stream, name);
8855 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8858 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8859 use in fmpysub instructions. */
8860 int
8861 pa_fmpysuboperands (rtx *operands)
8863 machine_mode mode = GET_MODE (operands[0]);
8865 /* Must be a floating point mode. */
8866 if (mode != SFmode && mode != DFmode)
8867 return 0;
8869 /* All modes must be the same. */
8870 if (! (mode == GET_MODE (operands[1])
8871 && mode == GET_MODE (operands[2])
8872 && mode == GET_MODE (operands[3])
8873 && mode == GET_MODE (operands[4])
8874 && mode == GET_MODE (operands[5])))
8875 return 0;
8877 /* All operands must be registers. */
8878 if (! (GET_CODE (operands[1]) == REG
8879 && GET_CODE (operands[2]) == REG
8880 && GET_CODE (operands[3]) == REG
8881 && GET_CODE (operands[4]) == REG
8882 && GET_CODE (operands[5]) == REG))
8883 return 0;
8885 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8886 operation, so operands[4] must be the same as operands[3]. */
8887 if (! rtx_equal_p (operands[3], operands[4]))
8888 return 0;
8890 /* The multiply result cannot feed into the subtraction. */
8891 if (rtx_equal_p (operands[5], operands[0]))
8892 return 0;
8894 /* Inout operand of sub cannot conflict with any operands from multiply. */
8895 if (rtx_equal_p (operands[3], operands[0])
8896 || rtx_equal_p (operands[3], operands[1])
8897 || rtx_equal_p (operands[3], operands[2]))
8898 return 0;
8900 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8901 if (mode == SFmode
8902 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8903 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8904 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8905 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8906 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8907 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8908 return 0;
8910 /* Passed. Operands are suitable for fmpysub. */
8911 return 1;
8914 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8915 constants for a MULT embedded inside a memory address. */
8916 int
8917 pa_mem_shadd_constant_p (int val)
8919 if (val == 2 || val == 4 || val == 8)
8920 return 1;
8921 else
8922 return 0;
8925 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8926 constants for shadd instructions. */
8927 int
8928 pa_shadd_constant_p (int val)
8930 if (val == 1 || val == 2 || val == 3)
8931 return 1;
8932 else
8933 return 0;
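/* Editorial sketch: the two predicates are linked by the relation
   mem_val == 1 << shadd_val -- a MULT by 2, 4 or 8 inside an address
   corresponds to the shift counts 1, 2 or 3 used by the sh1add,
   sh2add and sh3add instructions.  A standalone check using the two
   functions defined above:  */

#include <assert.h>

static void
check_shadd_relation_model (void)
{
  int shift;
  for (shift = 1; shift <= 3; shift++)
    assert (pa_mem_shadd_constant_p (1 << shift)
            && pa_shadd_constant_p (shift));
}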
8936 /* Return TRUE if INSN branches forward. */
8938 static bool
8939 forward_branch_p (rtx_insn *insn)
8941 rtx lab = JUMP_LABEL (insn);
8943 /* The INSN must have a jump label. */
8944 gcc_assert (lab != NULL_RTX);
8946 if (INSN_ADDRESSES_SET_P ())
8947 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8949 while (insn)
8951 if (insn == lab)
8952 return true;
8953 else
8954 insn = NEXT_INSN (insn);
8957 return false;
8960 /* Output an unconditional move and branch insn. */
8962 const char *
8963 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8965 int length = get_attr_length (insn);
8967 /* These are the cases in which we win. */
8968 if (length == 4)
8969 return "mov%I1b,tr %1,%0,%2";
8971 /* None of the following cases win, but they don't lose either. */
8972 if (length == 8)
8974 if (dbr_sequence_length () == 0)
8976 /* Nothing in the delay slot, fake it by putting the combined
8977 insn (the copy or add) in the delay slot of a bl. */
8978 if (GET_CODE (operands[1]) == CONST_INT)
8979 return "b %2\n\tldi %1,%0";
8980 else
8981 return "b %2\n\tcopy %1,%0";
8983 else
8985 /* Something in the delay slot, but we've got a long branch. */
8986 if (GET_CODE (operands[1]) == CONST_INT)
8987 return "ldi %1,%0\n\tb %2";
8988 else
8989 return "copy %1,%0\n\tb %2";
8993 if (GET_CODE (operands[1]) == CONST_INT)
8994 output_asm_insn ("ldi %1,%0", operands);
8995 else
8996 output_asm_insn ("copy %1,%0", operands);
8997 return pa_output_lbranch (operands[2], insn, 1);
9000 /* Output an unconditional add and branch insn. */
9002 const char *
9003 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9005 int length = get_attr_length (insn);
9007 /* To make life easy we want operand0 to be the shared input/output
9008 operand and operand1 to be the readonly operand. */
9009 if (operands[0] == operands[1])
9010 operands[1] = operands[2];
9012 /* These are the cases in which we win. */
9013 if (length == 4)
9014 return "add%I1b,tr %1,%0,%3";
9016 /* None of the following cases win, but they don't lose either. */
9017 if (length == 8)
9019 if (dbr_sequence_length () == 0)
9020 /* Nothing in the delay slot, fake it by putting the combined
9021 insn (the copy or add) in the delay slot of a bl. */
9022 return "b %3\n\tadd%I1 %1,%0,%0";
9023 else
9024 /* Something in the delay slot, but we've got a long branch. */
9025 return "add%I1 %1,%0,%0\n\tb %3";
9028 output_asm_insn ("add%I1 %1,%0,%0", operands);
9029 return pa_output_lbranch (operands[3], insn, 1);
9032 /* We use this hook to perform a PA-specific optimization which is difficult
9033 to do in earlier passes. */
9035 static void
9036 pa_reorg (void)
9038 remove_useless_addtr_insns (1);
9040 if (pa_cpu < PROCESSOR_8000)
9041 pa_combine_instructions ();
9044 /* The PA has a number of odd instructions which can perform multiple
9045 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9046 it may be profitable to combine two instructions into one instruction
9047 with two outputs. It's not profitable on PA2.0 machines because the
9048 two outputs would take two slots in the reorder buffers.
9050 This routine finds instructions which can be combined and combines
9051 them. We only support some of the potential combinations, and we
9052 only try common ways to find suitable instructions.
9054 * addb can add two registers or a register and a small integer
9055 and jump to a nearby (+-8k) location. Normally the jump to the
9056 nearby location is conditional on the result of the add, but by
9057 using the "true" condition we can make the jump unconditional.
9058 Thus addb can perform two independent operations in one insn.
9060 * movb is similar to addb in that it can perform a reg->reg
9061 or small immediate->reg copy and jump to a nearby (+-8k) location.
9063 * fmpyadd and fmpysub can perform an FP multiply and either an
9064 FP add or FP sub if the operands of the multiply and add/sub are
9065 independent (there are other minor restrictions; see the example
9066 after this comment). Note that both the fmpy and fadd/fsub can in
9067 theory move to better spots according to data dependencies, but for
9068 now we require that the fmpy stay at a fixed location.
9070 * Many of the memory operations can perform pre & post updates
9071 of index registers. GCC's pre/post increment/decrement addressing
9072 is far too simple to take advantage of all the possibilities. This
9073 pass may not be suitable since those insns may not be independent.
9075 * comclr can compare two ints or an int and a register, nullify
9076 the following instruction and zero some other register. This
9077 is more difficult to use as it's harder to find an insn which
9078 will generate a comclr than finding something like an unconditional
9079 branch. (conditional moves & long branches create comclr insns).
9081 * Most arithmetic operations can conditionally skip the next
9082 instruction. They can be viewed as "perform this operation
9083 and conditionally jump to this nearby location" (where nearby
9084 is a few insns away). These are difficult to use due to the
9085 branch length restrictions. */
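/* Editorial example (operand syntax illustrative only): on a PA1.1
   machine the pass below can merge an independent pair such as

       fmpy,dbl %fr4,%fr5,%fr6
       fadd,dbl %fr7,%fr8,%fr8

   into a single two-output fmpyadd instruction, provided the operand
   checks in pa_fmpyaddoperands succeed.  */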
9087 static void
9088 pa_combine_instructions (void)
9090 rtx_insn *anchor;
9092 /* This can get expensive since the basic algorithm is on the
9093 order of O(n^2) (or worse). Only do it for -O2 or higher
9094 levels of optimization. */
9095 if (optimize < 2)
9096 return;
9098 /* Walk down the list of insns looking for "anchor" insns which
9099 may be combined with "floating" insns. As the name implies,
9100 "anchor" instructions don't move, while "floating" insns may
9101 move around. */
9102 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9103 rtx_insn *new_rtx = make_insn_raw (par);
9105 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9107 enum attr_pa_combine_type anchor_attr;
9108 enum attr_pa_combine_type floater_attr;
9110 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9111 Also ignore any special USE insns. */
9112 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9113 || GET_CODE (PATTERN (anchor)) == USE
9114 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9115 continue;
9117 anchor_attr = get_attr_pa_combine_type (anchor);
9118 /* See if anchor is an insn suitable for combination. */
9119 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9120 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9121 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9122 && ! forward_branch_p (anchor)))
9124 rtx_insn *floater;
9126 for (floater = PREV_INSN (anchor);
9127 floater;
9128 floater = PREV_INSN (floater))
9130 if (NOTE_P (floater)
9131 || (NONJUMP_INSN_P (floater)
9132 && (GET_CODE (PATTERN (floater)) == USE
9133 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9134 continue;
9136 /* Anything except a regular INSN will stop our search. */
9137 if (! NONJUMP_INSN_P (floater))
9139 floater = NULL;
9140 break;
9143 /* See if FLOATER is suitable for combination with the
9144 anchor. */
9145 floater_attr = get_attr_pa_combine_type (floater);
9146 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9147 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9148 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9149 && floater_attr == PA_COMBINE_TYPE_FMPY))
9151 /* If ANCHOR and FLOATER can be combined, then we're
9152 done with this pass. */
9153 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9154 SET_DEST (PATTERN (floater)),
9155 XEXP (SET_SRC (PATTERN (floater)), 0),
9156 XEXP (SET_SRC (PATTERN (floater)), 1)))
9157 break;
9160 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9161 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9163 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9165 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9166 SET_DEST (PATTERN (floater)),
9167 XEXP (SET_SRC (PATTERN (floater)), 0),
9168 XEXP (SET_SRC (PATTERN (floater)), 1)))
9169 break;
9171 else
9173 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9174 SET_DEST (PATTERN (floater)),
9175 SET_SRC (PATTERN (floater)),
9176 SET_SRC (PATTERN (floater))))
9177 break;
9182 /* If we didn't find anything on the backwards scan, try forwards. */
9183 if (!floater
9184 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9185 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9187 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9189 if (NOTE_P (floater)
9190 || (NONJUMP_INSN_P (floater)
9191 && (GET_CODE (PATTERN (floater)) == USE
9192 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9194 continue;
9196 /* Anything except a regular INSN will stop our search. */
9197 if (! NONJUMP_INSN_P (floater))
9199 floater = NULL;
9200 break;
9203 /* See if FLOATER is suitable for combination with the
9204 anchor. */
9205 floater_attr = get_attr_pa_combine_type (floater);
9206 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9207 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9208 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9209 && floater_attr == PA_COMBINE_TYPE_FMPY))
9211 /* If ANCHOR and FLOATER can be combined, then we're
9212 done with this pass. */
9213 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9214 SET_DEST (PATTERN (floater)),
9215 XEXP (SET_SRC (PATTERN (floater)),
9217 XEXP (SET_SRC (PATTERN (floater)),
9218 1)))
9219 break;
9224 /* FLOATER will be nonzero if we found a suitable floating
9225 insn for combination with ANCHOR. */
9226 if (floater
9227 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9228 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9230 /* Emit the new instruction and delete the old anchor. */
9231 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9232 copy_rtx (PATTERN (floater)));
9233 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9234 emit_insn_before (temp, anchor);
9236 SET_INSN_DELETED (anchor);
9238 /* Emit a special USE insn for FLOATER, then delete
9239 the floating insn. */
9240 temp = copy_rtx (PATTERN (floater));
9241 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9242 delete_insn (floater);
9244 continue;
9246 else if (floater
9247 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9249 /* Emit the new jump instruction and delete the old anchor. */
9250 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9251 copy_rtx (PATTERN (floater)));
9252 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9253 temp = emit_jump_insn_before (temp, anchor);
9255 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9256 SET_INSN_DELETED (anchor);
9258 /* Emit a special USE insn for FLOATER, then delete
9259 the floating insn. */
9260 temp = copy_rtx (PATTERN (floater));
9261 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9262 delete_insn (floater);
9263 continue;
9269 static int
9270 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9271 int reversed, rtx dest,
9272 rtx src1, rtx src2)
9274 int insn_code_number;
9275 rtx_insn *start, *end;
9277 /* Create a PARALLEL with the patterns of ANCHOR and
9278 FLOATER, try to recognize it, then test constraints
9279 for the resulting pattern.
9281 If the pattern doesn't match or the constraints
9282 aren't met, keep searching for a suitable floater
9283 insn. */
9284 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9285 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9286 INSN_CODE (new_rtx) = -1;
9287 insn_code_number = recog_memoized (new_rtx);
9288 basic_block bb = BLOCK_FOR_INSN (anchor);
9289 if (insn_code_number < 0
9290 || (extract_insn (new_rtx),
9291 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9292 return 0;
9294 if (reversed)
9296 start = anchor;
9297 end = floater;
9299 else
9301 start = floater;
9302 end = anchor;
9305 /* There are up to three operands to consider: one
9306 output and two inputs.
9308 The output must not be used between FLOATER & ANCHOR
9309 exclusive. The inputs must not be set between
9310 FLOATER and ANCHOR exclusive. */
9312 if (reg_used_between_p (dest, start, end))
9313 return 0;
9315 if (reg_set_between_p (src1, start, end))
9316 return 0;
9318 if (reg_set_between_p (src2, start, end))
9319 return 0;
9321 /* If we get here, then everything is good. */
9322 return 1;
9325 /* Return nonzero if references for INSN are delayed.
9327 Millicode insns are actually function calls with some special
9328 constraints on arguments and register usage.
9330 Millicode calls always expect their arguments in the integer argument
9331 registers, and always return their result in %r29 (ret1). They
9332 are expected to clobber their arguments, %r1, %r29, and the return
9333 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9335 This function tells reorg that the references to arguments and
9336 millicode calls do not appear to happen until after the millicode call.
9337 This allows reorg to put insns which set the argument registers into the
9338 delay slot of the millicode call -- thus they act more like traditional
9339 CALL_INSNs.
9341 Note we cannot consider side effects of the insn to be delayed because
9342 the branch and link insn will clobber the return pointer. If we happened
9343 to use the return pointer in the delay slot of the call, then we lose.
9345 get_attr_type will try to recognize the given insn, so make sure to
9346 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9347 in particular. */
9348 int
9349 pa_insn_refs_are_delayed (rtx_insn *insn)
9351 return ((NONJUMP_INSN_P (insn)
9352 && GET_CODE (PATTERN (insn)) != SEQUENCE
9353 && GET_CODE (PATTERN (insn)) != USE
9354 && GET_CODE (PATTERN (insn)) != CLOBBER
9355 && get_attr_type (insn) == TYPE_MILLI));
9358 /* Promote the return value, but not the arguments. */
9360 static machine_mode
9361 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9362 machine_mode mode,
9363 int *punsignedp ATTRIBUTE_UNUSED,
9364 const_tree fntype ATTRIBUTE_UNUSED,
9365 int for_return)
9367 if (for_return == 0)
9368 return mode;
9369 return promote_mode (type, mode, punsignedp);
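/* Editorial example: for a function returning `short', the hook above
   widens the return mode to word_mode via promote_mode, while a
   `short' argument keeps HImode -- arguments are deliberately left
   unpromoted.  */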
9372 /* On the HP-PA the value is found in register(s) 28(-29), unless
9373 the mode is SF or DF. Then the value is returned in fr4 (32).
9375 This must perform the same promotions as PROMOTE_MODE, else promoting
9376 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9378 Small structures must be returned in a PARALLEL on PA64 in order
9379 to match the HP Compiler ABI. */
9381 static rtx
9382 pa_function_value (const_tree valtype,
9383 const_tree func ATTRIBUTE_UNUSED,
9384 bool outgoing ATTRIBUTE_UNUSED)
9386 machine_mode valmode;
9388 if (AGGREGATE_TYPE_P (valtype)
9389 || TREE_CODE (valtype) == COMPLEX_TYPE
9390 || TREE_CODE (valtype) == VECTOR_TYPE)
9392 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9394 /* Handle aggregates that fit exactly in a word or double word. */
9395 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9396 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9398 if (TARGET_64BIT)
9400 /* Aggregates with a size less than or equal to 128 bits are
9401 returned in GR 28(-29). They are left justified. The pad
9402 bits are undefined. Larger aggregates are returned in
9403 memory. */
9404 rtx loc[2];
9405 int i, offset = 0;
9406 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9408 for (i = 0; i < ub; i++)
9410 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9411 gen_rtx_REG (DImode, 28 + i),
9412 GEN_INT (offset));
9413 offset += 8;
9416 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9418 else if (valsize > UNITS_PER_WORD)
9420 /* Aggregates 5 to 8 bytes in size are returned in general
9421 registers r28-r29 in the same manner as other non
9422 floating-point objects. The data is right-justified and
9423 zero-extended to 64 bits. This is opposite to the normal
9424 justification used on big endian targets and requires
9425 special treatment. */
9426 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9427 gen_rtx_REG (DImode, 28), const0_rtx);
9428 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9432 if ((INTEGRAL_TYPE_P (valtype)
9433 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9434 || POINTER_TYPE_P (valtype))
9435 valmode = word_mode;
9436 else
9437 valmode = TYPE_MODE (valtype);
9439 if (TREE_CODE (valtype) == REAL_TYPE
9440 && !AGGREGATE_TYPE_P (valtype)
9441 && TYPE_MODE (valtype) != TFmode
9442 && !TARGET_SOFT_FLOAT)
9443 return gen_rtx_REG (valmode, 32);
9445 return gen_rtx_REG (valmode, 28);
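/* Editorial examples of the conventions above: an `int' is returned in
   %r28; a `double' is returned in %fr4 (register 32) unless soft float
   is in effect; and on PA64 a 12-byte struct comes back left justified
   in the two-register PARALLEL (%r28, %r29).  */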
9448 /* Implement the TARGET_LIBCALL_VALUE hook. */
9450 static rtx
9451 pa_libcall_value (machine_mode mode,
9452 const_rtx fun ATTRIBUTE_UNUSED)
9454 if (! TARGET_SOFT_FLOAT
9455 && (mode == SFmode || mode == DFmode))
9456 return gen_rtx_REG (mode, 32);
9457 else
9458 return gen_rtx_REG (mode, 28);
9461 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9463 static bool
9464 pa_function_value_regno_p (const unsigned int regno)
9466 if (regno == 28
9467 || (! TARGET_SOFT_FLOAT && regno == 32))
9468 return true;
9470 return false;
9473 /* Update the data in CUM to advance over an argument
9474 of mode MODE and data type TYPE.
9475 (TYPE is null for libcalls where that information may not be available.) */
9477 static void
9478 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9479 const_tree type, bool named ATTRIBUTE_UNUSED)
9481 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9482 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9484 cum->nargs_prototype--;
9485 cum->words += (arg_size
9486 + ((cum->words & 01)
9487 && type != NULL_TREE
9488 && arg_size > 1));
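/* A hypothetical worked example of the accounting above for a 32-bit
   call f (int, double).  arg_size is in words, and an odd cum->words
   forces one pad word before a multi-word argument:  */
#if 0
int words = 0;
words += 1;                           /* int: arg_size 1 -> words == 1 */
words += 2 + ((words & 1) && 2 > 1);  /* double: pad + 2 -> words == 4 */
#endif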
9491 /* Return the location of a parameter that is passed in a register or NULL
9492 if the parameter has any component that is passed in memory.
9494 This is new code and will be pushed into the net sources after
9495 further testing.
9497 ??? We might want to restructure this so that it looks more like other
9498 ports. */
9499 static rtx
9500 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9501 const_tree type, bool named ATTRIBUTE_UNUSED)
9503 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9504 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9505 int alignment = 0;
9506 int arg_size;
9507 int fpr_reg_base;
9508 int gpr_reg_base;
9509 rtx retval;
9511 if (mode == VOIDmode)
9512 return NULL_RTX;
9514 arg_size = FUNCTION_ARG_SIZE (mode, type);
9516 /* If this arg would be passed partially or totally on the stack, then
9517 this routine should return zero. pa_arg_partial_bytes will
9518 handle arguments which are split between regs and stack slots if
9519 the ABI mandates split arguments. */
9520 if (!TARGET_64BIT)
9522 /* The 32-bit ABI does not split arguments. */
9523 if (cum->words + arg_size > max_arg_words)
9524 return NULL_RTX;
9526 else
9528 if (arg_size > 1)
9529 alignment = cum->words & 1;
9530 if (cum->words + alignment >= max_arg_words)
9531 return NULL_RTX;
9534 /* The 32bit ABIs and the 64bit ABIs are rather different,
9535 particularly in their handling of FP registers. We might
9536 be able to cleverly share code between them, but I'm not
9537 going to bother in the hope that splitting them up results
9538 in code that is more easily understood. */
9540 if (TARGET_64BIT)
9542 /* Advance the base registers to their current locations.
9544 Remember, gprs grow towards smaller register numbers while
9545 fprs grow to higher register numbers. Also remember that
9546 although FP regs are 32-bit addressable, we pretend that
9547 the registers are 64-bits wide. */
9548 gpr_reg_base = 26 - cum->words;
9549 fpr_reg_base = 32 + cum->words;
9551 /* Arguments wider than one word and small aggregates need special
9552 treatment. */
9553 if (arg_size > 1
9554 || mode == BLKmode
9555 || (type && (AGGREGATE_TYPE_P (type)
9556 || TREE_CODE (type) == COMPLEX_TYPE
9557 || TREE_CODE (type) == VECTOR_TYPE)))
9559 /* Double-extended precision (80-bit), quad-precision (128-bit)
9560 and aggregates including complex numbers are aligned on
9561 128-bit boundaries. The first eight 64-bit argument slots
9562 are associated one-to-one, with general registers r26
9563 through r19, and also with floating-point registers fr4
9564 through fr11. Arguments larger than one word are always
9565 passed in general registers.
9567 Using a PARALLEL with a word mode register results in left
9568 justified data on a big-endian target. */
9570 rtx loc[8];
9571 int i, offset = 0, ub = arg_size;
9573 /* Align the base register. */
9574 gpr_reg_base -= alignment;
9576 ub = MIN (ub, max_arg_words - cum->words - alignment);
9577 for (i = 0; i < ub; i++)
9579 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9580 gen_rtx_REG (DImode, gpr_reg_base),
9581 GEN_INT (offset));
9582 gpr_reg_base -= 1;
9583 offset += 8;
9586 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9589 else
9591 /* If the argument is larger than a word, then we know precisely
9592 which registers we must use. */
9593 if (arg_size > 1)
9595 if (cum->words)
9597 gpr_reg_base = 23;
9598 fpr_reg_base = 38;
9600 else
9602 gpr_reg_base = 25;
9603 fpr_reg_base = 34;
9606 /* Structures 5 to 8 bytes in size are passed in the general
9607 registers in the same manner as other non-floating-point
9608 objects. The data is right-justified and zero-extended
9609 to 64 bits. This is opposite to the normal justification
9610 used on big endian targets and requires special treatment.
9611 We now define BLOCK_REG_PADDING to pad these objects.
9612 Aggregates, complex and vector types are passed in the same
9613 manner as structures. */
9614 if (mode == BLKmode
9615 || (type && (AGGREGATE_TYPE_P (type)
9616 || TREE_CODE (type) == COMPLEX_TYPE
9617 || TREE_CODE (type) == VECTOR_TYPE)))
9619 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9620 gen_rtx_REG (DImode, gpr_reg_base),
9621 const0_rtx);
9622 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9625 else
9627 /* We have a single word (32 bits). A simple computation
9628 will get us the register #s we need. */
9629 gpr_reg_base = 26 - cum->words;
9630 fpr_reg_base = 32 + 2 * cum->words;
9634 /* Determine if the argument needs to be passed in both general and
9635 floating point registers. */
9636 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9637 /* If we are doing soft-float with portable runtime, then there
9638 is no need to worry about FP regs. */
9639 && !TARGET_SOFT_FLOAT
9640 /* The parameter must be some kind of scalar float, else we just
9641 pass it in integer registers. */
9642 && GET_MODE_CLASS (mode) == MODE_FLOAT
9643 /* The target function must not have a prototype. */
9644 && cum->nargs_prototype <= 0
9645 /* libcalls do not need to pass items in both FP and general
9646 registers. */
9647 && type != NULL_TREE
9648 /* All this hair applies to "outgoing" args only. This includes
9649 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9650 && !cum->incoming)
9651 /* Also pass outgoing floating arguments in both registers in indirect
9652 calls with the 32 bit ABI and the HP assembler since there is no
9653 way to specify argument locations in static functions.  */
9654 || (!TARGET_64BIT
9655 && !TARGET_GAS
9656 && !cum->incoming
9657 && cum->indirect
9658 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9660 retval
9661 = gen_rtx_PARALLEL
9662 (mode,
9663 gen_rtvec (2,
9664 gen_rtx_EXPR_LIST (VOIDmode,
9665 gen_rtx_REG (mode, fpr_reg_base),
9666 const0_rtx),
9667 gen_rtx_EXPR_LIST (VOIDmode,
9668 gen_rtx_REG (mode, gpr_reg_base),
9669 const0_rtx)));
9671 else
9673 /* See if we should pass this parameter in a general register. */
9674 if (TARGET_SOFT_FLOAT
9675 /* Indirect calls in the normal 32bit ABI require all arguments
9676 to be passed in general registers. */
9677 || (!TARGET_PORTABLE_RUNTIME
9678 && !TARGET_64BIT
9679 && !TARGET_ELF32
9680 && cum->indirect)
9681 /* If the parameter is not a scalar floating-point parameter,
9682 then it belongs in GPRs. */
9683 || GET_MODE_CLASS (mode) != MODE_FLOAT
9684 /* Structure with single SFmode field belongs in GPR. */
9685 || (type && AGGREGATE_TYPE_P (type)))
9686 retval = gen_rtx_REG (mode, gpr_reg_base);
9687 else
9688 retval = gen_rtx_REG (mode, fpr_reg_base);
9690 return retval;
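/* A hypothetical worked example of the 64-bit mapping above for a
   prototyped call f (long a, long b, double c).  Register 32 is %fr4,
   so argument slot N maps to %fr(4 + N):  */
#if 0
int words = 2;                 /* two long args placed in %r26, %r25  */
int gpr_reg_base = 26 - words; /* == 24, the next GPR candidate       */
int fpr_reg_base = 32 + words; /* == 34, so the double lands in %fr6  */
#endif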
9693 /* Arguments larger than one word are double word aligned. */
9695 static unsigned int
9696 pa_function_arg_boundary (machine_mode mode, const_tree type)
9698 bool singleword = (type
9699 ? (integer_zerop (TYPE_SIZE (type))
9700 || !TREE_CONSTANT (TYPE_SIZE (type))
9701 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9702 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9704 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
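/* Hypothetical examples of the rule above on the 32-bit target: int
   and float use PARM_BOUNDARY (one word), while double, long long and
   aggregates larger than a word use MAX_PARM_BOUNDARY (a double
   word).  Zero-sized and variable-sized types fall back to the
   single-word boundary.  */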
9707 /* If this arg would be passed totally in registers or totally on the stack,
9708 then this routine should return zero. */
9710 static int
9711 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9712 tree type, bool named ATTRIBUTE_UNUSED)
9714 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9715 unsigned int max_arg_words = 8;
9716 unsigned int offset = 0;
9718 if (!TARGET_64BIT)
9719 return 0;
9721 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9722 offset = 1;
9724 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9725 /* Arg fits fully into registers. */
9726 return 0;
9727 else if (cum->words + offset >= max_arg_words)
9728 /* Arg fully on the stack. */
9729 return 0;
9730 else
9731 /* Arg is split. */
9732 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
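/* Hypothetical worked examples of the three cases above (PA64,
   UNITS_PER_WORD == 8, max_arg_words == 8): with cum->words == 6 and
   a 4-word aggregate, 6 + 0 + 4 > 8 and 6 < 8, so the argument is
   split and (8 - 6 - 0) * 8 == 16 bytes land in registers; with
   cum->words == 7 and a 2-word argument, offset == 1 and 7 + 1 >= 8,
   so the argument goes entirely on the stack and the result is 0.  */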
9736 /* A get_unnamed_section callback for switching to the text section.
9738 This function is only used with SOM. Because we don't support
9739 named subspaces, we can only create a new subspace or switch back
9740 to the default text subspace. */
9742 static void
9743 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9745 gcc_assert (TARGET_SOM);
9746 if (TARGET_GAS)
9748 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9750 /* We only want to emit a .nsubspa directive once at the
9751 start of the function. */
9752 cfun->machine->in_nsubspa = 1;
9754 /* Create a new subspace for the text. This provides
9755 better stub placement and one-only functions. */
9756 if (cfun->decl
9757 && DECL_ONE_ONLY (cfun->decl)
9758 && !DECL_WEAK (cfun->decl))
9760 output_section_asm_op ("\t.SPACE $TEXT$\n"
9761 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9762 "ACCESS=44,SORT=24,COMDAT");
9763 return;
9766 else
9768 /* There isn't a current function, or the body of the current
9769 function has been completed. So, we are changing to the
9770 text section to output debugging information. Thus, we
9771 need to forget that we are in the text section so that
9772 varasm.c will call us when text_section is selected again. */
9773 gcc_assert (!cfun || !cfun->machine
9774 || cfun->machine->in_nsubspa == 2);
9775 in_section = NULL;
9777 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9778 return;
9780 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9783 /* A get_unnamed_section callback for switching to comdat data
9784 sections. This function is only used with SOM. */
9786 static void
9787 som_output_comdat_data_section_asm_op (const void *data)
9789 in_section = NULL;
9790 output_section_asm_op (data);
9793 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9795 static void
9796 pa_som_asm_init_sections (void)
9798 text_section
9799 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9801 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9802 is not being generated. */
9803 som_readonly_data_section
9804 = get_unnamed_section (0, output_section_asm_op,
9805 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9807 /* When secondary definitions are not supported, SOM makes readonly
9808 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9809 the comdat flag. */
9810 som_one_only_readonly_data_section
9811 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9812 "\t.SPACE $TEXT$\n"
9813 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9814 "ACCESS=0x2c,SORT=16,COMDAT");
9817 /* When secondary definitions are not supported, SOM makes data one-only
9818 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9819 som_one_only_data_section
9820 = get_unnamed_section (SECTION_WRITE,
9821 som_output_comdat_data_section_asm_op,
9822 "\t.SPACE $PRIVATE$\n"
9823 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9824 "ACCESS=31,SORT=24,COMDAT");
9826 if (flag_tm)
9827 som_tm_clone_table_section
9828 = get_unnamed_section (0, output_section_asm_op,
9829 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9831 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9832 which reference data within the $TEXT$ space (for example constant
9833 strings in the $LIT$ subspace).
9835 The assemblers (GAS and HP as) both have problems with handling
9836 the difference of two symbols which is the other correct way to
9837 reference constant data during PIC code generation.
9839 So, there's no way to reference constant data which is in the
9840 $TEXT$ space during PIC generation.  Instead, we place all constant
9841 data into the $PRIVATE$ subspace (this reduces sharing, but it
9842 works correctly). */
9843 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9845 /* We must not have a reference to an external symbol defined in a
9846 shared library in a readonly section, else the SOM linker will
9847 complain.
9849 So, we force exception information into the data section. */
9850 exception_section = data_section;
9853 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9855 static section *
9856 pa_som_tm_clone_table_section (void)
9858 return som_tm_clone_table_section;
9861 /* On hpux10, the linker will give an error if we have a reference
9862 in the read-only data section to a symbol defined in a shared
9863 library. Therefore, expressions that might require a reloc can
9864 not be placed in the read-only data section. */
9866 static section *
9867 pa_select_section (tree exp, int reloc,
9868 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9870 if (TREE_CODE (exp) == VAR_DECL
9871 && TREE_READONLY (exp)
9872 && !TREE_THIS_VOLATILE (exp)
9873 && DECL_INITIAL (exp)
9874 && (DECL_INITIAL (exp) == error_mark_node
9875 || TREE_CONSTANT (DECL_INITIAL (exp)))
9876 && !reloc)
9878 if (TARGET_SOM
9879 && DECL_ONE_ONLY (exp)
9880 && !DECL_WEAK (exp))
9881 return som_one_only_readonly_data_section;
9882 else
9883 return readonly_data_section;
9885 else if (CONSTANT_CLASS_P (exp) && !reloc)
9886 return readonly_data_section;
9887 else if (TARGET_SOM
9888 && TREE_CODE (exp) == VAR_DECL
9889 && DECL_ONE_ONLY (exp)
9890 && !DECL_WEAK (exp))
9891 return som_one_only_data_section;
9892 else
9893 return data_section;
9896 /* Implement pa_reloc_rw_mask. */
9898 static int
9899 pa_reloc_rw_mask (void)
9901 /* We force (const (plus (symbol) (const_int))) to memory when the
9902 const_int doesn't fit in a 14-bit integer. The SOM linker can't
9903 handle this construct in read-only memory and we want to avoid
9904 this for ELF. So, we always force an RTX needing relocation to
9905 the data section. */
9906 return 3;
9909 static void
9910 pa_globalize_label (FILE *stream, const char *name)
9912 /* We only handle DATA objects here, functions are globalized in
9913 ASM_DECLARE_FUNCTION_NAME. */
9914 if (! FUNCTION_NAME_P (name))
9916 fputs ("\t.EXPORT ", stream);
9917 assemble_name (stream, name);
9918 fputs (",DATA\n", stream);
9922 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9924 static rtx
9925 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9926 int incoming ATTRIBUTE_UNUSED)
9928 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9931 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9933 bool
9934 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9936 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9937 PA64 ABI says that objects larger than 128 bits are returned in memory.
9938 Note, int_size_in_bytes can return -1 if the size of the object is
9939 variable or larger than the maximum value that can be expressed as
9940 a HOST_WIDE_INT. It can also return zero for an empty type. The
9941 simplest way to handle variable and empty types is to pass them in
9942 memory. This avoids problems in defining the boundaries of argument
9943 slots, allocating registers, etc. */
9944 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9945 || int_size_in_bytes (type) <= 0);
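/* Hypothetical worked examples of the predicate above: on the 32-bit
   target, struct { int a, b; } is 8 bytes and is returned in
   registers, while struct { int a, b, c; } is 12 bytes and is
   returned in memory; an empty or variable-sized type makes
   int_size_in_bytes return a value <= 0 and so is returned in memory;
   on PA64 the register limit is 16 bytes rather than 8.  */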
9948 /* Structure to hold declaration and name of external symbols that are
9949 emitted by GCC. We generate a vector of these symbols and output them
9950 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9951 This avoids putting out names that are never really used. */
9953 typedef struct GTY(()) extern_symbol
9955 tree decl;
9956 const char *name;
9957 } extern_symbol;
9961 /* Vector of extern_symbol pointers, in GC-managed memory. */
9962 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9964 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9965 /* Mark DECL (name NAME) as an external reference (assembler output
9966 file FILE). This saves the names to output at the end of the file
9967 if actually referenced. */
9969 void
9970 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9972 gcc_assert (file == asm_out_file);
9973 extern_symbol p = {decl, name};
9974 vec_safe_push (extern_symbols, p);
9976 #endif
9978 /* Output text required at the end of an assembler file.
9979 This includes deferred plabels and .import directives for
9980 all external symbols that were actually referenced. */
9982 static void
9983 pa_file_end (void)
9985 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9986 unsigned int i;
9987 extern_symbol *p;
9989 if (!NO_DEFERRED_PROFILE_COUNTERS)
9990 output_deferred_profile_counters ();
9991 #endif
9993 output_deferred_plabels ();
9995 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9996 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9998 tree decl = p->decl;
10000 if (!TREE_ASM_WRITTEN (decl)
10001 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
10002 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
10005 vec_free (extern_symbols);
10006 #endif
10008 if (NEED_INDICATE_EXEC_STACK)
10009 file_end_indicate_exec_stack ();
10012 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10014 static bool
10015 pa_can_change_mode_class (machine_mode from, machine_mode to,
10016 reg_class_t rclass)
10018 if (from == to)
10019 return true;
10021 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10022 return true;
10024 /* Reject changes to/from modes with zero size. */
10025 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10026 return false;
10028 /* Reject changes to/from complex and vector modes. */
10029 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10030 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10031 return false;
10033 /* There is no way to load QImode or HImode values directly from memory
10034 to a FP register. SImode loads to the FP registers are not zero
10035 extended. On the 64-bit target, this conflicts with the definition
10036 of LOAD_EXTEND_OP. Thus, we can't allow changing between modes with
10037 different sizes in the floating-point registers. */
10038 if (MAYBE_FP_REG_CLASS_P (rclass))
10039 return false;
10041 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10042 in specific sets of registers. Thus, we cannot allow changing
10043 to a larger mode when it's larger than a word. */
10044 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10045 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10046 return false;
10048 return true;
10051 /* Implement TARGET_MODES_TIEABLE_P.
10053 We should return FALSE for QImode and HImode because these modes
10054 are not ok in the floating-point registers. However, this prevents
10055 tying these modes to SImode and DImode in the general registers.
10056 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
10057 TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10058 in the floating-point registers. */
10060 static bool
10061 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10063 /* Don't tie modes in different classes. */
10064 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10065 return false;
10067 return true;
10071 /* Length in units of the trampoline instruction code. */
10073 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10076 /* Output assembler code for a block containing the constant parts
10077 of a trampoline, leaving space for the variable parts.
10079 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10080 and then branches to the specified routine.
10082 This code template is copied from the text segment to a stack location,
10083 patched by pa_trampoline_init to contain valid values, and then
10084 entered as a subroutine.
10086 It is best to keep this as small as possible to avoid having to
10087 flush multiple lines in the cache. */
10089 static void
10090 pa_asm_trampoline_template (FILE *f)
10092 if (!TARGET_64BIT)
10094 fputs ("\tldw 36(%r22),%r21\n", f);
10095 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
10096 if (ASSEMBLER_DIALECT == 0)
10097 fputs ("\tdepi 0,31,2,%r21\n", f);
10098 else
10099 fputs ("\tdepwi 0,31,2,%r21\n", f);
10100 fputs ("\tldw 4(%r21),%r19\n", f);
10101 fputs ("\tldw 0(%r21),%r21\n", f);
10102 if (TARGET_PA_20)
10104 fputs ("\tbve (%r21)\n", f);
10105 fputs ("\tldw 40(%r22),%r29\n", f);
10106 fputs ("\t.word 0\n", f);
10107 fputs ("\t.word 0\n", f);
10109 else
10111 fputs ("\tldsid (%r21),%r1\n", f);
10112 fputs ("\tmtsp %r1,%sr0\n", f);
10113 fputs ("\tbe 0(%sr0,%r21)\n", f);
10114 fputs ("\tldw 40(%r22),%r29\n", f);
10116 fputs ("\t.word 0\n", f);
10117 fputs ("\t.word 0\n", f);
10118 fputs ("\t.word 0\n", f);
10119 fputs ("\t.word 0\n", f);
10121 else
10123 fputs ("\t.dword 0\n", f);
10124 fputs ("\t.dword 0\n", f);
10125 fputs ("\t.dword 0\n", f);
10126 fputs ("\t.dword 0\n", f);
10127 fputs ("\tmfia %r31\n", f);
10128 fputs ("\tldd 24(%r31),%r1\n", f);
10129 fputs ("\tldd 24(%r1),%r27\n", f);
10130 fputs ("\tldd 16(%r1),%r1\n", f);
10131 fputs ("\tbve (%r1)\n", f);
10132 fputs ("\tldd 32(%r31),%r31\n", f);
10133 fputs ("\t.dword 0 ; fptr\n", f);
10134 fputs ("\t.dword 0 ; static link\n", f);
10138 /* Emit RTL insns to initialize the variable parts of a trampoline.
10139 FNADDR is an RTX for the address of the function's pure code.
10140 CXT is an RTX for the static chain value for the function.
10142 Move the function address to the trampoline template at offset 36.
10143 Move the static chain value to trampoline template at offset 40.
10144 Move the trampoline address to trampoline template at offset 44.
10145 Move r19 to trampoline template at offset 48. The latter two
10146 words create a plabel for the indirect call to the trampoline.
10148 A similar sequence is used for the 64-bit port but the plabel is
10149 at the beginning of the trampoline.
10151 Finally, the cache entries for the trampoline code are flushed.
10152 This is necessary to ensure that the trampoline instruction sequence
10153 is written to memory prior to any attempts at prefetching the code
10154 sequence. */
10156 static void
10157 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10159 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10160 rtx start_addr = gen_reg_rtx (Pmode);
10161 rtx end_addr = gen_reg_rtx (Pmode);
10162 rtx line_length = gen_reg_rtx (Pmode);
10163 rtx r_tramp, tmp;
10165 emit_block_move (m_tramp, assemble_trampoline_template (),
10166 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10167 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10169 if (!TARGET_64BIT)
10171 tmp = adjust_address (m_tramp, Pmode, 36);
10172 emit_move_insn (tmp, fnaddr);
10173 tmp = adjust_address (m_tramp, Pmode, 40);
10174 emit_move_insn (tmp, chain_value);
10176 /* Create a fat pointer for the trampoline. */
10177 tmp = adjust_address (m_tramp, Pmode, 44);
10178 emit_move_insn (tmp, r_tramp);
10179 tmp = adjust_address (m_tramp, Pmode, 48);
10180 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10182 /* fdc and fic only use registers for the address to flush,
10183 they do not accept integer displacements. We align the
10184 start and end addresses to the beginning of their respective
10185 cache lines to minimize the number of lines flushed. */
10186 emit_insn (gen_andsi3 (start_addr, r_tramp,
10187 GEN_INT (-MIN_CACHELINE_SIZE)));
10188 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10189 TRAMPOLINE_CODE_SIZE-1));
10190 emit_insn (gen_andsi3 (end_addr, tmp,
10191 GEN_INT (-MIN_CACHELINE_SIZE)));
10192 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10193 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10194 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10195 gen_reg_rtx (Pmode),
10196 gen_reg_rtx (Pmode)));
10198 else
10200 tmp = adjust_address (m_tramp, Pmode, 56);
10201 emit_move_insn (tmp, fnaddr);
10202 tmp = adjust_address (m_tramp, Pmode, 64);
10203 emit_move_insn (tmp, chain_value);
10205 /* Create a fat pointer for the trampoline. */
10206 tmp = adjust_address (m_tramp, Pmode, 16);
10207 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10208 r_tramp, 32)));
10209 tmp = adjust_address (m_tramp, Pmode, 24);
10210 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10212 /* fdc and fic only use registers for the address to flush,
10213 they do not accept integer displacements. We align the
10214 start and end addresses to the beginning of their respective
10215 cache lines to minimize the number of lines flushed. */
10216 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10217 emit_insn (gen_anddi3 (start_addr, tmp,
10218 GEN_INT (-MIN_CACHELINE_SIZE)));
10219 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10220 TRAMPOLINE_CODE_SIZE - 1));
10221 emit_insn (gen_anddi3 (end_addr, tmp,
10222 GEN_INT (-MIN_CACHELINE_SIZE)));
10223 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10224 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10225 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10226 gen_reg_rtx (Pmode),
10227 gen_reg_rtx (Pmode)));
10230 #ifdef HAVE_ENABLE_EXECUTE_STACK
10231 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10232 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10233 #endif
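/* A hypothetical C view of the initialized 32-bit trampoline.  The
   field offsets match the adjust_address stores above; the code words
   come from pa_asm_trampoline_template.  */
#if 0
struct pa32_trampoline
{
  unsigned int code[9];    /* bytes  0..35: instruction template       */
  unsigned int fnaddr;     /* offset 36: address of the function       */
  unsigned int chain;      /* offset 40: static chain value            */
  unsigned int plabel[2];  /* offsets 44, 48: trampoline addr and %r19 */
};
#endif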
10236 /* Perform any machine-specific adjustment in the address of the trampoline.
10237 ADDR contains the address that was passed to pa_trampoline_init.
10238 Adjust the trampoline address to point to the plabel at offset 44. */
10240 static rtx
10241 pa_trampoline_adjust_address (rtx addr)
10243 if (!TARGET_64BIT)
10244 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10245 return addr;
10248 static rtx
10249 pa_delegitimize_address (rtx orig_x)
10251 rtx x = delegitimize_mem_from_attrs (orig_x);
10253 if (GET_CODE (x) == LO_SUM
10254 && GET_CODE (XEXP (x, 1)) == UNSPEC
10255 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10256 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10257 return x;
10260 static rtx
10261 pa_internal_arg_pointer (void)
10263 /* The argument pointer and the hard frame pointer are the same in
10264 the 32-bit runtime, so we don't need a copy. */
10265 if (TARGET_64BIT)
10266 return copy_to_reg (virtual_incoming_args_rtx);
10267 else
10268 return virtual_incoming_args_rtx;
10271 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10272 Frame pointer elimination is automatically handled. */
10274 static bool
10275 pa_can_eliminate (const int from, const int to)
10277 /* The argument cannot be eliminated in the 64-bit runtime. */
10278 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10279 return false;
10281 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10282 ? ! frame_pointer_needed
10283 : true);
10286 /* Define the offset between two registers, FROM to be eliminated and its
10287 replacement TO, at the start of a routine. */
10288 HOST_WIDE_INT
10289 pa_initial_elimination_offset (int from, int to)
10291 HOST_WIDE_INT offset;
10293 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10294 && to == STACK_POINTER_REGNUM)
10295 offset = -pa_compute_frame_size (get_frame_size (), 0);
10296 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10297 offset = 0;
10298 else
10299 gcc_unreachable ();
10301 return offset;
10304 static void
10305 pa_conditional_register_usage (void)
10307 int i;
10309 if (!TARGET_64BIT && !TARGET_PA_11)
10311 for (i = 56; i <= FP_REG_LAST; i++)
10312 fixed_regs[i] = call_used_regs[i] = 1;
10313 for (i = 33; i < 56; i += 2)
10314 fixed_regs[i] = call_used_regs[i] = 1;
10316 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10318 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10319 fixed_regs[i] = call_used_regs[i] = 1;
10321 if (flag_pic)
10322 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10325 /* Target hook for c_mode_for_suffix. */
10327 static machine_mode
10328 pa_c_mode_for_suffix (char suffix)
10330 if (HPUX_LONG_DOUBLE_LIBRARY)
10332 if (suffix == 'q')
10333 return TFmode;
10336 return VOIDmode;
10339 /* Target hook for function_section. */
10341 static section *
10342 pa_function_section (tree decl, enum node_frequency freq,
10343 bool startup, bool exit)
10345 /* Put functions in text section if target doesn't have named sections. */
10346 if (!targetm_common.have_named_sections)
10347 return text_section;
10349 /* Force nested functions into the same section as the containing
10350 function. */
10351 if (decl
10352 && DECL_SECTION_NAME (decl) == NULL
10353 && DECL_CONTEXT (decl) != NULL_TREE
10354 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10355 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10356 return function_section (DECL_CONTEXT (decl));
10358 /* Otherwise, use the default function section. */
10359 return default_function_section (decl, freq, startup, exit);
10362 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10364 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10365 that need more than three instructions to load prior to reload. This
10366 limit is somewhat arbitrary. It takes three instructions to load a
10367 CONST_INT from memory but two are memory accesses. It may be better
10368 to increase the allowed range for CONST_INTS. We may also be able
10369 to handle CONST_DOUBLES. */
10371 static bool
10372 pa_legitimate_constant_p (machine_mode mode, rtx x)
10374 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10375 return false;
10377 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10378 return false;
10380 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10381 legitimate constants. The other variants can't be handled by
10382 the move patterns after reload starts. */
10383 if (tls_referenced_p (x))
10384 return false;
10386 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10387 return false;
10389 if (TARGET_64BIT
10390 && HOST_BITS_PER_WIDE_INT > 32
10391 && GET_CODE (x) == CONST_INT
10392 && !reload_in_progress
10393 && !reload_completed
10394 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10395 && !pa_cint_ok_for_move (UINTVAL (x)))
10396 return false;
10398 if (function_label_operand (x, mode))
10399 return false;
10401 return true;
10404 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10406 static unsigned int
10407 pa_section_type_flags (tree decl, const char *name, int reloc)
10409 unsigned int flags;
10411 flags = default_section_type_flags (decl, name, reloc);
10413 /* Function labels are placed in the constant pool. This can
10414 cause a section conflict if decls are put in ".data.rel.ro"
10415 or ".data.rel.ro.local" using the __attribute__ construct. */
10416 if (strcmp (name, ".data.rel.ro") == 0
10417 || strcmp (name, ".data.rel.ro.local") == 0)
10418 flags |= SECTION_WRITE | SECTION_RELRO;
10420 return flags;
10423 /* pa_legitimate_address_p recognizes an RTL expression that is a
10424 valid memory address for an instruction. The MODE argument is the
10425 machine mode for the MEM expression that wants to use this address.
10427 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10428 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10429 available with floating point loads and stores, and integer loads.
10430 We get better code by allowing indexed addresses in the initial
10431 RTL generation.
10433 The acceptance of indexed addresses as legitimate implies that we
10434 must provide patterns for doing indexed integer stores, or the move
10435 expanders must force the address of an indexed store to a register.
10436 We have adopted the latter approach.
10438 Another function of pa_legitimate_address_p is to ensure that
10439 the base register is a valid pointer for indexed instructions.
10440 On targets that have non-equivalent space registers, we have to
10441 know at the time of assembler output which register in a REG+REG
10442 pair is the base register. The REG_POINTER flag is sometimes lost
10443 in reload and the following passes, so it can't be relied on during
10444 code generation. Thus, we either have to canonicalize the order
10445 of the registers in REG+REG indexed addresses, or treat REG+REG
10446 addresses separately and provide patterns for both permutations.
10448 The latter approach requires several hundred additional lines of
10449 code in pa.md. The downside to canonicalizing is that a PLUS
10450 in the wrong order can't combine to form a scaled indexed
10451 memory operand. As we won't need to canonicalize the operands if
10452 the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10454 We initially break out scaled indexed addresses in canonical order
10455 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10456 scaled indexed addresses during RTL generation. However, fold_rtx
10457 has its own opinion on how the operands of a PLUS should be ordered.
10458 If one of the operands is equivalent to a constant, it will make
10459 that operand the second operand. As the base register is likely to
10460 be equivalent to a SYMBOL_REF, we have made it the second operand.
10462 pa_legitimate_address_p accepts REG+REG as legitimate when the
10463 operands are in the order INDEX+BASE on targets with non-equivalent
10464 space registers, and in any order on targets with equivalent space
10465 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10467 We treat a SYMBOL_REF as legitimate if it is part of the current
10468 function's constant-pool, because such addresses can actually be
10469 output as REG+SMALLINT. */
10471 static bool
10472 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10474 if ((REG_P (x)
10475 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10476 : REG_OK_FOR_BASE_P (x)))
10477 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10478 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10479 && REG_P (XEXP (x, 0))
10480 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10481 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10482 return true;
10484 if (GET_CODE (x) == PLUS)
10486 rtx base, index;
10488 /* For REG+REG, the base register should be in XEXP (x, 1),
10489 so check it first. */
10490 if (REG_P (XEXP (x, 1))
10491 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10492 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10493 base = XEXP (x, 1), index = XEXP (x, 0);
10494 else if (REG_P (XEXP (x, 0))
10495 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10496 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10497 base = XEXP (x, 0), index = XEXP (x, 1);
10498 else
10499 return false;
10501 if (GET_CODE (index) == CONST_INT)
10503 if (INT_5_BITS (index))
10504 return true;
10506 /* When INT14_OK_STRICT is false, a secondary reload is needed
10507 to adjust the displacement of SImode and DImode floating point
10508 instructions but this may fail when the register also needs
10509 reloading. So, we return false when STRICT is true. We
10510 also reject long displacements for float mode addresses since
10511 the majority of accesses will use floating point instructions
10512 that don't support 14-bit offsets. */
10513 if (!INT14_OK_STRICT
10514 && (strict || !(reload_in_progress || reload_completed))
10515 && mode != QImode
10516 && mode != HImode)
10517 return false;
10519 return base14_operand (index, mode);
10522 if (!TARGET_DISABLE_INDEXING
10523 /* Only accept the "canonical" INDEX+BASE operand order
10524 on targets with non-equivalent space registers. */
10525 && (TARGET_NO_SPACE_REGS
10526 ? REG_P (index)
10527 : (base == XEXP (x, 1) && REG_P (index)
10528 && (reload_completed
10529 || (reload_in_progress && HARD_REGISTER_P (base))
10530 || REG_POINTER (base))
10531 && (reload_completed
10532 || (reload_in_progress && HARD_REGISTER_P (index))
10533 || !REG_POINTER (index))))
10534 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10535 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10536 : REG_OK_FOR_INDEX_P (index))
10537 && borx_reg_operand (base, Pmode)
10538 && borx_reg_operand (index, Pmode))
10539 return true;
10541 if (!TARGET_DISABLE_INDEXING
10542 && GET_CODE (index) == MULT
10543 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10544 && REG_P (XEXP (index, 0))
10545 && GET_MODE (XEXP (index, 0)) == Pmode
10546 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10547 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10548 && GET_CODE (XEXP (index, 1)) == CONST_INT
10549 && INTVAL (XEXP (index, 1))
10550 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10551 && borx_reg_operand (base, Pmode))
10552 return true;
10554 return false;
10557 if (GET_CODE (x) == LO_SUM)
10559 rtx y = XEXP (x, 0);
10561 if (GET_CODE (y) == SUBREG)
10562 y = SUBREG_REG (y);
10564 if (REG_P (y)
10565 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10566 : REG_OK_FOR_BASE_P (y)))
10568 /* Needed for -fPIC */
10569 if (mode == Pmode
10570 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10571 return true;
10573 if (!INT14_OK_STRICT
10574 && (strict || !(reload_in_progress || reload_completed))
10575 && mode != QImode
10576 && mode != HImode)
10577 return false;
10579 if (CONSTANT_P (XEXP (x, 1)))
10580 return true;
10582 return false;
10585 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10586 return true;
10588 return false;
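/* A standalone sketch (hypothetical helper, not part of GCC) of the
   displacement checks used above: INT_5_BITS corresponds to a signed
   5-bit field and base14_operand, in outline, to a signed 14-bit
   field.  */
#if 0
static int
fits_signed (long val, int bits)
{
  long lim = 1L << (bits - 1);
  return val >= -lim && val < lim;
}
/* fits_signed (15, 5) == 1, fits_signed (16, 5) == 0,
   fits_signed (8191, 14) == 1, fits_signed (8192, 14) == 0.  */
#endif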
10591 /* Look for machine dependent ways to make the invalid address AD a
10592 valid address.
10594 For the PA, transform:
10596 memory(X + <large int>)
10598 into:
10600 if (<large int> & mask) >= 16
10601 Y = (<large int> & ~mask) + mask + 1 Round up.
10602 else
10603 Y = (<large int> & ~mask) Round down.
10604 Z = X + Y
10605 memory (Z + (<large int> - Y));
10607 This makes reload inheritance and reload_cse work better since Z
10608 can be reused.
10610 There may be more opportunities to improve code with this hook. */
10612 rtx
10613 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10614 int opnum, int type,
10615 int ind_levels ATTRIBUTE_UNUSED)
10617 long offset, newoffset, mask;
10618 rtx new_rtx, temp = NULL_RTX;
10620 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10621 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10623 if (optimize && GET_CODE (ad) == PLUS)
10624 temp = simplify_binary_operation (PLUS, Pmode,
10625 XEXP (ad, 0), XEXP (ad, 1));
10627 new_rtx = temp ? temp : ad;
10629 if (optimize
10630 && GET_CODE (new_rtx) == PLUS
10631 && GET_CODE (XEXP (new_rtx, 0)) == REG
10632 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10634 offset = INTVAL (XEXP (new_rtx, 1));
10636 /* Choose rounding direction. Round up if we are >= halfway. */
10637 if ((offset & mask) >= ((mask + 1) / 2))
10638 newoffset = (offset & ~mask) + mask + 1;
10639 else
10640 newoffset = offset & ~mask;
10642 /* Ensure that long displacements are aligned. */
10643 if (mask == 0x3fff
10644 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10645 || (TARGET_64BIT && (mode) == DImode)))
10646 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10648 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10650 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10651 GEN_INT (newoffset));
10652 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10653 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10654 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10655 opnum, (enum reload_type) type);
10656 return ad;
10660 return NULL_RTX;
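/* A hypothetical worked instance of the transform above with
   mask == 0x3fff and memory (X + 0x4321): the masked bits 0x321 are
   below (mask + 1) / 2 == 0x2000, so we round down to Y == 0x4000,
   reload Z = X + Y, and address memory (Z + 0x321), where 0x321 fits
   in a 14-bit displacement.  */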
10663 /* Output address vector. */
10665 void
10666 pa_output_addr_vec (rtx lab, rtx body)
10668 int idx, vlen = XVECLEN (body, 0);
10670 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10671 if (TARGET_GAS)
10672 fputs ("\t.begin_brtab\n", asm_out_file);
10673 for (idx = 0; idx < vlen; idx++)
10675 ASM_OUTPUT_ADDR_VEC_ELT
10676 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10678 if (TARGET_GAS)
10679 fputs ("\t.end_brtab\n", asm_out_file);
10682 /* Output address difference vector. */
10684 void
10685 pa_output_addr_diff_vec (rtx lab, rtx body)
10687 rtx base = XEXP (XEXP (body, 0), 0);
10688 int idx, vlen = XVECLEN (body, 1);
10690 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10691 if (TARGET_GAS)
10692 fputs ("\t.begin_brtab\n", asm_out_file);
10693 for (idx = 0; idx < vlen; idx++)
10695 ASM_OUTPUT_ADDR_DIFF_ELT
10696 (asm_out_file,
10697 body,
10698 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10699 CODE_LABEL_NUMBER (base));
10701 if (TARGET_GAS)
10702 fputs ("\t.end_brtab\n", asm_out_file);
10705 /* This is a helper function for the other atomic operations. This function
10706 emits a loop that contains SEQ that iterates until a compare-and-swap
10707 operation at the end succeeds. MEM is the memory to be modified. SEQ is
10708 a set of instructions that takes a value from OLD_REG as an input and
10709 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be
10710 set to the current contents of MEM. After SEQ, a compare-and-swap will
10711 attempt to update MEM with NEW_REG. The function returns true when the
10712 loop was generated successfully. */
10714 static bool
10715 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10717 machine_mode mode = GET_MODE (mem);
10718 rtx_code_label *label;
10719 rtx cmp_reg, success, oldval;
10721 /* The loop we want to generate looks like
10723 cmp_reg = mem;
10724 label:
10725 old_reg = cmp_reg;
10726 seq;
10727 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10728 if (success)
10729 goto label;
10731 Note that we only do the plain load from memory once. Subsequent
10732 iterations use the value loaded by the compare-and-swap pattern. */
10734 label = gen_label_rtx ();
10735 cmp_reg = gen_reg_rtx (mode);
10737 emit_move_insn (cmp_reg, mem);
10738 emit_label (label);
10739 emit_move_insn (old_reg, cmp_reg);
10740 if (seq)
10741 emit_insn (seq);
10743 success = NULL_RTX;
10744 oldval = cmp_reg;
10745 if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10746 new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10747 MEMMODEL_RELAXED))
10748 return false;
10750 if (oldval != cmp_reg)
10751 emit_move_insn (cmp_reg, oldval);
10753 /* Mark this jump predicted not taken. */
10754 emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
10755 GET_MODE (success), 1, label,
10756 profile_probability::guessed_never ());
10757 return true;
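/* A plain-C analogue of the loop generated above (hypothetical; it
   uses the GCC __atomic builtins instead of emitting PA rtl):  */
#if 0
static int
atomic_fetch_add_int (int *mem, int addend)
{
  int old = __atomic_load_n (mem, __ATOMIC_RELAXED);
  int new_val;
  do
    new_val = old + addend;                       /* the "SEQ" step */
  while (!__atomic_compare_exchange_n (mem, &old, new_val, false,
                                       __ATOMIC_SEQ_CST, __ATOMIC_RELAXED));
  return old;
}
#endif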
10760 /* This function tries to implement an atomic exchange operation using a
10761 compare_and_swap loop. VAL is written to *MEM. The previous contents of
10762 *MEM are returned, using TARGET if possible. No memory model is required
10763 since a compare_and_swap loop is seq-cst. */
10765 rtx
10766 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10768 machine_mode mode = GET_MODE (mem);
10770 if (can_compare_and_swap_p (mode, true))
10772 if (!target || !register_operand (target, mode))
10773 target = gen_reg_rtx (mode);
10774 if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10775 return target;
10778 return NULL_RTX;
10781 /* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying
10782 arguments passed by hidden reference in the 32-bit HP runtime. Users
10783 can override this behavior for better compatibility with openmp at the
10784 risk of library incompatibilities. Arguments are always passed by value
10785 in the 64-bit HP runtime. */
10787 static bool
10788 pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED,
10789 machine_mode mode ATTRIBUTE_UNUSED,
10790 const_tree type ATTRIBUTE_UNUSED,
10791 bool named ATTRIBUTE_UNUSED)
10793 return !TARGET_CALLER_COPIES;
10796 /* Implement TARGET_HARD_REGNO_NREGS. */
10798 static unsigned int
10799 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
10801 return PA_HARD_REGNO_NREGS (regno, mode);
10804 /* Implement TARGET_HARD_REGNO_MODE_OK. */
10806 static bool
10807 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10809 return PA_HARD_REGNO_MODE_OK (regno, mode);
10812 #include "gt-pa.h"