/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2015 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "stringpool.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "dbxout.h"
#include "except.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "reload.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "df.h"
#include "opts.h"
#include "builtins.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
                            const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}* where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
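
/* Usage sketch (an assumption for illustration; the exact register
   spelling accepted depends on the names known to decode_reg_name):
   a kernel build might pass something like -mfixed-range=fr4-fr31 so
   the compiler never allocates those FP registers, and once every
   register in FP_REG_FIRST..FP_REG_LAST is fixed, the check above
   also turns on MASK_DISABLE_FPREGS.  */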
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
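
/* With the registrations above in place, HP-UX translation units can
   call, e.g., __builtin_fabsq (x) and __builtin_copysignq (x, y),
   which resolve to the _U_Qfabs and _U_Qfcopysign library entry
   points named above, while __builtin_infq and __builtin_huge_valq
   expand inline (see pa_expand_builtin below).  */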
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
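
/* A minimal illustrative sketch (compiled out, not part of the port):
   sample values for pa_ldil_cint_p under the rules above.  The
   constants are examples chosen for this sketch, not values taken
   from the PA ABI.  */
#if 0
static void
pa_ldil_cint_p_examples (void)
{
  /* Low 11 bits clear and sign unchanged by 32->64 extension: OK.  */
  gcc_assert (pa_ldil_cint_p (0x12345000));

  /* 0x678 occupies the low 11 bits, so ldil alone cannot load it.  */
  gcc_assert (!pa_ldil_cint_p (0x12345678));

  /* As a positive 64-bit value this changes sign when truncated to
     32 bits, so it is rejected.  */
  gcc_assert (!pa_ldil_cint_p ((HOST_WIDE_INT) 1 << 31));
}
#endif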
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
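
/* A worked example of the bit trick above (illustrative only,
   compiled out).  For x = 0b1111100000, a sign-extended 5-bit field
   shifted left by 5: lsb_mask = 0b100000, and (x >> 4) + lsb_mask,
   after masking off the bits below the lsb, is 0b1000000, a power
   of two, so the value is accepted.  */
#if 0
static void
pa_zdepi_cint_p_examples (void)
{
  /* Any 5-bit immediate can be deposited directly.  */
  gcc_assert (pa_zdepi_cint_p (0x5));

  /* 0b1111100000: a contiguous sign-extended field, shifted.  */
  gcc_assert (pa_zdepi_cint_p (0x3e0));

  /* Two set bits more than a field apart need two insns.  */
  gcc_assert (!pa_zdepi_cint_p (0x801));
}
#endif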
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
	0....01....1
	1....10....0
	1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
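
/* Illustrative contrast between the two predicates above (compiled
   out; the masks are chosen for this sketch).  depi can deposit one
   contiguous run of ones anywhere, so (reg | 0xf0) is one insn, but
   (reg & 0xf0) would have to clear bits on both sides of the run,
   which neither depi nor extru can do in a single insn.  */
#if 0
static void
pa_mask_p_examples (void)
{
  gcc_assert (pa_and_mask_p (0xffff));			    /* 0....01....1 */
  gcc_assert (pa_and_mask_p ((unsigned HOST_WIDE_INT) -1 << 16)); /* 1....10....0 */
  gcc_assert (pa_and_mask_p (~(unsigned HOST_WIDE_INT) 0xf0));    /* 1..10..01..1 */
  gcc_assert (!pa_and_mask_p (0xf0));			    /* 0..01..10..0 */

  gcc_assert (pa_ior_mask_p (0xf0));	/* one contiguous run of ones  */
  gcc_assert (!pa_ior_mask_p (0x88));	/* two separated bits  */
}
#endif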
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
static rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
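
/* A worked instance of the rounding transform documented before this
   function (the numbers are illustrative, not taken from the source).
   For an SFmode reference memory (X + 0x57) when only 5-bit
   displacements are allowed, mask = 0x1f and 0x57 & 0x1f = 0x17,
   which is past the halfway point 0x10, so we round up:
   Y = (0x57 & ~0x1f) + 0x20 = 0x60.  We emit Z = X + 0x60 (Y fits in
   an ldo) and rewrite the access as memory (Z - 9), whose displacement
   fits in 5 bits; CSE can then share Z among neighboring references.  */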
/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = factor * factor * COSTS_N_INSNS (8);
      else
	*total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A size N times larger than UNITS_PER_WORD needs N times as
	 many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
      if (factor == 0)
	factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
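
/* Illustrative cost arithmetic for the hook above (not part of the
   port): on a 32-bit target, a DImode multiply has
   factor = 8 / 4 = 2, so with FP hardware available it costs
   2 * 2 * COSTS_N_INSNS (8) = COSTS_N_INSNS (32), reflecting the
   O(N*N) growth noted in the comments.  */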
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp, true);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && (MEM_P (operand1)
	  || (GET_CODE (operand1) == SUBREG
	      && MEM_P (XEXP (operand1, 0))))
      && !floating_point_store_memory_operand (operand1, mode))
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (reg_plus_base_memory_operand (operand1, mode)
	  && !(TARGET_PA_20
	       && !TARGET_ELF32
	       && INT_14_BITS (XEXP (XEXP (operand1, 0), 1))))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (operand0,
			      replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
	   && fp_reg_operand (operand1, mode)
	   && (MEM_P (operand0)
	       || (GET_CODE (operand0) == SUBREG
		   && MEM_P (XEXP (operand0, 0))))
	   && !floating_point_store_memory_operand (operand0, mode))
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (reg_plus_base_memory_operand (operand0, mode)
	  && !(TARGET_PA_20
	       && !TARGET_ELF32
	       && INT_14_BITS (XEXP (XEXP (operand0, 0), 1))))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, 0)),
					  Pmode,
					  XEXP (XEXP (operand0, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (replace_equiv_address (operand0, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.  For the most part,
     this is only necessary for SImode and DImode.

     Use scratch_reg to hold the address of the memory location.  */
  else if (scratch_reg
	   && CONSTANT_P (operand1)
	   && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      if (operand1 == CONST0_RTX (mode))
	{
	  emit_insn (gen_rtx_SET (operand0, operand1));
	  return 1;
	}

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      pa_emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (operand0,
			      replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory or a constant.  */
  else if (scratch_reg
	   && GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg,
			  replace_equiv_address (operand1, scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      /* Legitimize TLS symbol references.  This happens for references
	 that aren't a legitimate constant.  */
      if (PA_SYMBOL_REF_TLS_P (operand1))
	operand1 = legitimize_tls_address (operand1);

      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && pa_cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Various sets are created during RTL generation which don't
	     have the REG_POINTER flag correctly set.  After the CSE pass,
	     instruction recognition can fail if we don't consistently
	     set this flag when performing register copies.  This should
	     also improve the opportunities for creating insns that use
	     unscaled indexing.  */
	  if (REG_P (operand0) && REG_P (operand1))
	    {
	      if (REG_POINTER (operand1)
		  && !REG_POINTER (operand0)
		  && !HARD_REGISTER_P (operand0))
		copy_reg_pointer (operand0, operand1);
	    }

	  /* When MEMs are broken out, the REG_POINTER flag doesn't
	     get set.  In some cases, we can set the REG_POINTER flag
	     from the declaration for the MEM.  */
	  if (REG_P (operand0)
	      && GET_CODE (operand1) == MEM
	      && !REG_POINTER (operand0))
	    {
	      tree decl = MEM_EXPR (operand1);

	      /* Set the register pointer flag and register alignment
		 if the declaration for this memory reference is a
1907 pointer type. */
1908 if (decl)
1910 tree type;
1912 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1913 tree operand 1. */
1914 if (TREE_CODE (decl) == COMPONENT_REF)
1915 decl = TREE_OPERAND (decl, 1);
1917 type = TREE_TYPE (decl);
1918 type = strip_array_types (type);
1920 if (POINTER_TYPE_P (type))
1922 int align;
1924 type = TREE_TYPE (type);
1925 /* Using TYPE_ALIGN_OK is rather conservative as
1926 only the Ada front end actually sets it. */
1927 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1928 : BITS_PER_UNIT);
1929 mark_reg_pointer (operand0, align);
1934 emit_insn (gen_rtx_SET (operand0, operand1));
1935 return 1;
1938 else if (GET_CODE (operand0) == MEM)
1940 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1941 && !(reload_in_progress || reload_completed))
1943 rtx temp = gen_reg_rtx (DFmode);
1945 emit_insn (gen_rtx_SET (temp, operand1));
1946 emit_insn (gen_rtx_SET (operand0, temp));
1947 return 1;
1949 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1951 /* Run this case quickly. */
1952 emit_insn (gen_rtx_SET (operand0, operand1));
1953 return 1;
1955 if (! (reload_in_progress || reload_completed))
1957 operands[0] = validize_mem (operand0);
1958 operands[1] = operand1 = force_reg (mode, operand1);
1962 /* Simplify the source if we need to.
1963 Note we do have to handle function labels here, even though we do
1964 not consider them legitimate constants. Loop optimizations can
1965 call the emit_move_xxx with one as a source. */
1966 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1967 || (GET_CODE (operand1) == HIGH
1968 && symbolic_operand (XEXP (operand1, 0), mode))
1969 || function_label_operand (operand1, VOIDmode)
1970 || tls_referenced_p (operand1))
1972 int ishighonly = 0;
1974 if (GET_CODE (operand1) == HIGH)
1976 ishighonly = 1;
1977 operand1 = XEXP (operand1, 0);
1979 if (symbolic_operand (operand1, mode))
1981 /* Argh. The assembler and linker can't handle arithmetic
1982 involving plabels.
1984 So we force the plabel into memory, load operand0 from
1985 the memory location, then add in the constant part. */
1986 if ((GET_CODE (operand1) == CONST
1987 && GET_CODE (XEXP (operand1, 0)) == PLUS
1988 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1989 VOIDmode))
1990 || function_label_operand (operand1, VOIDmode))
1992 rtx temp, const_part;
1994 /* Figure out what (if any) scratch register to use. */
1995 if (reload_in_progress || reload_completed)
1997 scratch_reg = scratch_reg ? scratch_reg : operand0;
1998 /* SCRATCH_REG will hold an address and maybe the actual
1999 data. We want it in WORD_MODE regardless of what mode it
2000 was originally given to us. */
2001 scratch_reg = force_mode (word_mode, scratch_reg);
2003 else if (flag_pic)
2004 scratch_reg = gen_reg_rtx (Pmode);
2006 if (GET_CODE (operand1) == CONST)
2008 /* Save away the constant part of the expression. */
2009 const_part = XEXP (XEXP (operand1, 0), 1);
2010 gcc_assert (GET_CODE (const_part) == CONST_INT);
2012 /* Force the function label into memory. */
2013 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2015 else
2017 /* No constant part. */
2018 const_part = NULL_RTX;
2020 /* Force the function label into memory. */
2021 temp = force_const_mem (mode, operand1);
2025 /* Get the address of the memory location. PIC-ify it if
2026 necessary. */
2027 temp = XEXP (temp, 0);
2028 if (flag_pic)
2029 temp = legitimize_pic_address (temp, mode, scratch_reg);
2031 /* Put the address of the memory location into our destination
2032 register. */
2033 operands[1] = temp;
2034 pa_emit_move_sequence (operands, mode, scratch_reg);
2036 /* Now load from the memory location into our destination
2037 register. */
2038 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2039 pa_emit_move_sequence (operands, mode, scratch_reg);
2041 /* And add back in the constant part. */
2042 if (const_part != NULL_RTX)
2043 expand_inc (operand0, const_part);
2045 return 1;
2048 if (flag_pic)
2050 rtx_insn *insn;
2051 rtx temp;
2053 if (reload_in_progress || reload_completed)
2055 temp = scratch_reg ? scratch_reg : operand0;
2056 /* TEMP will hold an address and maybe the actual
2057 data. We want it in WORD_MODE regardless of what mode it
2058 was originally given to us. */
2059 temp = force_mode (word_mode, temp);
2061 else
2062 temp = gen_reg_rtx (Pmode);
2064 /* Force (const (plus (symbol) (const_int))) to memory
2065 if the const_int will not fit in 14 bits. Although
2066 this requires a relocation, the instruction sequence
2067 needed to load the value is shorter. */
2068 if (GET_CODE (operand1) == CONST
2069 && GET_CODE (XEXP (operand1, 0)) == PLUS
2070 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2071 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2073 rtx x, m = force_const_mem (mode, operand1);
2075 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2076 x = replace_equiv_address (m, x);
2077 insn = emit_move_insn (operand0, x);
2079 else
2081 operands[1] = legitimize_pic_address (operand1, mode, temp);
2082 if (REG_P (operand0) && REG_P (operands[1]))
2083 copy_reg_pointer (operand0, operands[1]);
2084 insn = emit_move_insn (operand0, operands[1]);
2087 /* Put a REG_EQUAL note on this insn. */
2088 set_unique_reg_note (insn, REG_EQUAL, operand1);
2090 /* On the HPPA, references to data space are supposed to use dp,
2091 register 27, but showing it in the RTL inhibits various cse
2092 and loop optimizations. */
2093 else
2095 rtx temp, set;
2097 if (reload_in_progress || reload_completed)
2099 temp = scratch_reg ? scratch_reg : operand0;
2100 /* TEMP will hold an address and maybe the actual
2101 data. We want it in WORD_MODE regardless of what mode it
2102 was originally given to us. */
2103 temp = force_mode (word_mode, temp);
2105 else
2106 temp = gen_reg_rtx (mode);
2108 /* Loading a SYMBOL_REF into a register makes that register
2109 safe to be used as the base in an indexed address.
2111 Don't mark hard registers though. That loses. */
2112 if (GET_CODE (operand0) == REG
2113 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2114 mark_reg_pointer (operand0, BITS_PER_UNIT);
2115 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2116 mark_reg_pointer (temp, BITS_PER_UNIT);
2118 if (ishighonly)
2119 set = gen_rtx_SET (operand0, temp);
2120 else
2121 set = gen_rtx_SET (operand0,
2122 gen_rtx_LO_SUM (mode, temp, operand1));
2124 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2125 emit_insn (set);
2128 return 1;
2130 else if (tls_referenced_p (operand1))
2132 rtx tmp = operand1;
2133 rtx addend = NULL;
2135 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2137 addend = XEXP (XEXP (tmp, 0), 1);
2138 tmp = XEXP (XEXP (tmp, 0), 0);
2141 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2142 tmp = legitimize_tls_address (tmp);
2143 if (addend)
2145 tmp = gen_rtx_PLUS (mode, tmp, addend);
2146 tmp = force_operand (tmp, operands[0]);
2148 operands[1] = tmp;
2150 else if (GET_CODE (operand1) != CONST_INT
2151 || !pa_cint_ok_for_move (INTVAL (operand1)))
2153 rtx temp;
2154 rtx_insn *insn;
2155 rtx op1 = operand1;
2156 HOST_WIDE_INT value = 0;
2157 HOST_WIDE_INT insv = 0;
2158 int insert = 0;
2160 if (GET_CODE (operand1) == CONST_INT)
2161 value = INTVAL (operand1);
2163 if (TARGET_64BIT
2164 && GET_CODE (operand1) == CONST_INT
2165 && HOST_BITS_PER_WIDE_INT > 32
2166 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2168 HOST_WIDE_INT nval;
2170 /* Extract the low order 32 bits of the value and sign extend.
2171 If the new value is the same as the original value, we can
2172 use the original value as-is. If the new value is
2173 different, we use it and insert the most-significant 32-bits
2174 of the original value into the final result. */
2175 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2176 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2177 if (value != nval)
2179 #if HOST_BITS_PER_WIDE_INT > 32
2180 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2181 #endif
2182 insert = 1;
2183 value = nval;
2184 operand1 = GEN_INT (nval);
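/* Editorial trace of the split above (not part of the original
   source): for value = 0x123456789, the low 32 bits sign extend
   to nval = 0x23456789, which differs from value, so insv becomes
   value >> 32 = 0x1; the code below loads 0x23456789 first and
   then inserts the 1 into the upper half.  */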
2188 if (reload_in_progress || reload_completed)
2189 temp = scratch_reg ? scratch_reg : operand0;
2190 else
2191 temp = gen_reg_rtx (mode);
2193 /* We don't directly split DImode constants on 32-bit targets
2194 because PLUS uses an 11-bit immediate and the insn sequence
2195 generated is not as efficient as the one using HIGH/LO_SUM. */
2196 if (GET_CODE (operand1) == CONST_INT
2197 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2198 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2199 && !insert)
2201 /* Directly break constant into high and low parts. This
2202 provides better optimization opportunities because various
2203 passes recognize constants split with PLUS but not LO_SUM.
2204 We use a 14-bit signed low part except when the addition
2205 of 0x4000 to the high part might change the sign of the
2206 high part. */
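/* Worked example (editorial, not from the original source): for
   value = 0x12345, low = 0x2345 >= 0x2000, so high is bumped from
   0x10000 to 0x14000 and low becomes 0x12345 - 0x14000 = -7355,
   which fits the signed 14-bit range [-8192, 8191].  */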
2207 HOST_WIDE_INT low = value & 0x3fff;
2208 HOST_WIDE_INT high = value & ~ 0x3fff;
2210 if (low >= 0x2000)
2212 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2213 high += 0x2000;
2214 else
2215 high += 0x4000;
2218 low = value - high;
2220 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2221 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2223 else
2225 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2226 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2229 insn = emit_move_insn (operands[0], operands[1]);
2231 /* Now insert the most significant 32 bits of the value
2232 into the register. When we don't have a second register
2233 available, it could take up to nine instructions to load
2234 a 64-bit integer constant. Prior to reload, we force
2235 constants that would take more than three instructions
2236 to load to the constant pool. During and after reload,
2237 we have to handle all possible values. */
2238 if (insert)
2240 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2241 register and the value to be inserted is outside the
2242 range that can be loaded with three depdi instructions. */
2243 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2245 operand1 = GEN_INT (insv);
2247 emit_insn (gen_rtx_SET (temp,
2248 gen_rtx_HIGH (mode, operand1)));
2249 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2250 if (mode == DImode)
2251 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2252 const0_rtx, temp));
2253 else
2254 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2255 const0_rtx, temp));
2257 else
2259 int len = 5, pos = 27;
2261 /* Insert the bits using the depdi instruction. */
2262 while (pos >= 0)
2264 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2265 HOST_WIDE_INT sign = v5 < 0;
2267 /* Left extend the insertion. */
2268 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2269 while (pos > 0 && (insv & 1) == sign)
2271 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2272 len += 1;
2273 pos -= 1;
2276 if (mode == DImode)
2277 insn = emit_insn (gen_insvdi (operand0,
2278 GEN_INT (len),
2279 GEN_INT (pos),
2280 GEN_INT (v5)));
2281 else
2282 insn = emit_insn (gen_insvsi (operand0,
2283 GEN_INT (len),
2284 GEN_INT (pos),
2285 GEN_INT (v5)));
2287 len = pos > 0 && pos < 5 ? pos : 5;
2288 pos -= len;
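/* Editorial sketch of the loop above (not from the original
   source): for insv = 0x5a, the first pass peels the low five bits
   (0b11010, sign extended to -6) and emits an insert of (len 5,
   pos 27, value -6); the second pass left-extends through the
   remaining zero sign bits and emits (len 27, pos 0, value 2).  */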
2293 set_unique_reg_note (insn, REG_EQUAL, op1);
2295 return 1;
2298 /* Now have insn-emit do whatever it normally does. */
2299 return 0;
2302 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2303 it will need a link/runtime reloc). */
2305 int
2306 pa_reloc_needed (tree exp)
2308 int reloc = 0;
2310 switch (TREE_CODE (exp))
2312 case ADDR_EXPR:
2313 return 1;
2315 case POINTER_PLUS_EXPR:
2316 case PLUS_EXPR:
2317 case MINUS_EXPR:
2318 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2319 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2320 break;
2322 CASE_CONVERT:
2323 case NON_LVALUE_EXPR:
2324 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2325 break;
2327 case CONSTRUCTOR:
2329 tree value;
2330 unsigned HOST_WIDE_INT ix;
2332 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2333 if (value)
2334 reloc |= pa_reloc_needed (value);
2336 break;
2338 case ERROR_MARK:
2339 break;
2341 default:
2342 break;
2344 return reloc;
2348 /* Return the best assembler insn template
2349 for moving operands[1] into operands[0] as a fullword. */
2350 const char *
2351 pa_singlemove_string (rtx *operands)
2353 HOST_WIDE_INT intval;
2355 if (GET_CODE (operands[0]) == MEM)
2356 return "stw %r1,%0";
2357 if (GET_CODE (operands[1]) == MEM)
2358 return "ldw %1,%0";
2359 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2361 long i;
2362 REAL_VALUE_TYPE d;
2364 gcc_assert (GET_MODE (operands[1]) == SFmode);
2366 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2367 bit pattern. */
2368 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2369 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2371 operands[1] = GEN_INT (i);
2372 /* Fall through to CONST_INT case. */
2374 if (GET_CODE (operands[1]) == CONST_INT)
2376 intval = INTVAL (operands[1]);
2378 if (VAL_14_BITS_P (intval))
2379 return "ldi %1,%0";
2380 else if ((intval & 0x7ff) == 0)
2381 return "ldil L'%1,%0";
2382 else if (pa_zdepi_cint_p (intval))
2383 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2384 else
2385 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2387 return "copy %1,%0";
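/* Illustrative results (editorial, not from the original source):
   the 14-bit constant 4660 yields "ldi 4660,%0"; 0x34000, whose
   low 11 bits are zero, yields "ldil L'%1,%0"; and a plain
   register source falls through to "copy %1,%0".  */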
2391 /* Compute position (in OP[1]) and width (in OP[2])
2392 useful for copying IMM to a register using the zdepi
2393 instructions. Store the immediate value to insert in OP[0]. */
2394 static void
2395 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2397 int lsb, len;
2399 /* Find the least significant set bit in IMM. */
2400 for (lsb = 0; lsb < 32; lsb++)
2402 if ((imm & 1) != 0)
2403 break;
2404 imm >>= 1;
2407 /* Choose variants based on *sign* of the 5-bit field. */
2408 if ((imm & 0x10) == 0)
2409 len = (lsb <= 28) ? 4 : 32 - lsb;
2410 else
2412 /* Find the width of the bitstring in IMM. */
2413 for (len = 5; len < 32 - lsb; len++)
2415 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2416 break;
2419 /* Sign extend IMM as a 5-bit value. */
2420 imm = (imm & 0xf) - 0x10;
2423 op[0] = imm;
2424 op[1] = 31 - lsb;
2425 op[2] = len;
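/* Worked example (editorial, not from the original source): for
   imm = 0x3e0 the least significant set bit is 5 and the shifted
   field 0b11111 has its "sign" bit set, so the width loop stops at
   len = 5 and imm sign extends to -1, giving op[0] = -1,
   op[1] = 31 - 5 = 26 and op[2] = 5.  */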
2428 /* Compute position (in OP[1]) and width (in OP[2])
2429 useful for copying IMM to a register using the depdi,z
2430 instructions. Store the immediate value to insert in OP[0]. */
2432 static void
2433 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2435 int lsb, len, maxlen;
2437 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2439 /* Find the least significant set bit in IMM. */
2440 for (lsb = 0; lsb < maxlen; lsb++)
2442 if ((imm & 1) != 0)
2443 break;
2444 imm >>= 1;
2447 /* Choose variants based on *sign* of the 5-bit field. */
2448 if ((imm & 0x10) == 0)
2449 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2450 else
2452 /* Find the width of the bitstring in IMM. */
2453 for (len = 5; len < maxlen - lsb; len++)
2455 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2456 break;
2459 /* Extend length if host is narrow and IMM is negative. */
2460 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2461 len += 32;
2463 /* Sign extend IMM as a 5-bit value. */
2464 imm = (imm & 0xf) - 0x10;
2467 op[0] = imm;
2468 op[1] = 63 - lsb;
2469 op[2] = len;
2472 /* Output assembler code to perform a doubleword move insn
2473 with operands OPERANDS. */
2475 const char *
2476 pa_output_move_double (rtx *operands)
2478 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2479 rtx latehalf[2];
2480 rtx addreg0 = 0, addreg1 = 0;
2482 /* First classify both operands. */
2484 if (REG_P (operands[0]))
2485 optype0 = REGOP;
2486 else if (offsettable_memref_p (operands[0]))
2487 optype0 = OFFSOP;
2488 else if (GET_CODE (operands[0]) == MEM)
2489 optype0 = MEMOP;
2490 else
2491 optype0 = RNDOP;
2493 if (REG_P (operands[1]))
2494 optype1 = REGOP;
2495 else if (CONSTANT_P (operands[1]))
2496 optype1 = CNSTOP;
2497 else if (offsettable_memref_p (operands[1]))
2498 optype1 = OFFSOP;
2499 else if (GET_CODE (operands[1]) == MEM)
2500 optype1 = MEMOP;
2501 else
2502 optype1 = RNDOP;
2504 /* Check for cases that the operand constraints are not
2505 supposed to allow. */
2506 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2508 /* Handle copies between general and floating registers. */
2510 if (optype0 == REGOP && optype1 == REGOP
2511 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2513 if (FP_REG_P (operands[0]))
2515 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2516 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2517 return "{fldds|fldd} -16(%%sp),%0";
2519 else
2521 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2522 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2523 return "{ldws|ldw} -12(%%sp),%R0";
2527 /* Handle auto decrementing and incrementing loads and stores
2528 specifically, since the structure of the function doesn't work
2529 for them without major modification. Do it better when we teach
2530 this port about the general inc/dec addressing of the PA.
2531 (This was written by tege. Chide him if it doesn't work.) */
2533 if (optype0 == MEMOP)
2535 /* We have to output the address syntax ourselves, since print_operand
2536 doesn't deal with the addresses we want to use. Fix this later. */
2538 rtx addr = XEXP (operands[0], 0);
2539 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2541 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2543 operands[0] = XEXP (addr, 0);
2544 gcc_assert (GET_CODE (operands[1]) == REG
2545 && GET_CODE (operands[0]) == REG);
2547 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2549 /* No overlap between high target register and address
2550 register. (We do this in a non-obvious way to
2551 save a register file writeback) */
2552 if (GET_CODE (addr) == POST_INC)
2553 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2554 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2556 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2558 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2560 operands[0] = XEXP (addr, 0);
2561 gcc_assert (GET_CODE (operands[1]) == REG
2562 && GET_CODE (operands[0]) == REG);
2564 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2565 /* No overlap between high target register and address
2566 register. (We do this in a non-obvious way to save a
2567 register file writeback) */
2568 if (GET_CODE (addr) == PRE_INC)
2569 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2570 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2573 if (optype1 == MEMOP)
2575 /* We have to output the address syntax ourselves, since print_operand
2576 doesn't deal with the addresses we want to use. Fix this later. */
2578 rtx addr = XEXP (operands[1], 0);
2579 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2581 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2583 operands[1] = XEXP (addr, 0);
2584 gcc_assert (GET_CODE (operands[0]) == REG
2585 && GET_CODE (operands[1]) == REG);
2587 if (!reg_overlap_mentioned_p (high_reg, addr))
2589 /* No overlap between high target register and address
2590 register. (We do this in a non-obvious way to
2591 save a register file writeback) */
2592 if (GET_CODE (addr) == POST_INC)
2593 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2594 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2596 else
2598 /* This is an undefined situation. We should load into the
2599 address register *and* update that register. Probably
2600 we don't need to handle this at all. */
2601 if (GET_CODE (addr) == POST_INC)
2602 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2603 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2606 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2608 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2610 operands[1] = XEXP (addr, 0);
2611 gcc_assert (GET_CODE (operands[0]) == REG
2612 && GET_CODE (operands[1]) == REG);
2614 if (!reg_overlap_mentioned_p (high_reg, addr))
2616 /* No overlap between high target register and address
2617 register. (We do this in a non-obvious way to
2618 save a register file writeback) */
2619 if (GET_CODE (addr) == PRE_INC)
2620 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2621 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2623 else
2625 /* This is an undefined situation. We should load into the
2626 address register *and* update that register. Probably
2627 we don't need to handle this at all. */
2628 if (GET_CODE (addr) == PRE_INC)
2629 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2630 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2633 else if (GET_CODE (addr) == PLUS
2634 && GET_CODE (XEXP (addr, 0)) == MULT)
2636 rtx xoperands[4];
2638 /* Load address into left half of destination register. */
2639 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2640 xoperands[1] = XEXP (addr, 1);
2641 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2642 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2643 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2644 xoperands);
2645 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2647 else if (GET_CODE (addr) == PLUS
2648 && REG_P (XEXP (addr, 0))
2649 && REG_P (XEXP (addr, 1)))
2651 rtx xoperands[3];
2653 /* Load address into left half of destination register. */
2654 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2655 xoperands[1] = XEXP (addr, 0);
2656 xoperands[2] = XEXP (addr, 1);
2657 output_asm_insn ("{addl|add,l} %1,%2,%0",
2658 xoperands);
2659 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2663 /* If an operand is an unoffsettable memory ref, find a register
2664 we can increment temporarily to make it refer to the second word. */
2666 if (optype0 == MEMOP)
2667 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2669 if (optype1 == MEMOP)
2670 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2672 /* Ok, we can do one word at a time.
2673 Normally we do the low-numbered word first.
2675 In either case, set up in LATEHALF the operands to use
2676 for the high-numbered word and in some cases alter the
2677 operands in OPERANDS to be suitable for the low-numbered word. */
2679 if (optype0 == REGOP)
2680 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2681 else if (optype0 == OFFSOP)
2682 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2683 else
2684 latehalf[0] = operands[0];
2686 if (optype1 == REGOP)
2687 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2688 else if (optype1 == OFFSOP)
2689 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2690 else if (optype1 == CNSTOP)
2691 split_double (operands[1], &operands[1], &latehalf[1]);
2692 else
2693 latehalf[1] = operands[1];
2695 /* If the first move would clobber the source of the second one,
2696 do them in the other order.
2698 This can happen in two cases:
2700 mem -> register where the first half of the destination register
2701 is the same register used in the memory's address. Reload
2702 can create such insns.
2704 mem in this case will be either register indirect or register
2705 indirect plus a valid offset.
2707 register -> register move where REGNO(dst) == REGNO(src + 1)
2708 someone (Tim/Tege?) claimed this can happen for parameter loads.
2710 Handle mem -> register case first. */
2711 if (optype0 == REGOP
2712 && (optype1 == MEMOP || optype1 == OFFSOP)
2713 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2715 /* Do the late half first. */
2716 if (addreg1)
2717 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2718 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2720 /* Then clobber. */
2721 if (addreg1)
2722 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2723 return pa_singlemove_string (operands);
2726 /* Now handle register -> register case. */
2727 if (optype0 == REGOP && optype1 == REGOP
2728 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2730 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2731 return pa_singlemove_string (operands);
2734 /* Normal case: do the two words, low-numbered first. */
2736 output_asm_insn (pa_singlemove_string (operands), operands);
2738 /* Make any unoffsettable addresses point at high-numbered word. */
2739 if (addreg0)
2740 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2741 if (addreg1)
2742 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2744 /* Do that word. */
2745 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2747 /* Undo the adds we just did. */
2748 if (addreg0)
2749 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2750 if (addreg1)
2751 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2753 return "";
2756 const char *
2757 pa_output_fp_move_double (rtx *operands)
2759 if (FP_REG_P (operands[0]))
2761 if (FP_REG_P (operands[1])
2762 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2763 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2764 else
2765 output_asm_insn ("fldd%F1 %1,%0", operands);
2767 else if (FP_REG_P (operands[1]))
2769 output_asm_insn ("fstd%F0 %1,%0", operands);
2771 else
2773 rtx xoperands[2];
2775 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2777 /* This is a pain. You have to be prepared to deal with an
2778 arbitrary address here including pre/post increment/decrement.
2780 So avoid this in the MD. */
2781 gcc_assert (GET_CODE (operands[0]) == REG);
2783 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2784 xoperands[0] = operands[0];
2785 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2787 return "";
2790 /* Return a REG that occurs in ADDR with coefficient 1.
2791 ADDR can be effectively incremented by incrementing REG. */
2793 static rtx
2794 find_addr_reg (rtx addr)
2796 while (GET_CODE (addr) == PLUS)
2798 if (GET_CODE (XEXP (addr, 0)) == REG)
2799 addr = XEXP (addr, 0);
2800 else if (GET_CODE (XEXP (addr, 1)) == REG)
2801 addr = XEXP (addr, 1);
2802 else if (CONSTANT_P (XEXP (addr, 0)))
2803 addr = XEXP (addr, 1);
2804 else if (CONSTANT_P (XEXP (addr, 1)))
2805 addr = XEXP (addr, 0);
2806 else
2807 gcc_unreachable ();
2809 gcc_assert (GET_CODE (addr) == REG);
2810 return addr;
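/* Editorial example (not from the original source): for
   ADDR = (plus (reg 3) (const_int 8)), the first operand is a REG,
   so the loop steps to (reg 3) and returns it.  */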
2813 /* Emit code to perform a block move.
2815 OPERANDS[0] is the destination pointer as a REG, clobbered.
2816 OPERANDS[1] is the source pointer as a REG, clobbered.
2817 OPERANDS[2] is a register for temporary storage.
2818 OPERANDS[3] is a register for temporary storage.
2819 OPERANDS[4] is the size as a CONST_INT
2820 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2821 OPERANDS[6] is another temporary register. */
2823 const char *
2824 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2826 int align = INTVAL (operands[5]);
2827 unsigned long n_bytes = INTVAL (operands[4]);
2829 /* We can't move more than a word at a time because the PA
2830 has no integer move insns longer than a word. (Could use fp mem ops?) */
2831 if (align > (TARGET_64BIT ? 8 : 4))
2832 align = (TARGET_64BIT ? 8 : 4);
2834 /* Note that we know each loop below will execute at least twice
2835 (else we would have open-coded the copy). */
2836 switch (align)
2838 case 8:
2839 /* Pre-adjust the loop counter. */
2840 operands[4] = GEN_INT (n_bytes - 16);
2841 output_asm_insn ("ldi %4,%2", operands);
2843 /* Copying loop. */
2844 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2845 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2846 output_asm_insn ("std,ma %3,8(%0)", operands);
2847 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2848 output_asm_insn ("std,ma %6,8(%0)", operands);
2850 /* Handle the residual. There could be up to 15 bytes of
2851 residual to copy! */
2852 if (n_bytes % 16 != 0)
2854 operands[4] = GEN_INT (n_bytes % 8);
2855 if (n_bytes % 16 >= 8)
2856 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2857 if (n_bytes % 8 != 0)
2858 output_asm_insn ("ldd 0(%1),%6", operands);
2859 if (n_bytes % 16 >= 8)
2860 output_asm_insn ("std,ma %3,8(%0)", operands);
2861 if (n_bytes % 8 != 0)
2862 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2864 return "";
2866 case 4:
2867 /* Pre-adjust the loop counter. */
2868 operands[4] = GEN_INT (n_bytes - 8);
2869 output_asm_insn ("ldi %4,%2", operands);
2871 /* Copying loop. */
2872 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2873 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2874 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2875 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2876 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2878 /* Handle the residual. There could be up to 7 bytes of
2879 residual to copy! */
2880 if (n_bytes % 8 != 0)
2882 operands[4] = GEN_INT (n_bytes % 4);
2883 if (n_bytes % 8 >= 4)
2884 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2885 if (n_bytes % 4 != 0)
2886 output_asm_insn ("ldw 0(%1),%6", operands);
2887 if (n_bytes % 8 >= 4)
2888 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2889 if (n_bytes % 4 != 0)
2890 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2892 return "";
2894 case 2:
2895 /* Pre-adjust the loop counter. */
2896 operands[4] = GEN_INT (n_bytes - 4);
2897 output_asm_insn ("ldi %4,%2", operands);
2899 /* Copying loop. */
2900 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2901 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2902 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2903 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2904 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2906 /* Handle the residual. */
2907 if (n_bytes % 4 != 0)
2909 if (n_bytes % 4 >= 2)
2910 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2911 if (n_bytes % 2 != 0)
2912 output_asm_insn ("ldb 0(%1),%6", operands);
2913 if (n_bytes % 4 >= 2)
2914 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2915 if (n_bytes % 2 != 0)
2916 output_asm_insn ("stb %6,0(%0)", operands);
2918 return "";
2920 case 1:
2921 /* Pre-adjust the loop counter. */
2922 operands[4] = GEN_INT (n_bytes - 2);
2923 output_asm_insn ("ldi %4,%2", operands);
2925 /* Copying loop. */
2926 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2927 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2928 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2929 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2930 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2932 /* Handle the residual. */
2933 if (n_bytes % 2 != 0)
2935 output_asm_insn ("ldb 0(%1),%3", operands);
2936 output_asm_insn ("stb %3,0(%0)", operands);
2938 return "";
2940 default:
2941 gcc_unreachable ();
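/* Editorial sketch (not from the original source) of the align == 4
   case for n_bytes = 22: the counter is preset to 14, the loop
   copies 8 bytes per pass and runs twice, and the residual code
   copies one more word plus a 2-byte "stby,e" tail for
   22 % 4 = 2.  */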
2945 /* Count the number of insns necessary to handle this block move.
2947 Basic structure is the same as emit_block_move, except that we
2948 count insns rather than emit them. */
2950 static int
2951 compute_movmem_length (rtx_insn *insn)
2953 rtx pat = PATTERN (insn);
2954 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2955 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2956 unsigned int n_insns = 0;
2958 /* We can't move more than a word at a time because the PA
2959 has no integer move insns longer than a word. (Could use fp mem ops?) */
2960 if (align > (TARGET_64BIT ? 8 : 4))
2961 align = (TARGET_64BIT ? 8 : 4);
2963 /* The basic copying loop. */
2964 n_insns = 6;
2966 /* Residuals. */
2967 if (n_bytes % (2 * align) != 0)
2969 if ((n_bytes % (2 * align)) >= align)
2970 n_insns += 2;
2972 if ((n_bytes % align) != 0)
2973 n_insns += 2;
2976 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2977 return n_insns * 4;
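/* Worked example (editorial, not from the original source): for
   align = 4 and n_bytes = 22, the base loop costs 6 insns and each
   residual test adds 2 (22 % 8 >= 4 and 22 % 4 != 0), so the
   length is 10 * 4 = 40 bytes.  */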
2980 /* Emit code to perform a block clear.
2982 OPERANDS[0] is the destination pointer as a REG, clobbered.
2983 OPERANDS[1] is a register for temporary storage.
2984 OPERANDS[2] is the size as a CONST_INT
2985 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2987 const char *
2988 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2990 int align = INTVAL (operands[3]);
2991 unsigned long n_bytes = INTVAL (operands[2]);
2993 /* We can't clear more than a word at a time because the PA
2994 has no integer move insns longer than a word. */
2995 if (align > (TARGET_64BIT ? 8 : 4))
2996 align = (TARGET_64BIT ? 8 : 4);
2998 /* Note that we know each loop below will execute at least twice
2999 (else we would have open-coded the clear). */
3000 switch (align)
3002 case 8:
3003 /* Pre-adjust the loop counter. */
3004 operands[2] = GEN_INT (n_bytes - 16);
3005 output_asm_insn ("ldi %2,%1", operands);
3007 /* Loop. */
3008 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3009 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3010 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3012 /* Handle the residual. There could be up to 15 bytes of
3013 residual to copy! */
3014 if (n_bytes % 16 != 0)
3016 operands[2] = GEN_INT (n_bytes % 8);
3017 if (n_bytes % 16 >= 8)
3018 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3019 if (n_bytes % 8 != 0)
3020 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3022 return "";
3024 case 4:
3025 /* Pre-adjust the loop counter. */
3026 operands[2] = GEN_INT (n_bytes - 8);
3027 output_asm_insn ("ldi %2,%1", operands);
3029 /* Loop. */
3030 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3031 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3032 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3034 /* Handle the residual. There could be up to 7 bytes of
3035 residual to copy! */
3036 if (n_bytes % 8 != 0)
3038 operands[2] = GEN_INT (n_bytes % 4);
3039 if (n_bytes % 8 >= 4)
3040 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3041 if (n_bytes % 4 != 0)
3042 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3044 return "";
3046 case 2:
3047 /* Pre-adjust the loop counter. */
3048 operands[2] = GEN_INT (n_bytes - 4);
3049 output_asm_insn ("ldi %2,%1", operands);
3051 /* Loop. */
3052 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3053 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3054 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3056 /* Handle the residual. */
3057 if (n_bytes % 4 != 0)
3059 if (n_bytes % 4 >= 2)
3060 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3061 if (n_bytes % 2 != 0)
3062 output_asm_insn ("stb %%r0,0(%0)", operands);
3064 return "";
3066 case 1:
3067 /* Pre-adjust the loop counter. */
3068 operands[2] = GEN_INT (n_bytes - 2);
3069 output_asm_insn ("ldi %2,%1", operands);
3071 /* Loop. */
3072 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3073 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3074 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3076 /* Handle the residual. */
3077 if (n_bytes % 2 != 0)
3078 output_asm_insn ("stb %%r0,0(%0)", operands);
3080 return "";
3082 default:
3083 gcc_unreachable ();
3087 /* Count the number of insns necessary to handle this block clear.
3089 Basic structure is the same as emit_block_move, except that we
3090 count insns rather than emit them. */
3092 static int
3093 compute_clrmem_length (rtx_insn *insn)
3095 rtx pat = PATTERN (insn);
3096 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3097 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3098 unsigned int n_insns = 0;
3100 /* We can't clear more than a word at a time because the PA
3101 has no integer move insns longer than a word. */
3102 if (align > (TARGET_64BIT ? 8 : 4))
3103 align = (TARGET_64BIT ? 8 : 4);
3105 /* The basic loop. */
3106 n_insns = 4;
3108 /* Residuals. */
3109 if (n_bytes % (2 * align) != 0)
3111 if ((n_bytes % (2 * align)) >= align)
3112 n_insns++;
3114 if ((n_bytes % align) != 0)
3115 n_insns++;
3118 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3119 return n_insns * 4;
3123 const char *
3124 pa_output_and (rtx *operands)
3126 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3128 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3129 int ls0, ls1, ms0, p, len;
3131 for (ls0 = 0; ls0 < 32; ls0++)
3132 if ((mask & (1 << ls0)) == 0)
3133 break;
3135 for (ls1 = ls0; ls1 < 32; ls1++)
3136 if ((mask & (1 << ls1)) != 0)
3137 break;
3139 for (ms0 = ls1; ms0 < 32; ms0++)
3140 if ((mask & (1 << ms0)) == 0)
3141 break;
3143 gcc_assert (ms0 == 32);
3145 if (ls1 == 32)
3147 len = ls0;
3149 gcc_assert (len);
3151 operands[2] = GEN_INT (len);
3152 return "{extru|extrw,u} %1,31,%2,%0";
3154 else
3156 /* We could use this `depi' for the case above as well, but `depi'
3157 requires one more register file access than an `extru'. */
3159 p = 31 - ls0;
3160 len = ls1 - ls0;
3162 operands[2] = GEN_INT (p);
3163 operands[3] = GEN_INT (len);
3164 return "{depi|depwi} 0,%2,%3,%0";
3167 else
3168 return "and %1,%2,%0";
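/* Illustrative results (editorial, not from the original source):
   mask 0x7fff (ls1 == 32) selects the extract form
   "extru %1,31,15,%0", while mask 0xfffffc0f, whose clear run
   covers bits 4..9, selects "depi 0,27,6,%0".  */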
3171 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3172 storing the result in operands[0]. */
3173 const char *
3174 pa_output_64bit_and (rtx *operands)
3176 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3178 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3179 int ls0, ls1, ms0, p, len;
3181 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3182 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3183 break;
3185 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3186 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3187 break;
3189 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3190 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3191 break;
3193 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3195 if (ls1 == HOST_BITS_PER_WIDE_INT)
3197 len = ls0;
3199 gcc_assert (len);
3201 operands[2] = GEN_INT (len);
3202 return "extrd,u %1,63,%2,%0";
3204 else
3206 /* We could use this `depdi' for the case above as well, but `depdi'
3207 requires one more register file access than an `extrd'. */
3209 p = 63 - ls0;
3210 len = ls1 - ls0;
3212 operands[2] = GEN_INT (p);
3213 operands[3] = GEN_INT (len);
3214 return "depdi 0,%2,%3,%0";
3217 else
3218 return "and %1,%2,%0";
3221 const char *
3222 pa_output_ior (rtx *operands)
3224 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3225 int bs0, bs1, p, len;
3227 if (INTVAL (operands[2]) == 0)
3228 return "copy %1,%0";
3230 for (bs0 = 0; bs0 < 32; bs0++)
3231 if ((mask & (1 << bs0)) != 0)
3232 break;
3234 for (bs1 = bs0; bs1 < 32; bs1++)
3235 if ((mask & (1 << bs1)) == 0)
3236 break;
3238 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3240 p = 31 - bs0;
3241 len = bs1 - bs0;
3243 operands[2] = GEN_INT (p);
3244 operands[3] = GEN_INT (len);
3245 return "{depi|depwi} -1,%2,%3,%0";
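/* Worked example (editorial, not from the original source): mask
   0x3c sets bits 2..5, so bs0 = 2, bs1 = 6, and the insn becomes
   "depi -1,29,4,%0".  */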
3248 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3249 storing the result in operands[0]. */
3250 const char *
3251 pa_output_64bit_ior (rtx *operands)
3253 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3254 int bs0, bs1, p, len;
3256 if (INTVAL (operands[2]) == 0)
3257 return "copy %1,%0";
3259 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3260 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3261 break;
3263 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3264 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3265 break;
3267 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3268 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3270 p = 63 - bs0;
3271 len = bs1 - bs0;
3273 operands[2] = GEN_INT (p);
3274 operands[3] = GEN_INT (len);
3275 return "depdi -1,%2,%3,%0";
3278 /* Target hook for assembling integer objects. This code handles
3279 aligned SI and DI integers specially since function references
3280 must be preceded by P%. */
3282 static bool
3283 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3285 if (size == UNITS_PER_WORD
3286 && aligned_p
3287 && function_label_operand (x, VOIDmode))
3289 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3291 /* We don't want an OPD when generating fast indirect calls. */
3292 if (!TARGET_FAST_INDIRECT_CALLS)
3293 fputs ("P%", asm_out_file);
3295 output_addr_const (asm_out_file, x);
3296 fputc ('\n', asm_out_file);
3297 return true;
3299 return default_assemble_integer (x, size, aligned_p);
3302 /* Output an ascii string. */
3303 void
3304 pa_output_ascii (FILE *file, const char *p, int size)
3306 int i;
3307 int chars_output;
3308 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3310 /* The HP assembler can only take strings of 256 characters at one
3311 time. This is a limitation on input line length, *not* the
3312 length of the string. Sigh. Even worse, it seems that the
3313 restriction is in number of input characters (see \xnn &
3314 \whatever). So we have to do this very carefully. */
3316 fputs ("\t.STRING \"", file);
3318 chars_output = 0;
3319 for (i = 0; i < size; i += 4)
3321 int co = 0;
3322 int io = 0;
3323 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3325 register unsigned int c = (unsigned char) p[i + io];
3327 if (c == '\"' || c == '\\')
3328 partial_output[co++] = '\\';
3329 if (c >= ' ' && c < 0177)
3330 partial_output[co++] = c;
3331 else
3333 unsigned int hexd;
3334 partial_output[co++] = '\\';
3335 partial_output[co++] = 'x';
3336 hexd = c / 16 + '0';
3337 if (hexd > '9')
3338 hexd -= '9' - 'a' + 1;
3339 partial_output[co++] = hexd;
3340 hexd = c % 16 + '0';
3341 if (hexd > '9')
3342 hexd -= '9' - 'a' + 1;
3343 partial_output[co++] = hexd;
3346 if (chars_output + co > 243)
3348 fputs ("\"\n\t.STRING \"", file);
3349 chars_output = 0;
3351 fwrite (partial_output, 1, (size_t) co, file);
3352 chars_output += co;
3353 co = 0;
3355 fputs ("\"\n", file);
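/* Editorial sketch (not from the original source) of the escaping
   above: a printable '"' is emitted as \", while a control byte
   such as 0x1b becomes the four characters \x1b via the two
   hex-digit conversions.  */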
3358 /* Try to rewrite floating point comparisons & branches to avoid
3359 useless add,tr insns.
3361 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3362 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3363 first attempt to remove useless add,tr insns. It is zero
3364 for the second pass as reorg sometimes leaves bogus REG_DEAD
3365 notes lying around.
3367 When CHECK_NOTES is zero we can only eliminate add,tr insns
3368 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3369 instructions. */
3370 static void
3371 remove_useless_addtr_insns (int check_notes)
3373 rtx_insn *insn;
3374 static int pass = 0;
3376 /* This is fairly cheap, so always run it when optimizing. */
3377 if (optimize > 0)
3379 int fcmp_count = 0;
3380 int fbranch_count = 0;
3382 /* Walk all the insns in this function looking for fcmp & fbranch
3383 instructions. Keep track of how many of each we find. */
3384 for (insn = get_insns (); insn; insn = next_insn (insn))
3386 rtx tmp;
3388 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3389 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3390 continue;
3392 tmp = PATTERN (insn);
3394 /* It must be a set. */
3395 if (GET_CODE (tmp) != SET)
3396 continue;
3398 /* If the destination is CCFP, then we've found an fcmp insn. */
3399 tmp = SET_DEST (tmp);
3400 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3402 fcmp_count++;
3403 continue;
3406 tmp = PATTERN (insn);
3407 /* If this is an fbranch instruction, bump the fbranch counter. */
3408 if (GET_CODE (tmp) == SET
3409 && SET_DEST (tmp) == pc_rtx
3410 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3411 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3412 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3413 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3415 fbranch_count++;
3416 continue;
3421 /* Find all floating point compare + branch insns. If possible,
3422 reverse the comparison & the branch to avoid add,tr insns. */
3423 for (insn = get_insns (); insn; insn = next_insn (insn))
3425 rtx tmp;
3426 rtx_insn *next;
3428 /* Ignore anything that isn't an INSN. */
3429 if (! NONJUMP_INSN_P (insn))
3430 continue;
3432 tmp = PATTERN (insn);
3434 /* It must be a set. */
3435 if (GET_CODE (tmp) != SET)
3436 continue;
3438 /* The destination must be CCFP, which is register zero. */
3439 tmp = SET_DEST (tmp);
3440 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3441 continue;
3443 /* INSN should be a set of CCFP.
3445 See if the result of this insn is used in a reversed FP
3446 conditional branch. If so, reverse our condition and
3447 the branch. Doing so avoids useless add,tr insns. */
3448 next = next_insn (insn);
3449 while (next)
3451 /* Jumps, calls and labels stop our search. */
3452 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3453 break;
3455 /* As does another fcmp insn. */
3456 if (NONJUMP_INSN_P (next)
3457 && GET_CODE (PATTERN (next)) == SET
3458 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3459 && REGNO (SET_DEST (PATTERN (next))) == 0)
3460 break;
3462 next = next_insn (next);
3465 /* Is NEXT a branch? */
3466 if (next && JUMP_P (next))
3468 rtx pattern = PATTERN (next);
3470 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3471 and CCFP dies, then reverse our conditional and the branch
3472 to avoid the add,tr. */
3473 if (GET_CODE (pattern) == SET
3474 && SET_DEST (pattern) == pc_rtx
3475 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3476 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3477 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3478 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3479 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3480 && (fcmp_count == fbranch_count
3481 || (check_notes
3482 && find_regno_note (next, REG_DEAD, 0))))
3484 /* Reverse the branch. */
3485 tmp = XEXP (SET_SRC (pattern), 1);
3486 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3487 XEXP (SET_SRC (pattern), 2) = tmp;
3488 INSN_CODE (next) = -1;
3490 /* Reverse our condition. */
3491 tmp = PATTERN (insn);
3492 PUT_CODE (XEXP (tmp, 1),
3493 (reverse_condition_maybe_unordered
3494 (GET_CODE (XEXP (tmp, 1)))));
3500 pass = !pass;
3504 /* You may have trouble believing this, but this is the 32 bit HP-PA
3505 stack layout. Wow.
3507 Offset Contents
3509 Variable arguments (optional; any number may be allocated)
3511 SP-(4*(N+9)) arg word N
3513 SP-56 arg word 5
3514 SP-52 arg word 4
3516 Fixed arguments (must be allocated; may remain unused)
3518 SP-48 arg word 3
3519 SP-44 arg word 2
3520 SP-40 arg word 1
3521 SP-36 arg word 0
3523 Frame Marker
3525 SP-32 External Data Pointer (DP)
3526 SP-28 External sr4
3527 SP-24 External/stub RP (RP')
3528 SP-20 Current RP
3529 SP-16 Static Link
3530 SP-12 Clean up
3531 SP-8 Calling Stub RP (RP'')
3532 SP-4 Previous SP
3534 Top of Frame
3536 SP-0 Stack Pointer (points to next available address)
3540 /* This function saves registers as follows. Registers marked with ' are
3541 this function's registers (as opposed to the previous function's).
3542 If a frame_pointer isn't needed, r4 is saved as a general register;
3543 the space for the frame pointer is still allocated, though, to keep
3544 things simple.
3547 Top of Frame
3549 SP (FP') Previous FP
3550 SP + 4 Alignment filler (sigh)
3551 SP + 8 Space for locals reserved here.
3555 SP + n All call saved register used.
3559 SP + o All call saved fp registers used.
3563 SP + p (SP') points to next available address.
3567 /* Global variables set by output_function_prologue(). */
3568 /* Size of frame. Need to know this to emit return insns from
3569 leaf procedures. */
3570 static HOST_WIDE_INT actual_fsize, local_fsize;
3571 static int save_fregs;
3573 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3574 Handle case where DISP > 8k by using the add_high_const patterns.
3576 Note in DISP > 8k case, we will leave the high part of the address
3577 in %r1. There is code in pa_expand_{prologue,epilogue} that knows this. */
3579 static void
3580 store_reg (int reg, HOST_WIDE_INT disp, int base)
3582 rtx dest, src, basereg;
3583 rtx_insn *insn;
3585 src = gen_rtx_REG (word_mode, reg);
3586 basereg = gen_rtx_REG (Pmode, base);
3587 if (VAL_14_BITS_P (disp))
3589 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3590 insn = emit_move_insn (dest, src);
3592 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3594 rtx delta = GEN_INT (disp);
3595 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3597 emit_move_insn (tmpreg, delta);
3598 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3599 if (DO_FRAME_NOTES)
3601 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3602 gen_rtx_SET (tmpreg,
3603 gen_rtx_PLUS (Pmode, basereg, delta)));
3604 RTX_FRAME_RELATED_P (insn) = 1;
3606 dest = gen_rtx_MEM (word_mode, tmpreg);
3607 insn = emit_move_insn (dest, src);
3609 else
3611 rtx delta = GEN_INT (disp);
3612 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3613 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3615 emit_move_insn (tmpreg, high);
3616 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3617 insn = emit_move_insn (dest, src);
3618 if (DO_FRAME_NOTES)
3619 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3620 gen_rtx_SET (gen_rtx_MEM (word_mode,
3621 gen_rtx_PLUS (word_mode,
3622 basereg,
3623 delta)),
3624 src));
3627 if (DO_FRAME_NOTES)
3628 RTX_FRAME_RELATED_P (insn) = 1;
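/* Illustrative note (editorial, not from the original source): on
   a 32-bit target a displacement such as 0x5000 fails
   VAL_14_BITS_P, so the final branch above sets %r1 to the base
   plus the HIGH part of the offset and stores through the LO_SUM
   address, leaving the high part in %r1 as documented.  */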
3631 /* Emit RTL to store REG at the memory location specified by BASE and then
3632 add MOD to BASE. MOD must be <= 8k. */
3634 static void
3635 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3637 rtx basereg, srcreg, delta;
3638 rtx_insn *insn;
3640 gcc_assert (VAL_14_BITS_P (mod));
3642 basereg = gen_rtx_REG (Pmode, base);
3643 srcreg = gen_rtx_REG (word_mode, reg);
3644 delta = GEN_INT (mod);
3646 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3647 if (DO_FRAME_NOTES)
3649 RTX_FRAME_RELATED_P (insn) = 1;
3651 /* RTX_FRAME_RELATED_P must be set on each frame related set
3652 in a parallel with more than one element. */
3653 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3654 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3658 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3659 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3660 whether to add a frame note or not.
3662 In the DISP > 8k case, we leave the high part of the address in %r1.
3663 There is code in pa_expand_{prologue,epilogue} that knows about this. */
3665 static void
3666 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3668 rtx_insn *insn;
3670 if (VAL_14_BITS_P (disp))
3672 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3673 plus_constant (Pmode,
3674 gen_rtx_REG (Pmode, base), disp));
3676 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3678 rtx basereg = gen_rtx_REG (Pmode, base);
3679 rtx delta = GEN_INT (disp);
3680 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3682 emit_move_insn (tmpreg, delta);
3683 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3684 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3685 if (DO_FRAME_NOTES)
3686 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3687 gen_rtx_SET (tmpreg,
3688 gen_rtx_PLUS (Pmode, basereg, delta)));
3690 else
3692 rtx basereg = gen_rtx_REG (Pmode, base);
3693 rtx delta = GEN_INT (disp);
3694 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3696 emit_move_insn (tmpreg,
3697 gen_rtx_PLUS (Pmode, basereg,
3698 gen_rtx_HIGH (Pmode, delta)));
3699 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3700 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3703 if (DO_FRAME_NOTES && note)
3704 RTX_FRAME_RELATED_P (insn) = 1;
3707 HOST_WIDE_INT
3708 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3710 int freg_saved = 0;
3711 int i, j;
3713 /* The code in pa_expand_prologue and pa_expand_epilogue must
3714 be consistent with the rounding and size calculation done here.
3715 Change them at the same time. */
3717 /* We do our own stack alignment. First, round the size of the
3718 stack locals up to a word boundary. */
3719 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3721 /* Space for previous frame pointer + filler. If any frame is
3722 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3723 waste some space here for the sake of HP compatibility. The
3724 first slot is only used when the frame pointer is needed. */
3725 if (size || frame_pointer_needed)
3726 size += STARTING_FRAME_OFFSET;
3728 /* If the current function calls __builtin_eh_return, then we need
3729 to allocate stack space for registers that will hold data for
3730 the exception handler. */
3731 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3733 unsigned int i;
3735 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3736 continue;
3737 size += i * UNITS_PER_WORD;
3740 /* Account for space used by the callee general register saves. */
3741 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3742 if (df_regs_ever_live_p (i))
3743 size += UNITS_PER_WORD;
3745 /* Account for space used by the callee floating point register saves. */
3746 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3747 if (df_regs_ever_live_p (i)
3748 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3750 freg_saved = 1;
3752 /* We always save both halves of the FP register, so always
3753 increment the frame size by 8 bytes. */
3754 size += 8;
3757 /* If any of the floating registers are saved, account for the
3758 alignment needed for the floating point register save block. */
3759 if (freg_saved)
3761 size = (size + 7) & ~7;
3762 if (fregs_live)
3763 *fregs_live = 1;
3766 /* The various ABIs include space for the outgoing parameters in the
3767 size of the current function's stack frame. We don't need to align
3768 for the outgoing arguments as their alignment is set by the final
3769 rounding for the frame as a whole. */
3770 size += crtl->outgoing_args_size;
3772 /* Allocate space for the fixed frame marker. This space must be
3773 allocated for any function that makes calls or allocates
3774 stack space. */
3775 if (!crtl->is_leaf || size)
3776 size += TARGET_64BIT ? 48 : 32;
3778 /* Finally, round to the preferred stack boundary. */
3779 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3780 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
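/* A worked example of the two rounding steps above (illustrative; the
   64-byte figure assumes PREFERRED_STACK_BOUNDARY is 512 bits, as is
   usual for this port).  With UNITS_PER_WORD == 4:

     size = 13  ->  (13 + 3) & ~3   == 16    word rounding of locals
     size = 70  ->  (70 + 63) & ~63 == 128   final frame rounding

   so a function whose accumulated frame reaches 70 bytes ends up with
   a 128-byte frame.  */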
3783 /* Generate the assembly code for function entry. FILE is a stdio
3784 stream to output the code to. SIZE is an int: how many units of
3785 temporary storage to allocate.
3787 Refer to the array `regs_ever_live' to determine which registers to
3788 save; `regs_ever_live[I]' is nonzero if register number I is ever
3789 used in the function. This function is responsible for knowing
3790 which registers should not be saved even if used. */
3792 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3793 of memory. If any fpu reg is used in the function, we allocate
3794 such a block here, at the bottom of the frame, just in case it's needed.
3796 If this function is a leaf procedure, then we may choose not
3797 to do a "save" insn. The decision about whether or not
3798 to do this is made in regclass.c. */
3800 static void
3801 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3803 /* The function's label and associated .PROC must never be
3804 separated and must be output *after* any profiling declarations
3805 to avoid changing spaces/subspaces within a procedure. */
3806 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3807 fputs ("\t.PROC\n", file);
3809 /* pa_expand_prologue does the dirty work now. We just need
3810 to output the assembler directives which denote the start
3811 of a function. */
3812 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3813 if (crtl->is_leaf)
3814 fputs (",NO_CALLS", file);
3815 else
3816 fputs (",CALLS", file);
3817 if (rp_saved)
3818 fputs (",SAVE_RP", file);
3820 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3821 at the beginning of the frame and that it is used as the frame
3822 pointer for the frame. We do this because our current frame
3823 layout doesn't conform to that specified in the HP runtime
3824 documentation and we need a way to indicate to programs such as
3825 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3826 isn't used by HP compilers but is supported by the assembler.
3827 However, SAVE_SP is supposed to indicate that the previous stack
3828 pointer has been saved in the frame marker. */
3829 if (frame_pointer_needed)
3830 fputs (",SAVE_SP", file);
3832 /* Pass on information about the number of callee register saves
3833 performed in the prologue.
3835 The compiler is supposed to pass the highest register number
3836 saved, the assembler then has to adjust that number before
3837 entering it into the unwind descriptor (to account for any
3838 caller saved registers with lower register numbers than the
3839 first callee saved register). */
3840 if (gr_saved)
3841 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3843 if (fr_saved)
3844 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3846 fputs ("\n\t.ENTRY\n", file);
3848 remove_useless_addtr_insns (0);
3851 void
3852 pa_expand_prologue (void)
3854 int merge_sp_adjust_with_store = 0;
3855 HOST_WIDE_INT size = get_frame_size ();
3856 HOST_WIDE_INT offset;
3857 int i;
3858 rtx tmpreg;
3859 rtx_insn *insn;
3861 gr_saved = 0;
3862 fr_saved = 0;
3863 save_fregs = 0;
3865 /* Compute total size for frame pointer, filler, locals and rounding to
3866 the next word boundary. Similar code appears in pa_compute_frame_size
3867 and must be changed in tandem with this code. */
3868 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3869 if (local_fsize || frame_pointer_needed)
3870 local_fsize += STARTING_FRAME_OFFSET;
3872 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3873 if (flag_stack_usage_info)
3874 current_function_static_stack_size = actual_fsize;
3876 /* Compute a few things we will use often. */
3877 tmpreg = gen_rtx_REG (word_mode, 1);
3879 /* Save RP first. The calling conventions manual states RP will
3880 always be stored into the caller's frame at sp - 20 or sp - 16
3881 depending on which ABI is in use. */
3882 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3884 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3885 rp_saved = true;
3887 else
3888 rp_saved = false;
3890 /* Allocate the local frame and set up the frame pointer if needed. */
3891 if (actual_fsize != 0)
3893 if (frame_pointer_needed)
3895 /* Copy the old frame pointer temporarily into %r1. Set up the
3896 new stack pointer, then store away the saved old frame pointer
3897 into the stack at sp and at the same time update the stack
3898 pointer by actual_fsize bytes. Two versions: the first
3899 handles small (<8k) frames, the second handles large (>=8k)
3900 frames. */
3901 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3902 if (DO_FRAME_NOTES)
3903 RTX_FRAME_RELATED_P (insn) = 1;
3905 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3906 if (DO_FRAME_NOTES)
3907 RTX_FRAME_RELATED_P (insn) = 1;
3909 if (VAL_14_BITS_P (actual_fsize))
3910 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3911 else
3913 /* It is incorrect to store the saved frame pointer at *sp,
3914 then increment sp (writes beyond the current stack boundary).
3916 So instead use stwm to store at *sp and post-increment the
3917 stack pointer as an atomic operation. Then increment sp to
3918 finish allocating the new frame. */
3919 HOST_WIDE_INT adjust1 = 8192 - 64;
3920 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3922 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3923 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3924 adjust2, 1);
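/* For example (illustrative): with actual_fsize == 16384, adjust1 is
   8192 - 64 == 8128 and is allocated by the atomic store-and-modify,
   and adjust2 == 16384 - 8128 == 8256 finishes the allocation.  */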
3927 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3928 we need to store the previous stack pointer (frame pointer)
3929 into the frame marker on targets that use the HP unwind
3930 library. This allows the HP unwind library to be used to
3931 unwind GCC frames. However, we are not fully compatible
3932 with the HP library because our frame layout differs from
3933 that specified in the HP runtime specification.
3935 We don't want a frame note on this instruction as the frame
3936 marker moves during dynamic stack allocation.
3938 This instruction also serves as a blockage to prevent
3939 register spills from being scheduled before the stack
3940 pointer is raised. This is necessary as we store
3941 registers using the frame pointer as a base register,
3942 and the frame pointer is set before sp is raised. */
3943 if (TARGET_HPUX_UNWIND_LIBRARY)
3945 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3946 GEN_INT (TARGET_64BIT ? -8 : -4));
3948 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3949 hard_frame_pointer_rtx);
3951 else
3952 emit_insn (gen_blockage ());
3954 /* No frame pointer needed. */
3955 else
3957 /* In some cases we can perform the first callee register save
3958 and allocating the stack frame at the same time. If so, just
3959 make a note of it and defer allocating the frame until saving
3960 the callee registers. */
3961 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3962 merge_sp_adjust_with_store = 1;
3963 /* Cannot optimize. Adjust the stack frame by actual_fsize
3964 bytes. */
3965 else
3966 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3967 actual_fsize, 1);
3971 /* Normal register save.
3973 Do not save the frame pointer in the frame_pointer_needed case. It
3974 was done earlier. */
3975 if (frame_pointer_needed)
3977 offset = local_fsize;
3979 /* Saving the EH return data registers in the frame is the simplest
3980 way to get the frame unwind information emitted. We put them
3981 just before the general registers. */
3982 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3984 unsigned int i, regno;
3986 for (i = 0; ; ++i)
3988 regno = EH_RETURN_DATA_REGNO (i);
3989 if (regno == INVALID_REGNUM)
3990 break;
3992 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
3993 offset += UNITS_PER_WORD;
3997 for (i = 18; i >= 4; i--)
3998 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4000 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4001 offset += UNITS_PER_WORD;
4002 gr_saved++;
4004 /* Account for %r3 which is saved in a special place. */
4005 gr_saved++;
4007 /* No frame pointer needed. */
4008 else
4010 offset = local_fsize - actual_fsize;
4012 /* Saving the EH return data registers in the frame is the simplest
4013 way to get the frame unwind information emitted. */
4014 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4016 unsigned int i, regno;
4018 for (i = 0; ; ++i)
4020 regno = EH_RETURN_DATA_REGNO (i);
4021 if (regno == INVALID_REGNUM)
4022 break;
4024 /* If merge_sp_adjust_with_store is nonzero, then we can
4025 optimize the first save. */
4026 if (merge_sp_adjust_with_store)
4028 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4029 merge_sp_adjust_with_store = 0;
4031 else
4032 store_reg (regno, offset, STACK_POINTER_REGNUM);
4033 offset += UNITS_PER_WORD;
4037 for (i = 18; i >= 3; i--)
4038 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4040 /* If merge_sp_adjust_with_store is nonzero, then we can
4041 optimize the first GR save. */
4042 if (merge_sp_adjust_with_store)
4044 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4045 merge_sp_adjust_with_store = 0;
4047 else
4048 store_reg (i, offset, STACK_POINTER_REGNUM);
4049 offset += UNITS_PER_WORD;
4050 gr_saved++;
4053 /* If we wanted to merge the SP adjustment with a GR save, but we never
4054 did any GR saves, then just emit the adjustment here. */
4055 if (merge_sp_adjust_with_store)
4056 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4057 actual_fsize, 1);
4060 /* The hppa calling conventions say that %r19, the pic offset
4061 register, is saved at sp - 32 (in this function's frame)
4062 when generating PIC code. FIXME: What is the correct thing
4063 to do for functions which make no calls and allocate no
4064 frame? Do we need to allocate a frame, or can we just omit
4065 the save? For now we'll just omit the save.
4067 We don't want a note on this insn as the frame marker can
4068 move if there is a dynamic stack allocation. */
4069 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4071 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4073 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4077 /* Align pointer properly (doubleword boundary). */
4078 offset = (offset + 7) & ~7;
4080 /* Floating point register store. */
4081 if (save_fregs)
4083 rtx base;
4085 /* First get the frame or stack pointer to the start of the FP register
4086 save area. */
4087 if (frame_pointer_needed)
4089 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4090 base = hard_frame_pointer_rtx;
4092 else
4094 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4095 base = stack_pointer_rtx;
4098 /* Now actually save the FP registers. */
4099 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4101 if (df_regs_ever_live_p (i)
4102 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4104 rtx addr, reg;
4105 rtx_insn *insn;
4106 addr = gen_rtx_MEM (DFmode,
4107 gen_rtx_POST_INC (word_mode, tmpreg));
4108 reg = gen_rtx_REG (DFmode, i);
4109 insn = emit_move_insn (addr, reg);
4110 if (DO_FRAME_NOTES)
4112 RTX_FRAME_RELATED_P (insn) = 1;
4113 if (TARGET_64BIT)
4115 rtx mem = gen_rtx_MEM (DFmode,
4116 plus_constant (Pmode, base,
4117 offset));
4118 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4119 gen_rtx_SET (mem, reg));
4121 else
4123 rtx meml = gen_rtx_MEM (SFmode,
4124 plus_constant (Pmode, base,
4125 offset));
4126 rtx memr = gen_rtx_MEM (SFmode,
4127 plus_constant (Pmode, base,
4128 offset + 4));
4129 rtx regl = gen_rtx_REG (SFmode, i);
4130 rtx regr = gen_rtx_REG (SFmode, i + 1);
4131 rtx setl = gen_rtx_SET (meml, regl);
4132 rtx setr = gen_rtx_SET (memr, regr);
4133 rtvec vec;
4135 RTX_FRAME_RELATED_P (setl) = 1;
4136 RTX_FRAME_RELATED_P (setr) = 1;
4137 vec = gen_rtvec (2, setl, setr);
4138 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4139 gen_rtx_SEQUENCE (VOIDmode, vec));
4142 offset += GET_MODE_SIZE (DFmode);
4143 fr_saved++;
4149 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4150 Handle case where DISP > 8k by using the add_high_const patterns. */
4152 static void
4153 load_reg (int reg, HOST_WIDE_INT disp, int base)
4155 rtx dest = gen_rtx_REG (word_mode, reg);
4156 rtx basereg = gen_rtx_REG (Pmode, base);
4157 rtx src;
4159 if (VAL_14_BITS_P (disp))
4160 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4161 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4163 rtx delta = GEN_INT (disp);
4164 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4166 emit_move_insn (tmpreg, delta);
4167 if (TARGET_DISABLE_INDEXING)
4169 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4170 src = gen_rtx_MEM (word_mode, tmpreg);
4172 else
4173 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4175 else
4177 rtx delta = GEN_INT (disp);
4178 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4179 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4181 emit_move_insn (tmpreg, high);
4182 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4185 emit_move_insn (dest, src);
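/* A sketch of the expected assembly for the cases above (illustrative,
   not taken from actual compiler output): a 14-bit displacement yields
   a single "ldw disp(base),reg", while a displacement needing the
   add_high_const path becomes "addil L'disp,base" followed by
   "ldw R'disp(%r1),reg", since addil leaves its result in %r1.  */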
4188 /* Update the total code bytes output to the text section. */
4190 static void
4191 update_total_code_bytes (unsigned int nbytes)
4193 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4194 && !IN_NAMED_SECTION_P (cfun->decl))
4196 unsigned int old_total = total_code_bytes;
4198 total_code_bytes += nbytes;
4200 /* Be prepared to handle overflows. */
4201 if (old_total > total_code_bytes)
4202 total_code_bytes = UINT_MAX;
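/* E.g. (illustrative): with total_code_bytes == 0xfffffff0 and
   nbytes == 0x20, the sum wraps to 0x10; old_total > total_code_bytes
   detects the wrap and the counter saturates at UINT_MAX.  */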
4206 /* This function generates the assembly code for function exit.
4207 Args are as for output_function_prologue ().
4209 The function epilogue should not depend on the current stack
4210 pointer! It should use the frame pointer only. This is mandatory
4211 because of alloca; we also take advantage of it to omit stack
4212 adjustments before returning. */
4214 static void
4215 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4217 rtx_insn *insn = get_last_insn ();
4218 bool extra_nop;
4220 /* pa_expand_epilogue does the dirty work now. We just need
4221 to output the assembler directives which denote the end
4222 of a function.
4224 To make debuggers happy, emit a nop if the epilogue was completely
4225 eliminated due to a volatile call as the last insn in the
4226 current function. That way the return address (in %r2) will
4227 always point to a valid instruction in the current function. */
4229 /* Get the last real insn. */
4230 if (NOTE_P (insn))
4231 insn = prev_real_insn (insn);
4233 /* If it is a sequence, then look inside. */
4234 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4235 insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4237 /* If insn is a CALL_INSN, then it must be a call to a volatile
4238 function (otherwise there would be epilogue insns). */
4239 if (insn && CALL_P (insn))
4241 fputs ("\tnop\n", file);
4242 extra_nop = true;
4244 else
4245 extra_nop = false;
4247 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4249 if (TARGET_SOM && TARGET_GAS)
4251 /* We are done with this subspace except possibly for some additional
4252 debug information. Forget that we are in this subspace to ensure
4253 that the next function is output in its own subspace. */
4254 in_section = NULL;
4255 cfun->machine->in_nsubspa = 2;
4258 /* Thunks do their own insn accounting. */
4259 if (cfun->is_thunk)
4260 return;
4262 if (INSN_ADDRESSES_SET_P ())
4264 last_address = extra_nop ? 4 : 0;
4265 insn = get_last_nonnote_insn ();
4266 if (insn)
4268 last_address += INSN_ADDRESSES (INSN_UID (insn));
4269 if (INSN_P (insn))
4270 last_address += insn_default_length (insn);
4272 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4273 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4275 else
4276 last_address = UINT_MAX;
4278 /* Finally, update the total number of code bytes output so far. */
4279 update_total_code_bytes (last_address);
4282 void
4283 pa_expand_epilogue (void)
4285 rtx tmpreg;
4286 HOST_WIDE_INT offset;
4287 HOST_WIDE_INT ret_off = 0;
4288 int i;
4289 int merge_sp_adjust_with_load = 0;
4291 /* We will use this often. */
4292 tmpreg = gen_rtx_REG (word_mode, 1);
4294 /* Try to restore RP early to avoid load/use interlocks when
4295 RP gets used in the return (bv) instruction. This appears to still
4296 be necessary even when we schedule the prologue and epilogue. */
4297 if (rp_saved)
4299 ret_off = TARGET_64BIT ? -16 : -20;
4300 if (frame_pointer_needed)
4302 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4303 ret_off = 0;
4305 else
4307 /* No frame pointer, and stack is smaller than 8k. */
4308 if (VAL_14_BITS_P (ret_off - actual_fsize))
4310 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4311 ret_off = 0;
4316 /* General register restores. */
4317 if (frame_pointer_needed)
4319 offset = local_fsize;
4321 /* If the current function calls __builtin_eh_return, then we need
4322 to restore the saved EH data registers. */
4323 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4325 unsigned int i, regno;
4327 for (i = 0; ; ++i)
4329 regno = EH_RETURN_DATA_REGNO (i);
4330 if (regno == INVALID_REGNUM)
4331 break;
4333 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4334 offset += UNITS_PER_WORD;
4338 for (i = 18; i >= 4; i--)
4339 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4341 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4342 offset += UNITS_PER_WORD;
4345 else
4347 offset = local_fsize - actual_fsize;
4349 /* If the current function calls __builtin_eh_return, then we need
4350 to restore the saved EH data registers. */
4351 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4353 unsigned int i, regno;
4355 for (i = 0; ; ++i)
4357 regno = EH_RETURN_DATA_REGNO (i);
4358 if (regno == INVALID_REGNUM)
4359 break;
4361 /* Only for the first load.
4362 merge_sp_adjust_with_load holds the register load
4363 with which we will merge the sp adjustment. */
4364 if (merge_sp_adjust_with_load == 0
4365 && local_fsize == 0
4366 && VAL_14_BITS_P (-actual_fsize))
4367 merge_sp_adjust_with_load = regno;
4368 else
4369 load_reg (regno, offset, STACK_POINTER_REGNUM);
4370 offset += UNITS_PER_WORD;
4374 for (i = 18; i >= 3; i--)
4376 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4378 /* Only for the first load.
4379 merge_sp_adjust_with_load holds the register load
4380 with which we will merge the sp adjustment. */
4381 if (merge_sp_adjust_with_load == 0
4382 && local_fsize == 0
4383 && VAL_14_BITS_P (-actual_fsize))
4384 merge_sp_adjust_with_load = i;
4385 else
4386 load_reg (i, offset, STACK_POINTER_REGNUM);
4387 offset += UNITS_PER_WORD;
4392 /* Align pointer properly (doubleword boundary). */
4393 offset = (offset + 7) & ~7;
4395 /* FP register restores. */
4396 if (save_fregs)
4398 /* Adjust the register to index off of. */
4399 if (frame_pointer_needed)
4400 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4401 else
4402 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4404 /* Actually do the restores now. */
4405 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4406 if (df_regs_ever_live_p (i)
4407 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4409 rtx src = gen_rtx_MEM (DFmode,
4410 gen_rtx_POST_INC (word_mode, tmpreg));
4411 rtx dest = gen_rtx_REG (DFmode, i);
4412 emit_move_insn (dest, src);
4416 /* Emit a blockage insn here to keep these insns from being moved to
4417 an earlier spot in the epilogue, or into the main instruction stream.
4419 This is necessary as we must not cut the stack back before all the
4420 restores are finished. */
4421 emit_insn (gen_blockage ());
4423 /* Reset stack pointer (and possibly frame pointer). The stack
4424 pointer is initially set to fp + 64 to avoid a race condition. */
4425 if (frame_pointer_needed)
4427 rtx delta = GEN_INT (-64);
4429 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4430 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4431 stack_pointer_rtx, delta));
4433 /* If we were deferring a callee register restore, do it now. */
4434 else if (merge_sp_adjust_with_load)
4436 rtx delta = GEN_INT (-actual_fsize);
4437 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4439 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4441 else if (actual_fsize != 0)
4442 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4443 - actual_fsize, 0);
4445 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4446 frame greater than 8k), do so now. */
4447 if (ret_off != 0)
4448 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4450 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4452 rtx sa = EH_RETURN_STACKADJ_RTX;
4454 emit_insn (gen_blockage ());
4455 emit_insn (TARGET_64BIT
4456 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4457 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4461 bool
4462 pa_can_use_return_insn (void)
4464 if (!reload_completed)
4465 return false;
4467 if (frame_pointer_needed)
4468 return false;
4470 if (df_regs_ever_live_p (2))
4471 return false;
4473 if (crtl->profile)
4474 return false;
4476 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4479 rtx
4480 hppa_pic_save_rtx (void)
4482 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4485 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4486 #define NO_DEFERRED_PROFILE_COUNTERS 0
4487 #endif
4490 /* Vector of funcdef numbers. */
4491 static vec<int> funcdef_nos;
4493 /* Output deferred profile counters. */
4494 static void
4495 output_deferred_profile_counters (void)
4497 unsigned int i;
4498 int align, n;
4500 if (funcdef_nos.is_empty ())
4501 return;
4503 switch_to_section (data_section);
4504 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4505 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4507 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4509 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4510 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4513 funcdef_nos.release ();
4516 void
4517 hppa_profile_hook (int label_no)
4519 /* We use SImode for the address of the function in both 32 and
4520 64-bit code to avoid having to provide DImode versions of the
4521 lcla2 and load_offset_label_address insn patterns. */
4522 rtx reg = gen_reg_rtx (SImode);
4523 rtx_code_label *label_rtx = gen_label_rtx ();
4524 rtx begin_label_rtx;
4525 rtx_insn *call_insn;
4526 char begin_label_name[16];
4528 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4529 label_no);
4530 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4532 if (TARGET_64BIT)
4533 emit_move_insn (arg_pointer_rtx,
4534 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4535 GEN_INT (64)));
4537 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4539 /* The address of the function is loaded into %r25 with an instruction-
4540 relative sequence that avoids the use of relocations. The sequence
4541 is split so that the load_offset_label_address instruction can
4542 occupy the delay slot of the call to _mcount. */
4543 if (TARGET_PA_20)
4544 emit_insn (gen_lcla2 (reg, label_rtx));
4545 else
4546 emit_insn (gen_lcla1 (reg, label_rtx));
4548 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4549 reg, begin_label_rtx, label_rtx));
4551 #if !NO_DEFERRED_PROFILE_COUNTERS
4553 rtx count_label_rtx, addr, r24;
4554 char count_label_name[16];
4556 funcdef_nos.safe_push (label_no);
4557 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4558 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4560 addr = force_reg (Pmode, count_label_rtx);
4561 r24 = gen_rtx_REG (Pmode, 24);
4562 emit_move_insn (r24, addr);
4564 call_insn =
4565 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4566 gen_rtx_SYMBOL_REF (Pmode,
4567 "_mcount")),
4568 GEN_INT (TARGET_64BIT ? 24 : 12)));
4570 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4572 #else
4574 call_insn =
4575 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4576 gen_rtx_SYMBOL_REF (Pmode,
4577 "_mcount")),
4578 GEN_INT (TARGET_64BIT ? 16 : 8)));
4580 #endif
4582 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4583 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4585 /* Indicate the _mcount call cannot throw, nor will it execute a
4586 non-local goto. */
4587 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4590 /* Fetch the return address for the frame COUNT steps up from
4591 the current frame, after the prologue. FRAMEADDR is the
4592 frame pointer of the COUNT frame.
4594 We want to ignore any export stub remnants here. To handle this,
4595 we examine the code at the return address, and if it is an export
4596 stub, we return a memory rtx for the stub return address stored
4597 at frame-24.
4599 The value returned is used in two different ways:
4601 1. To find a function's caller.
4603 2. To change the return address for a function.
4605 This function handles most instances of case 1; however, it will
4606 fail if there are two levels of stubs to execute on the return
4607 path. The only way I believe that can happen is if the return value
4608 needs a parameter relocation, which never happens for C code.
4610 This function handles most instances of case 2; however, it will
4611 fail if we did not originally have stub code on the return path
4612 but will need stub code on the new return path. This can happen if
4613 the caller & callee are both in the main program, but the new
4614 return location is in a shared library. */
4616 rtx
4617 pa_return_addr_rtx (int count, rtx frameaddr)
4619 rtx label;
4620 rtx rp;
4621 rtx saved_rp;
4622 rtx ins;
4624 /* The instruction stream at the return address of a PA1.X export stub is:
4626 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4627 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4628 0x00011820 | stub+16: mtsp r1,sr0
4629 0xe0400002 | stub+20: be,n 0(sr0,rp)
4631 0xe0400002 must be specified as -532676606 so that it won't be
4632 rejected as an invalid immediate operand on 64-bit hosts.
4634 The instruction stream at the return address of a PA2.0 export stub is:
4636 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4637 0xe840d002 | stub+12: bve,n (rp)
4640 HOST_WIDE_INT insns[4];
4641 int i, len;
4643 if (count != 0)
4644 return NULL_RTX;
4646 rp = get_hard_reg_initial_val (Pmode, 2);
4648 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4649 return rp;
4651 /* If there is no export stub then just use the value saved from
4652 the return pointer register. */
4654 saved_rp = gen_reg_rtx (Pmode);
4655 emit_move_insn (saved_rp, rp);
4657 /* Get pointer to the instruction stream. We have to mask out the
4658 privilege level from the two low order bits of the return address
4659 pointer here so that ins will point to the start of the first
4660 instruction that would have been executed if we returned. */
4661 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4662 label = gen_label_rtx ();
4664 if (TARGET_PA_20)
4666 insns[0] = 0x4bc23fd1;
4667 insns[1] = -398405630;
4668 len = 2;
4670 else
4672 insns[0] = 0x4bc23fd1;
4673 insns[1] = 0x004010a1;
4674 insns[2] = 0x00011820;
4675 insns[3] = -532676606;
4676 len = 4;
4679 /* Check the instruction stream at the normal return address for the
4680 export stub. If it is an export stub, then our return address is
4681 really in -24[frameaddr]. */
4683 for (i = 0; i < len; i++)
4685 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4686 rtx op1 = GEN_INT (insns[i]);
4687 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4690 /* Here we know that our return address points to an export
4691 stub. We don't want to return the address of the export stub,
4692 but rather the return address of the export stub. That return
4693 address is stored at -24[frameaddr]. */
4695 emit_move_insn (saved_rp,
4696 gen_rtx_MEM (Pmode,
4697 memory_address (Pmode,
4698 plus_constant (Pmode, frameaddr,
4699 -24))));
4701 emit_label (label);
4703 return saved_rp;
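/* A plain-C sketch (kept under #if 0, so it is not compiled) of the
   run-time effect of the RTL emitted above, written for the 32-bit
   case; the name and parameter list are hypothetical.  */
#if 0
static unsigned int
example_unwind_export_stub (unsigned int ra, const unsigned int *frame,
			    const unsigned int *stub, int len)
{
  /* Mask out the two privilege-level bits of the return address.  */
  const unsigned int *ins = (const unsigned int *) (ra & ~3u);
  int i;

  for (i = 0; i < len; i++)
    if (ins[i] != stub[i])
      return ra;	/* Not an export stub; use the saved RP.  */

  /* An export stub: the real return address sits at frame - 24.  */
  return frame[-6];
}
#endif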
4706 void
4707 pa_emit_bcond_fp (rtx operands[])
4709 enum rtx_code code = GET_CODE (operands[0]);
4710 rtx operand0 = operands[1];
4711 rtx operand1 = operands[2];
4712 rtx label = operands[3];
4714 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4715 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4717 emit_jump_insn (gen_rtx_SET (pc_rtx,
4718 gen_rtx_IF_THEN_ELSE (VOIDmode,
4719 gen_rtx_fmt_ee (NE,
4720 VOIDmode,
4721 gen_rtx_REG (CCFPmode, 0),
4722 const0_rtx),
4723 gen_rtx_LABEL_REF (VOIDmode, label),
4724 pc_rtx)));
4728 /* Adjust the cost of a scheduling dependency. Return the new cost of
4729 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4731 static int
4732 pa_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4734 enum attr_type attr_type;
4736 /* Don't adjust costs for a pa8000 chip; also, do not adjust any
4737 true dependencies, as they are described with bypasses now. */
4738 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4739 return cost;
4741 if (! recog_memoized (insn))
4742 return 0;
4744 attr_type = get_attr_type (insn);
4746 switch (REG_NOTE_KIND (link))
4748 case REG_DEP_ANTI:
4749 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4750 cycles later. */
4752 if (attr_type == TYPE_FPLOAD)
4754 rtx pat = PATTERN (insn);
4755 rtx dep_pat = PATTERN (dep_insn);
4756 if (GET_CODE (pat) == PARALLEL)
4758 /* This happens for the fldXs,mb patterns. */
4759 pat = XVECEXP (pat, 0, 0);
4761 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4762 /* If this happens, we have to extend this to schedule
4763 optimally. Return 0 for now. */
4764 return 0;
4766 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4768 if (! recog_memoized (dep_insn))
4769 return 0;
4770 switch (get_attr_type (dep_insn))
4772 case TYPE_FPALU:
4773 case TYPE_FPMULSGL:
4774 case TYPE_FPMULDBL:
4775 case TYPE_FPDIVSGL:
4776 case TYPE_FPDIVDBL:
4777 case TYPE_FPSQRTSGL:
4778 case TYPE_FPSQRTDBL:
4779 /* A fpload can't be issued until one cycle before a
4780 preceding arithmetic operation has finished if
4781 the target of the fpload is any of the sources
4782 (or destination) of the arithmetic operation. */
4783 return insn_default_latency (dep_insn) - 1;
4785 default:
4786 return 0;
4790 else if (attr_type == TYPE_FPALU)
4792 rtx pat = PATTERN (insn);
4793 rtx dep_pat = PATTERN (dep_insn);
4794 if (GET_CODE (pat) == PARALLEL)
4796 /* This happens for the fldXs,mb patterns. */
4797 pat = XVECEXP (pat, 0, 0);
4799 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4800 /* If this happens, we have to extend this to schedule
4801 optimally. Return 0 for now. */
4802 return 0;
4804 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4806 if (! recog_memoized (dep_insn))
4807 return 0;
4808 switch (get_attr_type (dep_insn))
4810 case TYPE_FPDIVSGL:
4811 case TYPE_FPDIVDBL:
4812 case TYPE_FPSQRTSGL:
4813 case TYPE_FPSQRTDBL:
4814 /* An ALU flop can't be issued until two cycles before a
4815 preceding divide or sqrt operation has finished if
4816 the target of the ALU flop is any of the sources
4817 (or destination) of the divide or sqrt operation. */
4818 return insn_default_latency (dep_insn) - 2;
4820 default:
4821 return 0;
4826 /* For other anti dependencies, the cost is 0. */
4827 return 0;
4829 case REG_DEP_OUTPUT:
4830 /* Output dependency; DEP_INSN writes a register that INSN writes some
4831 cycles later. */
4832 if (attr_type == TYPE_FPLOAD)
4834 rtx pat = PATTERN (insn);
4835 rtx dep_pat = PATTERN (dep_insn);
4836 if (GET_CODE (pat) == PARALLEL)
4838 /* This happens for the fldXs,mb patterns. */
4839 pat = XVECEXP (pat, 0, 0);
4841 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4842 /* If this happens, we have to extend this to schedule
4843 optimally. Return 0 for now. */
4844 return 0;
4846 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4848 if (! recog_memoized (dep_insn))
4849 return 0;
4850 switch (get_attr_type (dep_insn))
4852 case TYPE_FPALU:
4853 case TYPE_FPMULSGL:
4854 case TYPE_FPMULDBL:
4855 case TYPE_FPDIVSGL:
4856 case TYPE_FPDIVDBL:
4857 case TYPE_FPSQRTSGL:
4858 case TYPE_FPSQRTDBL:
4859 /* A fpload can't be issued until one cycle before a
4860 preceding arithmetic operation has finished if
4861 the target of the fpload is the destination of the
4862 arithmetic operation.
4864 Exception: For PA7100LC, PA7200 and PA7300, the cost
4865 is 3 cycles, unless they bundle together. We also
4866 pay the penalty if the second insn is a fpload. */
4867 return insn_default_latency (dep_insn) - 1;
4869 default:
4870 return 0;
4874 else if (attr_type == TYPE_FPALU)
4876 rtx pat = PATTERN (insn);
4877 rtx dep_pat = PATTERN (dep_insn);
4878 if (GET_CODE (pat) == PARALLEL)
4880 /* This happens for the fldXs,mb patterns. */
4881 pat = XVECEXP (pat, 0, 0);
4883 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4884 /* If this happens, we have to extend this to schedule
4885 optimally. Return 0 for now. */
4886 return 0;
4888 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4890 if (! recog_memoized (dep_insn))
4891 return 0;
4892 switch (get_attr_type (dep_insn))
4894 case TYPE_FPDIVSGL:
4895 case TYPE_FPDIVDBL:
4896 case TYPE_FPSQRTSGL:
4897 case TYPE_FPSQRTDBL:
4898 /* An ALU flop can't be issued until two cycles before a
4899 preceding divide or sqrt operation has finished if
4900 the target of the ALU flop is also the target of
4901 the divide or sqrt operation. */
4902 return insn_default_latency (dep_insn) - 2;
4904 default:
4905 return 0;
4910 /* For other output dependencies, the cost is 0. */
4911 return 0;
4913 default:
4914 gcc_unreachable ();
4918 /* Adjust scheduling priorities. We use this to try and keep addil
4919 and the next use of %r1 close together. */
4920 static int
4921 pa_adjust_priority (rtx_insn *insn, int priority)
4923 rtx set = single_set (insn);
4924 rtx src, dest;
4925 if (set)
4927 src = SET_SRC (set);
4928 dest = SET_DEST (set);
4929 if (GET_CODE (src) == LO_SUM
4930 && symbolic_operand (XEXP (src, 1), VOIDmode)
4931 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4932 priority >>= 3;
4934 else if (GET_CODE (src) == MEM
4935 && GET_CODE (XEXP (src, 0)) == LO_SUM
4936 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4937 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4938 priority >>= 1;
4940 else if (GET_CODE (dest) == MEM
4941 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4942 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4943 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4944 priority >>= 3;
4946 return priority;
4949 /* The 700 can only issue a single insn at a time.
4950 The 7XXX processors can issue two insns at a time.
4951 The 8000 can issue 4 insns at a time. */
4952 static int
4953 pa_issue_rate (void)
4955 switch (pa_cpu)
4957 case PROCESSOR_700: return 1;
4958 case PROCESSOR_7100: return 2;
4959 case PROCESSOR_7100LC: return 2;
4960 case PROCESSOR_7200: return 2;
4961 case PROCESSOR_7300: return 2;
4962 case PROCESSOR_8000: return 4;
4964 default:
4965 gcc_unreachable ();
4971 /* Return any length plus adjustment needed by INSN which already has
4972 its length computed as LENGTH. Return LENGTH if no adjustment is
4973 necessary.
4975 Also compute the length of an inline block move here as it is too
4976 complicated to express as a length attribute in pa.md. */
4977 int
4978 pa_adjust_insn_length (rtx_insn *insn, int length)
4980 rtx pat = PATTERN (insn);
4982 /* If length is negative or undefined, provide initial length. */
4983 if ((unsigned int) length >= INT_MAX)
4985 if (GET_CODE (pat) == SEQUENCE)
4986 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
4988 switch (get_attr_type (insn))
4990 case TYPE_MILLI:
4991 length = pa_attr_length_millicode_call (insn);
4992 break;
4993 case TYPE_CALL:
4994 length = pa_attr_length_call (insn, 0);
4995 break;
4996 case TYPE_SIBCALL:
4997 length = pa_attr_length_call (insn, 1);
4998 break;
4999 case TYPE_DYNCALL:
5000 length = pa_attr_length_indirect_call (insn);
5001 break;
5002 case TYPE_SH_FUNC_ADRS:
5003 length = pa_attr_length_millicode_call (insn) + 20;
5004 break;
5005 default:
5006 gcc_unreachable ();
5010 /* Block move pattern. */
5011 if (NONJUMP_INSN_P (insn)
5012 && GET_CODE (pat) == PARALLEL
5013 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5014 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5015 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5016 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5017 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5018 length += compute_movmem_length (insn) - 4;
5019 /* Block clear pattern. */
5020 else if (NONJUMP_INSN_P (insn)
5021 && GET_CODE (pat) == PARALLEL
5022 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5023 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5024 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5025 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5026 length += compute_clrmem_length (insn) - 4;
5027 /* Conditional branch with an unfilled delay slot. */
5028 else if (JUMP_P (insn) && ! simplejump_p (insn))
5030 /* Adjust a short backwards conditional with an unfilled delay slot. */
5031 if (GET_CODE (pat) == SET
5032 && length == 4
5033 && JUMP_LABEL (insn) != NULL_RTX
5034 && ! forward_branch_p (insn))
5035 length += 4;
5036 else if (GET_CODE (pat) == PARALLEL
5037 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5038 && length == 4)
5039 length += 4;
5040 /* Adjust dbra insn with short backwards conditional branch with
5041 unfilled delay slot -- only for case where counter is in a
5042 general register. */
5043 else if (GET_CODE (pat) == PARALLEL
5044 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5045 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5046 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5047 && length == 4
5048 && ! forward_branch_p (insn))
5049 length += 4;
5051 return length;
5054 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5056 static bool
5057 pa_print_operand_punct_valid_p (unsigned char code)
5059 if (code == '@'
5060 || code == '#'
5061 || code == '*'
5062 || code == '^')
5063 return true;
5065 return false;
5068 /* Print operand X (an rtx) in assembler syntax to file FILE.
5069 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5070 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5072 void
5073 pa_print_operand (FILE *file, rtx x, int code)
5075 switch (code)
5077 case '#':
5078 /* Output a 'nop' if there's nothing for the delay slot. */
5079 if (dbr_sequence_length () == 0)
5080 fputs ("\n\tnop", file);
5081 return;
5082 case '*':
5083 /* Output a nullification completer if there's nothing for the
5084 delay slot or nullification is requested. */
5085 if (dbr_sequence_length () == 0
5086 || (final_sequence
5087 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5088 fputs (",n", file);
5089 return;
5090 case 'R':
5091 /* Print out the second register name of a register pair.
5092 I.e., R (6) => 7. */
5093 fputs (reg_names[REGNO (x) + 1], file);
5094 return;
5095 case 'r':
5096 /* A register or zero. */
5097 if (x == const0_rtx
5098 || (x == CONST0_RTX (DFmode))
5099 || (x == CONST0_RTX (SFmode)))
5101 fputs ("%r0", file);
5102 return;
5104 else
5105 break;
5106 case 'f':
5107 /* A register or zero (floating point). */
5108 if (x == const0_rtx
5109 || (x == CONST0_RTX (DFmode))
5110 || (x == CONST0_RTX (SFmode)))
5112 fputs ("%fr0", file);
5113 return;
5115 else
5116 break;
5117 case 'A':
5119 rtx xoperands[2];
5121 xoperands[0] = XEXP (XEXP (x, 0), 0);
5122 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5123 pa_output_global_address (file, xoperands[1], 0);
5124 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5125 return;
5128 case 'C': /* Plain (C)ondition */
5129 case 'X':
5130 switch (GET_CODE (x))
5132 case EQ:
5133 fputs ("=", file); break;
5134 case NE:
5135 fputs ("<>", file); break;
5136 case GT:
5137 fputs (">", file); break;
5138 case GE:
5139 fputs (">=", file); break;
5140 case GEU:
5141 fputs (">>=", file); break;
5142 case GTU:
5143 fputs (">>", file); break;
5144 case LT:
5145 fputs ("<", file); break;
5146 case LE:
5147 fputs ("<=", file); break;
5148 case LEU:
5149 fputs ("<<=", file); break;
5150 case LTU:
5151 fputs ("<<", file); break;
5152 default:
5153 gcc_unreachable ();
5155 return;
5156 case 'N': /* Condition, (N)egated */
5157 switch (GET_CODE (x))
5159 case EQ:
5160 fputs ("<>", file); break;
5161 case NE:
5162 fputs ("=", file); break;
5163 case GT:
5164 fputs ("<=", file); break;
5165 case GE:
5166 fputs ("<", file); break;
5167 case GEU:
5168 fputs ("<<", file); break;
5169 case GTU:
5170 fputs ("<<=", file); break;
5171 case LT:
5172 fputs (">=", file); break;
5173 case LE:
5174 fputs (">", file); break;
5175 case LEU:
5176 fputs (">>", file); break;
5177 case LTU:
5178 fputs (">>=", file); break;
5179 default:
5180 gcc_unreachable ();
5182 return;
5183 /* For floating point comparisons. Note that the output
5184 predicates are the complement of the desired mode. The
5185 conditions for GT, GE, LT, LE and LTGT cause an invalid
5186 operation exception if the result is unordered and this
5187 exception is enabled in the floating-point status register. */
5188 case 'Y':
5189 switch (GET_CODE (x))
5191 case EQ:
5192 fputs ("!=", file); break;
5193 case NE:
5194 fputs ("=", file); break;
5195 case GT:
5196 fputs ("!>", file); break;
5197 case GE:
5198 fputs ("!>=", file); break;
5199 case LT:
5200 fputs ("!<", file); break;
5201 case LE:
5202 fputs ("!<=", file); break;
5203 case LTGT:
5204 fputs ("!<>", file); break;
5205 case UNLE:
5206 fputs ("!?<=", file); break;
5207 case UNLT:
5208 fputs ("!?<", file); break;
5209 case UNGE:
5210 fputs ("!?>=", file); break;
5211 case UNGT:
5212 fputs ("!?>", file); break;
5213 case UNEQ:
5214 fputs ("!?=", file); break;
5215 case UNORDERED:
5216 fputs ("!?", file); break;
5217 case ORDERED:
5218 fputs ("?", file); break;
5219 default:
5220 gcc_unreachable ();
5222 return;
5223 case 'S': /* Condition, operands are (S)wapped. */
5224 switch (GET_CODE (x))
5226 case EQ:
5227 fputs ("=", file); break;
5228 case NE:
5229 fputs ("<>", file); break;
5230 case GT:
5231 fputs ("<", file); break;
5232 case GE:
5233 fputs ("<=", file); break;
5234 case GEU:
5235 fputs ("<<=", file); break;
5236 case GTU:
5237 fputs ("<<", file); break;
5238 case LT:
5239 fputs (">", file); break;
5240 case LE:
5241 fputs (">=", file); break;
5242 case LEU:
5243 fputs (">>=", file); break;
5244 case LTU:
5245 fputs (">>", file); break;
5246 default:
5247 gcc_unreachable ();
5249 return;
5250 case 'B': /* Condition, (B)oth swapped and negate. */
5251 switch (GET_CODE (x))
5253 case EQ:
5254 fputs ("<>", file); break;
5255 case NE:
5256 fputs ("=", file); break;
5257 case GT:
5258 fputs (">=", file); break;
5259 case GE:
5260 fputs (">", file); break;
5261 case GEU:
5262 fputs (">>", file); break;
5263 case GTU:
5264 fputs (">>=", file); break;
5265 case LT:
5266 fputs ("<=", file); break;
5267 case LE:
5268 fputs ("<", file); break;
5269 case LEU:
5270 fputs ("<<", file); break;
5271 case LTU:
5272 fputs ("<<=", file); break;
5273 default:
5274 gcc_unreachable ();
5276 return;
5277 case 'k':
5278 gcc_assert (GET_CODE (x) == CONST_INT);
5279 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5280 return;
5281 case 'Q':
5282 gcc_assert (GET_CODE (x) == CONST_INT);
5283 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5284 return;
5285 case 'L':
5286 gcc_assert (GET_CODE (x) == CONST_INT);
5287 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5288 return;
5289 case 'o':
5290 gcc_assert (GET_CODE (x) == CONST_INT
5291 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5292 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5293 return;
5294 case 'O':
5295 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5296 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5297 return;
5298 case 'p':
5299 gcc_assert (GET_CODE (x) == CONST_INT);
5300 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5301 return;
5302 case 'P':
5303 gcc_assert (GET_CODE (x) == CONST_INT);
5304 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5305 return;
5306 case 'I':
5307 if (GET_CODE (x) == CONST_INT)
5308 fputs ("i", file);
5309 return;
5310 case 'M':
5311 case 'F':
5312 switch (GET_CODE (XEXP (x, 0)))
5314 case PRE_DEC:
5315 case PRE_INC:
5316 if (ASSEMBLER_DIALECT == 0)
5317 fputs ("s,mb", file);
5318 else
5319 fputs (",mb", file);
5320 break;
5321 case POST_DEC:
5322 case POST_INC:
5323 if (ASSEMBLER_DIALECT == 0)
5324 fputs ("s,ma", file);
5325 else
5326 fputs (",ma", file);
5327 break;
5328 case PLUS:
5329 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5330 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5332 if (ASSEMBLER_DIALECT == 0)
5333 fputs ("x", file);
5335 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5336 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5338 if (ASSEMBLER_DIALECT == 0)
5339 fputs ("x,s", file);
5340 else
5341 fputs (",s", file);
5343 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5344 fputs ("s", file);
5345 break;
5346 default:
5347 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5348 fputs ("s", file);
5349 break;
5351 return;
5352 case 'G':
5353 pa_output_global_address (file, x, 0);
5354 return;
5355 case 'H':
5356 pa_output_global_address (file, x, 1);
5357 return;
5358 case 0: /* Don't do anything special */
5359 break;
5360 case 'Z':
5362 unsigned op[3];
5363 compute_zdepwi_operands (INTVAL (x), op);
5364 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5365 return;
5367 case 'z':
5369 unsigned op[3];
5370 compute_zdepdi_operands (INTVAL (x), op);
5371 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5372 return;
5374 case 'c':
5375 /* We can get here from a .vtable_inherit due to our
5376 CONSTANT_ADDRESS_P rejecting perfectly good constant
5377 addresses. */
5378 break;
5379 default:
5380 gcc_unreachable ();
5382 if (GET_CODE (x) == REG)
5384 fputs (reg_names [REGNO (x)], file);
5385 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5387 fputs ("R", file);
5388 return;
5390 if (FP_REG_P (x)
5391 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5392 && (REGNO (x) & 1) == 0)
5393 fputs ("L", file);
5395 else if (GET_CODE (x) == MEM)
5397 int size = GET_MODE_SIZE (GET_MODE (x));
5398 rtx base = NULL_RTX;
5399 switch (GET_CODE (XEXP (x, 0)))
5401 case PRE_DEC:
5402 case POST_DEC:
5403 base = XEXP (XEXP (x, 0), 0);
5404 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5405 break;
5406 case PRE_INC:
5407 case POST_INC:
5408 base = XEXP (XEXP (x, 0), 0);
5409 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5410 break;
5411 case PLUS:
5412 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5413 fprintf (file, "%s(%s)",
5414 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5415 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5416 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5417 fprintf (file, "%s(%s)",
5418 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5419 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5420 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5421 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5423 /* Because the REG_POINTER flag can get lost during reload,
5424 pa_legitimate_address_p canonicalizes the order of the
5425 index and base registers in the combined move patterns. */
5426 rtx base = XEXP (XEXP (x, 0), 1);
5427 rtx index = XEXP (XEXP (x, 0), 0);
5429 fprintf (file, "%s(%s)",
5430 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5432 else
5433 output_address (XEXP (x, 0));
5434 break;
5435 default:
5436 output_address (XEXP (x, 0));
5437 break;
5440 else
5441 output_addr_const (file, x);
5444 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5446 void
5447 pa_output_global_address (FILE *file, rtx x, int round_constant)
5450 /* Imagine (high (const (plus ...))). */
5451 if (GET_CODE (x) == HIGH)
5452 x = XEXP (x, 0);
5454 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5455 output_addr_const (file, x);
5456 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5458 output_addr_const (file, x);
5459 fputs ("-$global$", file);
5461 else if (GET_CODE (x) == CONST)
5463 const char *sep = "";
5464 int offset = 0; /* assembler wants -$global$ at end */
5465 rtx base = NULL_RTX;
5467 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5469 case SYMBOL_REF:
5470 base = XEXP (XEXP (x, 0), 0);
5471 output_addr_const (file, base);
5472 break;
5473 case CONST_INT:
5474 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5475 break;
5476 default:
5477 gcc_unreachable ();
5480 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5482 case SYMBOL_REF:
5483 base = XEXP (XEXP (x, 0), 1);
5484 output_addr_const (file, base);
5485 break;
5486 case CONST_INT:
5487 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5488 break;
5489 default:
5490 gcc_unreachable ();
5493 /* How bogus. The compiler is apparently responsible for
5494 rounding the constant if it uses an LR field selector.
5496 The linker and/or assembler seem a better place since
5497 they have to do this kind of thing already.
5499 If we fail to do this, HP's optimizing linker may eliminate
5500 an addil, but not update the ldw/stw/ldo instruction that
5501 uses the result of the addil. */
5502 if (round_constant)
5503 offset = ((offset + 0x1000) & ~0x1fff);
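/* E.g. (illustrative): an offset of 0x2345 rounds to 0x2000, while
   0x3345 rounds to 0x4000.  */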
5505 switch (GET_CODE (XEXP (x, 0)))
5507 case PLUS:
5508 if (offset < 0)
5510 offset = -offset;
5511 sep = "-";
5513 else
5514 sep = "+";
5515 break;
5517 case MINUS:
5518 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5519 sep = "-";
5520 break;
5522 default:
5523 gcc_unreachable ();
5526 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5527 fputs ("-$global$", file);
5528 if (offset)
5529 fprintf (file, "%s%d", sep, offset);
5531 else
5532 output_addr_const (file, x);
5535 /* Output boilerplate text to appear at the beginning of the file.
5536 There are several possible versions. */
5537 #define aputs(x) fputs(x, asm_out_file)
5538 static inline void
5539 pa_file_start_level (void)
5541 if (TARGET_64BIT)
5542 aputs ("\t.LEVEL 2.0w\n");
5543 else if (TARGET_PA_20)
5544 aputs ("\t.LEVEL 2.0\n");
5545 else if (TARGET_PA_11)
5546 aputs ("\t.LEVEL 1.1\n");
5547 else
5548 aputs ("\t.LEVEL 1.0\n");
5551 static inline void
5552 pa_file_start_space (int sortspace)
5554 aputs ("\t.SPACE $PRIVATE$");
5555 if (sortspace)
5556 aputs (",SORT=16");
5557 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5558 if (flag_tm)
5559 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5560 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5561 "\n\t.SPACE $TEXT$");
5562 if (sortspace)
5563 aputs (",SORT=8");
5564 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5565 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5568 static inline void
5569 pa_file_start_file (int want_version)
5571 if (write_symbols != NO_DEBUG)
5573 output_file_directive (asm_out_file, main_input_filename);
5574 if (want_version)
5575 aputs ("\t.version\t\"01.01\"\n");
5579 static inline void
5580 pa_file_start_mcount (const char *aswhat)
5582 if (profile_flag)
5583 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5586 static void
5587 pa_elf_file_start (void)
5589 pa_file_start_level ();
5590 pa_file_start_mcount ("ENTRY");
5591 pa_file_start_file (0);
5594 static void
5595 pa_som_file_start (void)
5597 pa_file_start_level ();
5598 pa_file_start_space (0);
5599 aputs ("\t.IMPORT $global$,DATA\n"
5600 "\t.IMPORT $$dyncall,MILLICODE\n");
5601 pa_file_start_mcount ("CODE");
5602 pa_file_start_file (0);
5605 static void
5606 pa_linux_file_start (void)
5608 pa_file_start_file (1);
5609 pa_file_start_level ();
5610 pa_file_start_mcount ("CODE");
5613 static void
5614 pa_hpux64_gas_file_start (void)
5616 pa_file_start_level ();
5617 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5618 if (profile_flag)
5619 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5620 #endif
5621 pa_file_start_file (1);
5624 static void
5625 pa_hpux64_hpas_file_start (void)
5627 pa_file_start_level ();
5628 pa_file_start_space (1);
5629 pa_file_start_mcount ("CODE");
5630 pa_file_start_file (0);
5632 #undef aputs
5634 /* Search the deferred plabel list for SYMBOL and return its internal
5635 label. If an entry for SYMBOL is not found, a new entry is created. */
5637 rtx
5638 pa_get_deferred_plabel (rtx symbol)
5640 const char *fname = XSTR (symbol, 0);
5641 size_t i;
5643 /* See if we have already put this function on the list of deferred
5644 plabels. This list is generally small, so a linear search is not
5645 too ugly. If it proves too slow, replace it with something faster. */
5646 for (i = 0; i < n_deferred_plabels; i++)
5647 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5648 break;
5650 /* If the deferred plabel list is empty, or this entry was not found
5651 on the list, create a new entry on the list. */
5652 if (deferred_plabels == NULL || i == n_deferred_plabels)
5654 tree id;
5656 if (deferred_plabels == 0)
5657 deferred_plabels = ggc_alloc<deferred_plabel> ();
5658 else
5659 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5660 deferred_plabels,
5661 n_deferred_plabels + 1);
5663 i = n_deferred_plabels++;
5664 deferred_plabels[i].internal_label = gen_label_rtx ();
5665 deferred_plabels[i].symbol = symbol;
5667 /* Gross. We have just implicitly taken the address of this
5668 function. Mark it in the same manner as assemble_name. */
5669 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5670 if (id)
5671 mark_referenced (id);
5674 return deferred_plabels[i].internal_label;
5677 static void
5678 output_deferred_plabels (void)
5680 size_t i;
5682 /* If we have some deferred plabels, then we need to switch into the
5683 data or readonly data section, and align it to a 4-byte boundary
5684 (8-byte for TARGET_64BIT) before outputting the deferred plabels. */
5685 if (n_deferred_plabels)
5687 switch_to_section (flag_pic ? data_section : readonly_data_section);
5688 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5691 /* Now output the deferred plabels. */
5692 for (i = 0; i < n_deferred_plabels; i++)
5694 targetm.asm_out.internal_label (asm_out_file, "L",
5695 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5696 assemble_integer (deferred_plabels[i].symbol,
5697 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5701 /* Initialize optabs to point to emulation routines. */
5703 static void
5704 pa_init_libfuncs (void)
5706 if (HPUX_LONG_DOUBLE_LIBRARY)
5708 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5709 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5710 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5711 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5712 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5713 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5714 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5715 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5716 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5718 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5719 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5720 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5721 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5722 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5723 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5724 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5726 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5727 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5728 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5729 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5731 set_conv_libfunc (sfix_optab, SImode, TFmode,
5732 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5733 : "_U_Qfcnvfxt_quad_to_sgl");
5734 set_conv_libfunc (sfix_optab, DImode, TFmode,
5735 "_U_Qfcnvfxt_quad_to_dbl");
5736 set_conv_libfunc (ufix_optab, SImode, TFmode,
5737 "_U_Qfcnvfxt_quad_to_usgl");
5738 set_conv_libfunc (ufix_optab, DImode, TFmode,
5739 "_U_Qfcnvfxt_quad_to_udbl");
5741 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5742 "_U_Qfcnvxf_sgl_to_quad");
5743 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5744 "_U_Qfcnvxf_dbl_to_quad");
5745 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5746 "_U_Qfcnvxf_usgl_to_quad");
5747 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5748 "_U_Qfcnvxf_udbl_to_quad");
5751 if (TARGET_SYNC_LIBCALL)
5752 init_sync_libfuncs (UNITS_PER_WORD);
5755 /* HP's millicode routines mean something special to the assembler.
5756 Keep track of which ones we have used. */
5758 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5759 static void import_milli (enum millicodes);
5760 static char imported[(int) end1000];
5761 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5762 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5763 #define MILLI_START 10
5765 static void
5766 import_milli (enum millicodes code)
5768 char str[sizeof (import_string)];
5770 if (!imported[(int) code])
5772 imported[(int) code] = 1;
5773 strcpy (str, import_string);
5774 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5775 output_asm_insn (str, 0);
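/* Standalone illustrative sketch (not part of this file's build) of the
   in-place string patching import_milli performs: the four dots in the
   template are overwritten with the millicode name.  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  static const char tmpl[] = ".IMPORT $$....,MILLICODE";
  char str[sizeof (tmpl)];

  strcpy (str, tmpl);
  strncpy (str + 10, "mulI", 4);	/* 10 == MILLI_START  */
  puts (str);				/* .IMPORT $$mulI,MILLICODE  */
  return 0;
}
#endif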
5779 /* The register constraints have put the operands and return value in
5780 the proper registers. */
5782 const char *
5783 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5785 import_milli (mulI);
5786 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5789 /* Emit the rtl for doing a division by a constant. */
5791 /* Do magic division millicodes exist for this value? */
5792 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5794 /* We'll use an array to keep track of the magic millicodes and
5795 whether or not we've used them already. [n][0] is signed, [n][1] is
5796 unsigned. */
5798 static int div_milli[16][2];
5800 int
5801 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5803 if (GET_CODE (operands[2]) == CONST_INT
5804 && INTVAL (operands[2]) > 0
5805 && INTVAL (operands[2]) < 16
5806 && pa_magic_milli[INTVAL (operands[2])])
5808 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5810 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5811 emit
5812 (gen_rtx_PARALLEL
5813 (VOIDmode,
5814 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5815 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5816 SImode,
5817 gen_rtx_REG (SImode, 26),
5818 operands[2])),
5819 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5820 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5821 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5822 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5823 gen_rtx_CLOBBER (VOIDmode, ret))));
5824 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5825 return 1;
5827 return 0;
5830 const char *
5831 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5833 int divisor;
5835 /* If the divisor is a constant, try to use one of the special
5836 opcodes. */
5837 if (GET_CODE (operands[0]) == CONST_INT)
5839 static char buf[100];
5840 divisor = INTVAL (operands[0]);
5841 if (!div_milli[divisor][unsignedp])
5843 div_milli[divisor][unsignedp] = 1;
5844 if (unsignedp)
5845 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5846 else
5847 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5849 if (unsignedp)
5851 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5852 INTVAL (operands[0]));
5853 return pa_output_millicode_call (insn,
5854 gen_rtx_SYMBOL_REF (SImode, buf));
5856 else
5858 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5859 INTVAL (operands[0]));
5860 return pa_output_millicode_call (insn,
5861 gen_rtx_SYMBOL_REF (SImode, buf));
5864 /* Divisor isn't a special constant. */
5865 else
5867 if (unsignedp)
5869 import_milli (divU);
5870 return pa_output_millicode_call (insn,
5871 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5873 else
5875 import_milli (divI);
5876 return pa_output_millicode_call (insn,
5877 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
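/* Standalone illustrative sketch (hypothetical divisor, not part of this
   file's build) showing both the pa_magic_milli[] test used by
   pa_emit_hpdiv_const and the millicode name construction above.  */
#if 0
#include <stdio.h>

int
main (void)
{
  static const int magic[16] = {0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1};
  long divisor = 7;			/* hypothetical constant divisor  */
  char buf[100];

  if (divisor > 0 && divisor < 16 && magic[divisor])
    {
      sprintf (buf, "$$divI_%ld", divisor);
      puts (buf);			/* $$divI_7  */
    }
  return 0;
}
#endif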
5882 /* Output a $$rem millicode to do mod. */
5884 const char *
5885 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5887 if (unsignedp)
5889 import_milli (remU);
5890 return pa_output_millicode_call (insn,
5891 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5893 else
5895 import_milli (remI);
5896 return pa_output_millicode_call (insn,
5897 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5901 void
5902 pa_output_arg_descriptor (rtx_insn *call_insn)
5904 const char *arg_regs[4];
5905 machine_mode arg_mode;
5906 rtx link;
5907 int i, output_flag = 0;
5908 int regno;
5910 /* We neither need nor want argument location descriptors for the
5911 64-bit runtime environment or the ELF32 environment. */
5912 if (TARGET_64BIT || TARGET_ELF32)
5913 return;
5915 for (i = 0; i < 4; i++)
5916 arg_regs[i] = 0;
5918 /* Specify explicitly that no argument relocations should take place
5919 if using the portable runtime calling conventions. */
5920 if (TARGET_PORTABLE_RUNTIME)
5922 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5923 asm_out_file);
5924 return;
5927 gcc_assert (CALL_P (call_insn));
5928 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5929 link; link = XEXP (link, 1))
5931 rtx use = XEXP (link, 0);
5933 if (! (GET_CODE (use) == USE
5934 && GET_CODE (XEXP (use, 0)) == REG
5935 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5936 continue;
5938 arg_mode = GET_MODE (XEXP (use, 0));
5939 regno = REGNO (XEXP (use, 0));
5940 if (regno >= 23 && regno <= 26)
5942 arg_regs[26 - regno] = "GR";
5943 if (arg_mode == DImode)
5944 arg_regs[25 - regno] = "GR";
5946 else if (regno >= 32 && regno <= 39)
5948 if (arg_mode == SFmode)
5949 arg_regs[(regno - 32) / 2] = "FR";
5950 else
5952 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5953 arg_regs[(regno - 34) / 2] = "FR";
5954 arg_regs[(regno - 34) / 2 + 1] = "FU";
5955 #else
5956 arg_regs[(regno - 34) / 2] = "FU";
5957 arg_regs[(regno - 34) / 2 + 1] = "FR";
5958 #endif
5962 fputs ("\t.CALL ", asm_out_file);
5963 for (i = 0; i < 4; i++)
5965 if (arg_regs[i])
5967 if (output_flag++)
5968 fputc (',', asm_out_file);
5969 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5972 fputc ('\n', asm_out_file);
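/* Standalone illustrative sketch (hypothetical argument list, not part
   of this file's build) of the descriptor the loop above produces for a
   call passing one word in %r26 (ARGW0) and an SFmode value in the FP
   register with GCC regno 34 (ARGW1).  */
#if 0
#include <stdio.h>

int
main (void)
{
  const char *arg_regs[4] = { "GR", "FR", 0, 0 };
  int i, output_flag = 0;

  fputs ("\t.CALL ", stdout);
  for (i = 0; i < 4; i++)
    if (arg_regs[i])
      {
	if (output_flag++)
	  fputc (',', stdout);
	printf ("ARGW%d=%s", i, arg_regs[i]);
      }
  fputc ('\n', stdout);		/* prints "\t.CALL ARGW0=GR,ARGW1=FR"  */
  return 0;
}
#endif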
5975 /* Inform reload about cases where moving X with a mode MODE to or from
5976 a register in RCLASS requires an extra scratch or immediate register.
5977 Return the class needed for the immediate register. */
5979 static reg_class_t
5980 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5981 machine_mode mode, secondary_reload_info *sri)
5983 int regno;
5984 enum reg_class rclass = (enum reg_class) rclass_i;
5986 /* Handle the easy stuff first. */
5987 if (rclass == R1_REGS)
5988 return NO_REGS;
5990 if (REG_P (x))
5992 regno = REGNO (x);
5993 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5994 return NO_REGS;
5996 else
5997 regno = -1;
5999 /* If we have something like (mem (mem (...))), we can safely assume the
6000 inner MEM will end up in a general register after reloading, so there's
6001 no need for a secondary reload. */
6002 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6003 return NO_REGS;
6005 /* Trying to load a constant into a FP register during PIC code
6006 generation requires %r1 as a scratch register. For float modes,
6007 the only legitimate constant is CONST0_RTX. However, there are
6008 a few patterns that accept constant double operands. */
6009 if (flag_pic
6010 && FP_REG_CLASS_P (rclass)
6011 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6013 switch (mode)
6015 case SImode:
6016 sri->icode = CODE_FOR_reload_insi_r1;
6017 break;
6019 case DImode:
6020 sri->icode = CODE_FOR_reload_indi_r1;
6021 break;
6023 case SFmode:
6024 sri->icode = CODE_FOR_reload_insf_r1;
6025 break;
6027 case DFmode:
6028 sri->icode = CODE_FOR_reload_indf_r1;
6029 break;
6031 default:
6032 gcc_unreachable ();
6034 return NO_REGS;
6037 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6038 register when we're generating PIC code or when the operand isn't
6039 readonly. */
6040 if (pa_symbolic_expression_p (x))
6042 if (GET_CODE (x) == HIGH)
6043 x = XEXP (x, 0);
6045 if (flag_pic || !read_only_operand (x, VOIDmode))
6047 switch (mode)
6049 case SImode:
6050 sri->icode = CODE_FOR_reload_insi_r1;
6051 break;
6053 case DImode:
6054 sri->icode = CODE_FOR_reload_indi_r1;
6055 break;
6057 default:
6058 gcc_unreachable ();
6060 return NO_REGS;
6064 /* Profiling showed the PA port spends about 1.3% of its compilation
6065 time in true_regnum from calls inside pa_secondary_reload_class. */
6066 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6067 regno = true_regnum (x);
6069 /* Handle reloads for floating point loads and stores. */
6070 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6071 && FP_REG_CLASS_P (rclass))
6073 if (MEM_P (x))
6075 x = XEXP (x, 0);
6077 /* We don't need a secondary reload for indexed memory addresses.
6079 When INT14_OK_STRICT is true, it might appear that we could
6080 directly allow register indirect memory addresses. However,
6081 this doesn't work because we don't support SUBREGs in
6082 floating-point register copies and reload doesn't tell us
6083 when it's going to use a SUBREG. */
6084 if (IS_INDEX_ADDR_P (x))
6085 return NO_REGS;
6088 /* Request a secondary reload with a general scratch register
6089 for everything else. ??? Could symbolic operands be handled
6090 directly when generating non-pic PA 2.0 code? */
6091 sri->icode = (in_p
6092 ? direct_optab_handler (reload_in_optab, mode)
6093 : direct_optab_handler (reload_out_optab, mode));
6094 return NO_REGS;
6097 /* A SAR<->FP register copy requires an intermediate general register
6098 and secondary memory. We need a secondary reload with a general
6099 scratch register for spills. */
6100 if (rclass == SHIFT_REGS)
6102 /* Handle spill. */
6103 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6105 sri->icode = (in_p
6106 ? direct_optab_handler (reload_in_optab, mode)
6107 : direct_optab_handler (reload_out_optab, mode));
6108 return NO_REGS;
6111 /* Handle FP copy. */
6112 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6113 return GENERAL_REGS;
6116 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6117 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6118 && FP_REG_CLASS_P (rclass))
6119 return GENERAL_REGS;
6121 return NO_REGS;
6124 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6125 is only marked as live on entry by df-scan when it is a fixed
6126 register. It isn't a fixed register in the 64-bit runtime,
6127 so we need to mark it here. */
6129 static void
6130 pa_extra_live_on_entry (bitmap regs)
6132 if (TARGET_64BIT)
6133 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6136 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6137 to prevent it from being deleted. */
6139 rtx
6140 pa_eh_return_handler_rtx (void)
6142 rtx tmp;
6144 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6145 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6146 tmp = gen_rtx_MEM (word_mode, tmp);
6147 tmp->volatil = 1;
6148 return tmp;
6151 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6152 by invisible reference. As a GCC extension, we also pass anything
6153 with a zero or variable size by reference.
6155 The 64-bit runtime does not describe passing any types by invisible
6156 reference. The internals of GCC can't currently handle passing
6157 empty structures, and zero or variable length arrays when they are
6158 not passed entirely on the stack or by reference. Thus, as a GCC
6159 extension, we pass these types by reference. The HP compiler doesn't
6160 support these types, so hopefully there shouldn't be any compatibility
6161 issues. This may have to be revisited when HP releases a C99 compiler
6162 or updates the ABI. */
6164 static bool
6165 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6166 machine_mode mode, const_tree type,
6167 bool named ATTRIBUTE_UNUSED)
6169 HOST_WIDE_INT size;
6171 if (type)
6172 size = int_size_in_bytes (type);
6173 else
6174 size = GET_MODE_SIZE (mode);
6176 if (TARGET_64BIT)
6177 return size <= 0;
6178 else
6179 return size <= 0 || size > 8;
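/* Standalone sketch (hypothetical sizes, not part of this file's build)
   restating the predicate above as a plain function with a few checks.  */
#if 0
#include <assert.h>

static int
by_reference (long size, int target_64bit)
{
  return target_64bit ? size <= 0 : size <= 0 || size > 8;
}

int
main (void)
{
  assert (by_reference (16, 0));	/* >8 bytes: by reference (32-bit)  */
  assert (!by_reference (8, 0));	/* 8 bytes: passed by value  */
  assert (!by_reference (16, 1));	/* 64-bit runtime: by value  */
  assert (by_reference (0, 1));		/* zero size: GCC extension  */
  return 0;
}
#endif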
6182 enum direction
6183 pa_function_arg_padding (machine_mode mode, const_tree type)
6185 if (mode == BLKmode
6186 || (TARGET_64BIT
6187 && type
6188 && (AGGREGATE_TYPE_P (type)
6189 || TREE_CODE (type) == COMPLEX_TYPE
6190 || TREE_CODE (type) == VECTOR_TYPE)))
6192 /* Return none if justification is not required. */
6193 if (type
6194 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6195 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6196 return none;
6198 /* The directions set here are ignored when a BLKmode argument larger
6199 than a word is placed in a register. Different code is used for
6200 the stack and registers. This makes it difficult to have a
6201 consistent data representation for both the stack and registers.
6202 For both runtimes, the justification and padding for arguments on
6203 the stack and in registers should be identical. */
6204 if (TARGET_64BIT)
6205 /* The 64-bit runtime specifies left justification for aggregates. */
6206 return upward;
6207 else
6208 /* The 32-bit runtime architecture specifies right justification.
6209 When the argument is passed on the stack, the argument is padded
6210 with garbage on the left. The HP compiler pads with zeros. */
6211 return downward;
6214 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6215 return downward;
6216 else
6217 return none;
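/* For instance, a 3-byte aggregate is right-justified (downward) in its
   argument word on the 32-bit runtime but left-justified (upward) on the
   64-bit runtime, while an aggregate whose size is an exact multiple of
   PARM_BOUNDARY needs no justification at all.  */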
6221 /* Do what is necessary for `va_start'. We look at the current function
6222 to determine if stdargs or varargs is used and fill in an initial
6223 va_list. A pointer to this constructor is returned. */
6225 static rtx
6226 hppa_builtin_saveregs (void)
6228 rtx offset, dest;
6229 tree fntype = TREE_TYPE (current_function_decl);
6230 int argadj = ((!stdarg_p (fntype))
6231 ? UNITS_PER_WORD : 0);
6233 if (argadj)
6234 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6235 else
6236 offset = crtl->args.arg_offset_rtx;
6238 if (TARGET_64BIT)
6240 int i, off;
6242 /* Adjust for varargs/stdarg differences. */
6243 if (argadj)
6244 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6245 else
6246 offset = crtl->args.arg_offset_rtx;
6248 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6249 from the incoming arg pointer and growing to larger addresses. */
6250 for (i = 26, off = -64; i >= 19; i--, off += 8)
6251 emit_move_insn (gen_rtx_MEM (word_mode,
6252 plus_constant (Pmode,
6253 arg_pointer_rtx, off)),
6254 gen_rtx_REG (word_mode, i));
6256 /* The incoming args pointer points just beyond the flushback area;
6257 normally this is not a serious concern. However, when we are doing
6258 varargs/stdargs we want to make the arg pointer point to the start
6259 of the incoming argument area. */
6260 emit_move_insn (virtual_incoming_args_rtx,
6261 plus_constant (Pmode, arg_pointer_rtx, -64));
6263 /* Now return a pointer to the first anonymous argument. */
6264 return copy_to_reg (expand_binop (Pmode, add_optab,
6265 virtual_incoming_args_rtx,
6266 offset, 0, 0, OPTAB_LIB_WIDEN));
6269 /* Store general registers on the stack. */
6270 dest = gen_rtx_MEM (BLKmode,
6271 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6272 -16));
6273 set_mem_alias_set (dest, get_varargs_alias_set ());
6274 set_mem_align (dest, BITS_PER_WORD);
6275 move_block_from_reg (23, dest, 4);
6277 /* move_block_from_reg will emit code to store the argument registers
6278 individually as scalar stores.
6280 However, other insns may later load from the same addresses for
6281 a structure load (passing a struct to a varargs routine).
6283 The alias code assumes that such aliasing can never happen, so we
6284 have to keep memory referencing insns from moving up beyond the
6285 last argument register store. So we emit a blockage insn here. */
6286 emit_insn (gen_blockage ());
6288 return copy_to_reg (expand_binop (Pmode, add_optab,
6289 crtl->args.internal_arg_pointer,
6290 offset, 0, 0, OPTAB_LIB_WIDEN));
6293 static void
6294 hppa_va_start (tree valist, rtx nextarg)
6296 nextarg = expand_builtin_saveregs ();
6297 std_expand_builtin_va_start (valist, nextarg);
6300 static tree
6301 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6302 gimple_seq *post_p)
6304 if (TARGET_64BIT)
6306 /* Args grow upward. We can use the generic routines. */
6307 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6309 else /* !TARGET_64BIT */
6311 tree ptr = build_pointer_type (type);
6312 tree valist_type;
6313 tree t, u;
6314 unsigned int size, ofs;
6315 bool indirect;
6317 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6318 if (indirect)
6320 type = ptr;
6321 ptr = build_pointer_type (type);
6323 size = int_size_in_bytes (type);
6324 valist_type = TREE_TYPE (valist);
6326 /* Args grow down. Not handled by generic routines. */
6328 u = fold_convert (sizetype, size_in_bytes (type));
6329 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6330 t = fold_build_pointer_plus (valist, u);
6332 /* Align to 4 or 8 byte boundary depending on argument size. */
6334 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6335 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6336 t = fold_convert (valist_type, t);
6338 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6340 ofs = (8 - size) % 4;
6341 if (ofs != 0)
6342 t = fold_build_pointer_plus_hwi (t, ofs);
6344 t = fold_convert (ptr, t);
6345 t = build_va_arg_indirect_ref (t);
6347 if (indirect)
6348 t = build_va_arg_indirect_ref (t);
6350 return t;
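/* Standalone sketch (hypothetical valist address, not part of this
   file's build) of the 32-bit pointer arithmetic built as trees above:
   step down by the argument size, align, then step back up so small
   arguments are read right-justified.  */
#if 0
#include <stdio.h>

static unsigned long
next_arg (unsigned long valist, unsigned int size)
{
  unsigned long t = valist - size;
  t &= (size > 4 ? -8ul : -4ul);	/* 4- or 8-byte alignment  */
  t += (8 - size) % 4;			/* right justification  */
  return t;
}

int
main (void)
{
  printf ("%#lx\n", next_arg (0x100, 4));	/* 0xfc  */
  printf ("%#lx\n", next_arg (0x100, 8));	/* 0xf8  */
  printf ("%#lx\n", next_arg (0x100, 2));	/* 0xfe  */
  return 0;
}
#endif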
6354 /* True if MODE is valid for the target. By "valid", we mean able to
6355 be manipulated in non-trivial ways. In particular, this means all
6356 the arithmetic is supported.
6358 Currently, TImode is not valid as the HP 64-bit runtime documentation
6359 doesn't document the alignment and calling conventions for this type.
6360 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6361 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6363 static bool
6364 pa_scalar_mode_supported_p (machine_mode mode)
6366 int precision = GET_MODE_PRECISION (mode);
6368 switch (GET_MODE_CLASS (mode))
6370 case MODE_PARTIAL_INT:
6371 case MODE_INT:
6372 if (precision == CHAR_TYPE_SIZE)
6373 return true;
6374 if (precision == SHORT_TYPE_SIZE)
6375 return true;
6376 if (precision == INT_TYPE_SIZE)
6377 return true;
6378 if (precision == LONG_TYPE_SIZE)
6379 return true;
6380 if (precision == LONG_LONG_TYPE_SIZE)
6381 return true;
6382 return false;
6384 case MODE_FLOAT:
6385 if (precision == FLOAT_TYPE_SIZE)
6386 return true;
6387 if (precision == DOUBLE_TYPE_SIZE)
6388 return true;
6389 if (precision == LONG_DOUBLE_TYPE_SIZE)
6390 return true;
6391 return false;
6393 case MODE_DECIMAL_FLOAT:
6394 return false;
6396 default:
6397 gcc_unreachable ();
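/* Under the usual HP-UX type sizes (32-bit int, 64-bit long long,
   128-bit long double), this accepts QImode through DImode integers and
   SFmode/DFmode/TFmode floats, but rejects TImode, since none of the
   integer type sizes checked above reaches 128 bits.  */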
6401 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6402 it branches into the delay slot. Otherwise, return FALSE. */
6404 static bool
6405 branch_to_delay_slot_p (rtx_insn *insn)
6407 rtx_insn *jump_insn;
6409 if (dbr_sequence_length ())
6410 return FALSE;
6412 jump_insn = next_active_insn (JUMP_LABEL (insn));
6413 while (insn)
6415 insn = next_active_insn (insn);
6416 if (jump_insn == insn)
6417 return TRUE;
6419 /* We can't rely on the length of asms. So, we return FALSE when
6420 the branch is followed by an asm. */
6421 if (!insn
6422 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6423 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6424 || get_attr_length (insn) > 0)
6425 break;
6428 return FALSE;
6431 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6433 This occurs when INSN has an unfilled delay slot and is followed
6434 by an asm. Disaster can occur if the asm is empty and the jump
6435 branches into the delay slot. So, we add a nop in the delay slot
6436 when this occurs. */
6438 static bool
6439 branch_needs_nop_p (rtx_insn *insn)
6441 rtx_insn *jump_insn;
6443 if (dbr_sequence_length ())
6444 return FALSE;
6446 jump_insn = next_active_insn (JUMP_LABEL (insn));
6447 while (insn)
6449 insn = next_active_insn (insn);
6450 if (!insn || jump_insn == insn)
6451 return TRUE;
6453 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6454 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6455 && get_attr_length (insn) > 0)
6456 break;
6459 return FALSE;
6462 /* Return TRUE if INSN, a forward jump insn, can use nullification
6463 to skip the following instruction. This avoids an extra cycle due
6464 to a mis-predicted branch when we fall through. */
6466 static bool
6467 use_skip_p (rtx_insn *insn)
6469 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL (insn));
6471 while (insn)
6473 insn = next_active_insn (insn);
6475 /* We can't rely on the length of asms, so we can't skip asms. */
6476 if (!insn
6477 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6478 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6479 break;
6480 if (get_attr_length (insn) == 4
6481 && jump_insn == next_active_insn (insn))
6482 return TRUE;
6483 if (get_attr_length (insn) > 0)
6484 break;
6487 return FALSE;
6490 /* This routine handles all the normal conditional branch sequences we
6491 might need to generate. It handles compare immediate vs compare
6492 register, nullification of delay slots, varying length branches,
6493 negated branches, and all combinations of the above. It returns the
6494 output appropriate to emit the branch corresponding to all given
6495 parameters. */
6497 const char *
6498 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6500 static char buf[100];
6501 bool useskip;
6502 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6503 int length = get_attr_length (insn);
6504 int xdelay;
6506 /* A conditional branch to the following instruction (e.g. the delay slot)
6507 is asking for a disaster. This can happen when not optimizing and
6508 when jump optimization fails.
6510 While it is usually safe to emit nothing, this can fail if the
6511 preceding instruction is a nullified branch with an empty delay
6512 slot and the same branch target as this branch. We could check
6513 for this but jump optimization should eliminate nop jumps. It
6514 is always safe to emit a nop. */
6515 if (branch_to_delay_slot_p (insn))
6516 return "nop";
6518 /* The doubleword form of the cmpib instruction doesn't have the LEU
6519 and GTU conditions while the cmpb instruction does. Since we accept
6520 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6521 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6522 operands[2] = gen_rtx_REG (DImode, 0);
6523 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6524 operands[1] = gen_rtx_REG (DImode, 0);
6526 /* If this is a long branch with its delay slot unfilled, set `nullify'
6527 as it can nullify the delay slot and save a nop. */
6528 if (length == 8 && dbr_sequence_length () == 0)
6529 nullify = 1;
6531 /* If this is a short forward conditional branch which did not get
6532 its delay slot filled, the delay slot can still be nullified. */
6533 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6534 nullify = forward_branch_p (insn);
6536 /* A forward branch over a single nullified insn can be done with a
6537 comclr instruction. This avoids a single cycle penalty due to
6538 mis-predicted branch if we fall through (branch not taken). */
6539 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6541 switch (length)
6543 /* All short conditional branches except backwards with an unfilled
6544 delay slot. */
6545 case 4:
6546 if (useskip)
6547 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6548 else
6549 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6550 if (GET_MODE (operands[1]) == DImode)
6551 strcat (buf, "*");
6552 if (negated)
6553 strcat (buf, "%B3");
6554 else
6555 strcat (buf, "%S3");
6556 if (useskip)
6557 strcat (buf, " %2,%r1,%%r0");
6558 else if (nullify)
6560 if (branch_needs_nop_p (insn))
6561 strcat (buf, ",n %2,%r1,%0%#");
6562 else
6563 strcat (buf, ",n %2,%r1,%0");
6565 else
6566 strcat (buf, " %2,%r1,%0");
6567 break;
6569 /* All long conditionals. Note a short backward branch with an
6570 unfilled delay slot is treated just like a long backward branch
6571 with an unfilled delay slot. */
6572 case 8:
6573 /* Handle weird backwards branch with a filled delay slot
6574 which is nullified. */
6575 if (dbr_sequence_length () != 0
6576 && ! forward_branch_p (insn)
6577 && nullify)
6579 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6580 if (GET_MODE (operands[1]) == DImode)
6581 strcat (buf, "*");
6582 if (negated)
6583 strcat (buf, "%S3");
6584 else
6585 strcat (buf, "%B3");
6586 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6588 /* Handle short backwards branch with an unfilled delay slot.
6589 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6590 taken and untaken branches. */
6591 else if (dbr_sequence_length () == 0
6592 && ! forward_branch_p (insn)
6593 && INSN_ADDRESSES_SET_P ()
6594 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6595 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6597 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6598 if (GET_MODE (operands[1]) == DImode)
6599 strcat (buf, "*");
6600 if (negated)
6601 strcat (buf, "%B3 %2,%r1,%0%#");
6602 else
6603 strcat (buf, "%S3 %2,%r1,%0%#");
6605 else
6607 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6608 if (GET_MODE (operands[1]) == DImode)
6609 strcat (buf, "*");
6610 if (negated)
6611 strcat (buf, "%S3");
6612 else
6613 strcat (buf, "%B3");
6614 if (nullify)
6615 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6616 else
6617 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6619 break;
6621 default:
6622 /* The reversed conditional branch must branch over one additional
6623 instruction if the delay slot is filled and needs to be extracted
6624 by pa_output_lbranch. If the delay slot is empty or this is a
6625 nullified forward branch, the instruction after the reversed
6626 condition branch must be nullified. */
6627 if (dbr_sequence_length () == 0
6628 || (nullify && forward_branch_p (insn)))
6630 nullify = 1;
6631 xdelay = 0;
6632 operands[4] = GEN_INT (length);
6634 else
6636 xdelay = 1;
6637 operands[4] = GEN_INT (length + 4);
6640 /* Create a reversed conditional branch which branches around
6641 the following insns. */
6642 if (GET_MODE (operands[1]) != DImode)
6644 if (nullify)
6646 if (negated)
6647 strcpy (buf,
6648 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6649 else
6650 strcpy (buf,
6651 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6653 else
6655 if (negated)
6656 strcpy (buf,
6657 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6658 else
6659 strcpy (buf,
6660 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6663 else
6665 if (nullify)
6667 if (negated)
6668 strcpy (buf,
6669 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6670 else
6671 strcpy (buf,
6672 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6674 else
6676 if (negated)
6677 strcpy (buf,
6678 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6679 else
6680 strcpy (buf,
6681 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6685 output_asm_insn (buf, operands);
6686 return pa_output_lbranch (operands[0], insn, xdelay);
6688 return buf;
6691 /* This routine handles output of long unconditional branches that
6692 exceed the maximum range of a simple branch instruction. Since
6693 we don't have a register available for the branch, we save register
6694 %r1 in the frame marker, load the branch destination DEST into %r1,
6695 execute the branch, and restore %r1 in the delay slot of the branch.
6697 Since long branches may have an insn in the delay slot and the
6698 delay slot is used to restore %r1, we in general need to extract
6699 this insn and execute it before the branch. However, to facilitate
6700 use of this function by conditional branches, we also provide an
6701 option to not extract the delay insn so that it will be emitted
6702 after the long branch. So, if there is an insn in the delay slot,
6703 it is extracted if XDELAY is nonzero.
6705 The lengths of the various long-branch sequences are 20, 16 and 24
6706 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
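/* (Counting 4 bytes per instruction: save %r1 + ldil/ldo/bv + restore
   is 5 insns = 20 bytes for the portable runtime; save + ldil/be +
   restore is 4 insns = 16 bytes non-PIC; save + bl/addil/ldo/bv +
   restore is 6 insns = 24 bytes for PIC.)  */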
6708 const char *
6709 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6711 rtx xoperands[2];
6713 xoperands[0] = dest;
6715 /* First, free up the delay slot. */
6716 if (xdelay && dbr_sequence_length () != 0)
6718 /* We can't handle a jump in the delay slot. */
6719 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6721 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6722 optimize, 0, NULL);
6724 /* Now delete the delay insn. */
6725 SET_INSN_DELETED (NEXT_INSN (insn));
6728 /* Output an insn to save %r1. The runtime documentation doesn't
6729 specify whether the "Clean Up" slot in the callers frame can
6730 be clobbered by the callee. It isn't copied by HP's builtin
6731 alloca, so this suggests that it can be clobbered if necessary.
6732 The "Static Link" location is copied by HP builtin alloca, so
6733 we avoid using it. Using the cleanup slot might be a problem
6734 if we have to interoperate with languages that pass cleanup
6735 information. However, it should be possible to handle these
6736 situations with GCC's asm feature.
6738 The "Current RP" slot is reserved for the called procedure, so
6739 we try to use it when we don't have a frame of our own. It's
6740 rather unlikely that we won't have a frame when we need to emit
6741 a very long branch.
6743 Really the way to go long term is a register scavenger; goto
6744 the target of the jump and find a register which we can use
6745 as a scratch to hold the value in %r1. Then, we wouldn't have
6746 to free up the delay slot or clobber a slot that may be needed
6747 for other purposes. */
6748 if (TARGET_64BIT)
6750 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6751 /* Use the return pointer slot in the frame marker. */
6752 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6753 else
6754 /* Use the slot at -40 in the frame marker since HP builtin
6755 alloca doesn't copy it. */
6756 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6758 else
6760 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6761 /* Use the return pointer slot in the frame marker. */
6762 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6763 else
6764 /* Use the "Clean Up" slot in the frame marker. In GCC,
6765 the only other use of this location is for copying a
6766 floating point double argument from a floating-point
6767 register to two general registers. The copy is done
6768 as an "atomic" operation when outputting a call, so it
6769 won't interfere with our using the location here. */
6770 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6773 if (TARGET_PORTABLE_RUNTIME)
6775 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6776 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6777 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6779 else if (flag_pic)
6781 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6782 if (TARGET_SOM || !TARGET_GAS)
6784 xoperands[1] = gen_label_rtx ();
6785 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6786 targetm.asm_out.internal_label (asm_out_file, "L",
6787 CODE_LABEL_NUMBER (xoperands[1]));
6788 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6790 else
6792 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6793 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6795 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6797 else
6798 /* Now output a very long branch to the original target. */
6799 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6801 /* Now restore the value of %r1 in the delay slot. */
6802 if (TARGET_64BIT)
6804 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6805 return "ldd -16(%%r30),%%r1";
6806 else
6807 return "ldd -40(%%r30),%%r1";
6809 else
6811 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6812 return "ldw -20(%%r30),%%r1";
6813 else
6814 return "ldw -12(%%r30),%%r1";
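/* For example (hypothetical target, current function has no frame and
   %r2 is dead), the non-PIC path above emits

	stw %r1,-20(%r30)
	ldil L'target,%r1
	be R'target(%sr4,%r1)
	ldw -20(%r30),%r1	; restored in the delay slot

   which is the 16-byte case mentioned above.  */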
6818 /* This routine handles all the branch-on-bit conditional branch sequences we
6819 might need to generate. It handles nullification of delay slots,
6820 varying length branches, negated branches and all combinations of the
6821 above. It returns the appropriate output template to emit the branch. */
6823 const char *
6824 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6826 static char buf[100];
6827 bool useskip;
6828 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6829 int length = get_attr_length (insn);
6830 int xdelay;
6832 /* A conditional branch to the following instruction (e.g. the delay slot) is
6833 asking for a disaster. I do not think this can happen as this pattern
6834 is only used when optimizing; jump optimization should eliminate the
6835 jump. But be prepared just in case. */
6837 if (branch_to_delay_slot_p (insn))
6838 return "nop";
6840 /* If this is a long branch with its delay slot unfilled, set `nullify'
6841 as it can nullify the delay slot and save a nop. */
6842 if (length == 8 && dbr_sequence_length () == 0)
6843 nullify = 1;
6845 /* If this is a short forward conditional branch which did not get
6846 its delay slot filled, the delay slot can still be nullified. */
6847 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6848 nullify = forward_branch_p (insn);
6850 /* A forward branch over a single nullified insn can be done with an
6851 extrs instruction. This avoids a single cycle penalty due to
6852 mis-predicted branch if we fall through (branch not taken). */
6853 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6855 switch (length)
6858 /* All short conditional branches except backwards with an unfilled
6859 delay slot. */
6860 case 4:
6861 if (useskip)
6862 strcpy (buf, "{extrs,|extrw,s,}");
6863 else
6864 strcpy (buf, "bb,");
6865 if (useskip && GET_MODE (operands[0]) == DImode)
6866 strcpy (buf, "extrd,s,*");
6867 else if (GET_MODE (operands[0]) == DImode)
6868 strcpy (buf, "bb,*");
6869 if ((which == 0 && negated)
6870 || (which == 1 && ! negated))
6871 strcat (buf, ">=");
6872 else
6873 strcat (buf, "<");
6874 if (useskip)
6875 strcat (buf, " %0,%1,1,%%r0");
6876 else if (nullify && negated)
6878 if (branch_needs_nop_p (insn))
6879 strcat (buf, ",n %0,%1,%3%#");
6880 else
6881 strcat (buf, ",n %0,%1,%3");
6883 else if (nullify && ! negated)
6885 if (branch_needs_nop_p (insn))
6886 strcat (buf, ",n %0,%1,%2%#");
6887 else
6888 strcat (buf, ",n %0,%1,%2");
6890 else if (! nullify && negated)
6891 strcat (buf, " %0,%1,%3");
6892 else if (! nullify && ! negated)
6893 strcat (buf, " %0,%1,%2");
6894 break;
6896 /* All long conditionals. Note a short backward branch with an
6897 unfilled delay slot is treated just like a long backward branch
6898 with an unfilled delay slot. */
6899 case 8:
6900 /* Handle weird backwards branch with a filled delay slot
6901 which is nullified. */
6902 if (dbr_sequence_length () != 0
6903 && ! forward_branch_p (insn)
6904 && nullify)
6906 strcpy (buf, "bb,");
6907 if (GET_MODE (operands[0]) == DImode)
6908 strcat (buf, "*");
6909 if ((which == 0 && negated)
6910 || (which == 1 && ! negated))
6911 strcat (buf, "<");
6912 else
6913 strcat (buf, ">=");
6914 if (negated)
6915 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6916 else
6917 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6919 /* Handle short backwards branch with an unfilled delay slot.
6920 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6921 taken and untaken branches. */
6922 else if (dbr_sequence_length () == 0
6923 && ! forward_branch_p (insn)
6924 && INSN_ADDRESSES_SET_P ()
6925 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6926 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6928 strcpy (buf, "bb,");
6929 if (GET_MODE (operands[0]) == DImode)
6930 strcat (buf, "*");
6931 if ((which == 0 && negated)
6932 || (which == 1 && ! negated))
6933 strcat (buf, ">=");
6934 else
6935 strcat (buf, "<");
6936 if (negated)
6937 strcat (buf, " %0,%1,%3%#");
6938 else
6939 strcat (buf, " %0,%1,%2%#");
6941 else
6943 if (GET_MODE (operands[0]) == DImode)
6944 strcpy (buf, "extrd,s,*");
6945 else
6946 strcpy (buf, "{extrs,|extrw,s,}");
6947 if ((which == 0 && negated)
6948 || (which == 1 && ! negated))
6949 strcat (buf, "<");
6950 else
6951 strcat (buf, ">=");
6952 if (nullify && negated)
6953 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6954 else if (nullify && ! negated)
6955 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6956 else if (negated)
6957 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6958 else
6959 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6961 break;
6963 default:
6964 /* The reversed conditional branch must branch over one additional
6965 instruction if the delay slot is filled and needs to be extracted
6966 by pa_output_lbranch. If the delay slot is empty or this is a
6967 nullified forward branch, the instruction after the reversed
6968 condition branch must be nullified. */
6969 if (dbr_sequence_length () == 0
6970 || (nullify && forward_branch_p (insn)))
6972 nullify = 1;
6973 xdelay = 0;
6974 operands[4] = GEN_INT (length);
6976 else
6978 xdelay = 1;
6979 operands[4] = GEN_INT (length + 4);
6982 if (GET_MODE (operands[0]) == DImode)
6983 strcpy (buf, "bb,*");
6984 else
6985 strcpy (buf, "bb,");
6986 if ((which == 0 && negated)
6987 || (which == 1 && !negated))
6988 strcat (buf, "<");
6989 else
6990 strcat (buf, ">=");
6991 if (nullify)
6992 strcat (buf, ",n %0,%1,.+%4");
6993 else
6994 strcat (buf, " %0,%1,.+%4");
6995 output_asm_insn (buf, operands);
6996 return pa_output_lbranch (negated ? operands[3] : operands[2],
6997 insn, xdelay);
6999 return buf;
7002 /* This routine handles all the branch-on-variable-bit conditional branch
7003 sequences we might need to generate. It handles nullification of delay
7004 slots, varying length branches, negated branches and all combinations
7005 of the above. It returns the appropriate output template to emit the
7006 branch. */
7008 const char *
7009 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7010 int which)
7012 static char buf[100];
7013 bool useskip;
7014 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7015 int length = get_attr_length (insn);
7016 int xdelay;
7018 /* A conditional branch to the following instruction (e.g. the delay slot) is
7019 asking for a disaster. I do not think this can happen as this pattern
7020 is only used when optimizing; jump optimization should eliminate the
7021 jump. But be prepared just in case. */
7023 if (branch_to_delay_slot_p (insn))
7024 return "nop";
7026 /* If this is a long branch with its delay slot unfilled, set `nullify'
7027 as it can nullify the delay slot and save a nop. */
7028 if (length == 8 && dbr_sequence_length () == 0)
7029 nullify = 1;
7031 /* If this is a short forward conditional branch which did not get
7032 its delay slot filled, the delay slot can still be nullified. */
7033 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7034 nullify = forward_branch_p (insn);
7036 /* A forward branch over a single nullified insn can be done with an
7037 extrs instruction. This avoids a single cycle penalty due to
7038 mis-predicted branch if we fall through (branch not taken). */
7039 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7041 switch (length)
7044 /* All short conditional branches except backwards with an unfilled
7045 delay slot. */
7046 case 4:
7047 if (useskip)
7048 strcpy (buf, "{vextrs,|extrw,s,}");
7049 else
7050 strcpy (buf, "{bvb,|bb,}");
7051 if (useskip && GET_MODE (operands[0]) == DImode)
7052 strcpy (buf, "extrd,s,*");
7053 else if (GET_MODE (operands[0]) == DImode)
7054 strcpy (buf, "bb,*");
7055 if ((which == 0 && negated)
7056 || (which == 1 && ! negated))
7057 strcat (buf, ">=");
7058 else
7059 strcat (buf, "<");
7060 if (useskip)
7061 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7062 else if (nullify && negated)
7064 if (branch_needs_nop_p (insn))
7065 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7066 else
7067 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7069 else if (nullify && ! negated)
7071 if (branch_needs_nop_p (insn))
7072 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7073 else
7074 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7076 else if (! nullify && negated)
7077 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7078 else if (! nullify && ! negated)
7079 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7080 break;
7082 /* All long conditionals. Note a short backward branch with an
7083 unfilled delay slot is treated just like a long backward branch
7084 with an unfilled delay slot. */
7085 case 8:
7086 /* Handle weird backwards branch with a filled delay slot
7087 which is nullified. */
7088 if (dbr_sequence_length () != 0
7089 && ! forward_branch_p (insn)
7090 && nullify)
7092 strcpy (buf, "{bvb,|bb,}");
7093 if (GET_MODE (operands[0]) == DImode)
7094 strcat (buf, "*");
7095 if ((which == 0 && negated)
7096 || (which == 1 && ! negated))
7097 strcat (buf, "<");
7098 else
7099 strcat (buf, ">=");
7100 if (negated)
7101 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7102 else
7103 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7105 /* Handle short backwards branch with an unfilled delay slot.
7106 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7107 taken and untaken branches. */
7108 else if (dbr_sequence_length () == 0
7109 && ! forward_branch_p (insn)
7110 && INSN_ADDRESSES_SET_P ()
7111 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7112 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7114 strcpy (buf, "{bvb,|bb,}");
7115 if (GET_MODE (operands[0]) == DImode)
7116 strcat (buf, "*");
7117 if ((which == 0 && negated)
7118 || (which == 1 && ! negated))
7119 strcat (buf, ">=");
7120 else
7121 strcat (buf, "<");
7122 if (negated)
7123 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7124 else
7125 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7127 else
7129 strcpy (buf, "{vextrs,|extrw,s,}");
7130 if (GET_MODE (operands[0]) == DImode)
7131 strcpy (buf, "extrd,s,*");
7132 if ((which == 0 && negated)
7133 || (which == 1 && ! negated))
7134 strcat (buf, "<");
7135 else
7136 strcat (buf, ">=");
7137 if (nullify && negated)
7138 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7139 else if (nullify && ! negated)
7140 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7141 else if (negated)
7142 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7143 else
7144 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7146 break;
7148 default:
7149 /* The reversed conditional branch must branch over one additional
7150 instruction if the delay slot is filled and needs to be extracted
7151 by pa_output_lbranch. If the delay slot is empty or this is a
7152 nullified forward branch, the instruction after the reversed
7153 condition branch must be nullified. */
7154 if (dbr_sequence_length () == 0
7155 || (nullify && forward_branch_p (insn)))
7157 nullify = 1;
7158 xdelay = 0;
7159 operands[4] = GEN_INT (length);
7161 else
7163 xdelay = 1;
7164 operands[4] = GEN_INT (length + 4);
7167 if (GET_MODE (operands[0]) == DImode)
7168 strcpy (buf, "bb,*");
7169 else
7170 strcpy (buf, "{bvb,|bb,}");
7171 if ((which == 0 && negated)
7172 || (which == 1 && !negated))
7173 strcat (buf, "<");
7174 else
7175 strcat (buf, ">=");
7176 if (nullify)
7177 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7178 else
7179 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7180 output_asm_insn (buf, operands);
7181 return pa_output_lbranch (negated ? operands[3] : operands[2],
7182 insn, xdelay);
7184 return buf;
7187 /* Return the output template for emitting a dbra type insn.
7189 Note it may perform some output operations on its own before
7190 returning the final output string. */
7191 const char *
7192 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7194 int length = get_attr_length (insn);
7196 /* A conditional branch to the following instruction (e.g. the delay slot) is
7197 asking for a disaster. Be prepared! */
7199 if (branch_to_delay_slot_p (insn))
7201 if (which_alternative == 0)
7202 return "ldo %1(%0),%0";
7203 else if (which_alternative == 1)
7205 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7206 output_asm_insn ("ldw -16(%%r30),%4", operands);
7207 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7208 return "{fldws|fldw} -16(%%r30),%0";
7210 else
7212 output_asm_insn ("ldw %0,%4", operands);
7213 return "ldo %1(%4),%4\n\tstw %4,%0";
7217 if (which_alternative == 0)
7219 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7220 int xdelay;
7222 /* If this is a long branch with its delay slot unfilled, set `nullify'
7223 as it can nullify the delay slot and save a nop. */
7224 if (length == 8 && dbr_sequence_length () == 0)
7225 nullify = 1;
7227 /* If this is a short forward conditional branch which did not get
7228 its delay slot filled, the delay slot can still be nullified. */
7229 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7230 nullify = forward_branch_p (insn);
7232 switch (length)
7234 case 4:
7235 if (nullify)
7237 if (branch_needs_nop_p (insn))
7238 return "addib,%C2,n %1,%0,%3%#";
7239 else
7240 return "addib,%C2,n %1,%0,%3";
7242 else
7243 return "addib,%C2 %1,%0,%3";
7245 case 8:
7246 /* Handle weird backwards branch with a filled delay slot
7247 which is nullified. */
7248 if (dbr_sequence_length () != 0
7249 && ! forward_branch_p (insn)
7250 && nullify)
7251 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7252 /* Handle short backwards branch with an unfilled delay slot.
7253 Using a addb;nop rather than addi;bl saves 1 cycle for both
7254 taken and untaken branches. */
7255 else if (dbr_sequence_length () == 0
7256 && ! forward_branch_p (insn)
7257 && INSN_ADDRESSES_SET_P ()
7258 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7259 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7260 return "addib,%C2 %1,%0,%3%#";
7262 /* Handle normal cases. */
7263 if (nullify)
7264 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7265 else
7266 return "addi,%N2 %1,%0,%0\n\tb %3";
7268 default:
7269 /* The reversed conditional branch must branch over one additional
7270 instruction if the delay slot is filled and needs to be extracted
7271 by pa_output_lbranch. If the delay slot is empty or this is a
7272 nullified forward branch, the instruction after the reversed
7273 condition branch must be nullified. */
7274 if (dbr_sequence_length () == 0
7275 || (nullify && forward_branch_p (insn)))
7277 nullify = 1;
7278 xdelay = 0;
7279 operands[4] = GEN_INT (length);
7281 else
7283 xdelay = 1;
7284 operands[4] = GEN_INT (length + 4);
7287 if (nullify)
7288 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7289 else
7290 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7292 return pa_output_lbranch (operands[3], insn, xdelay);
7296 /* Deal with gross reload from FP register case. */
7297 else if (which_alternative == 1)
7299 /* Move loop counter from FP register to MEM then into a GR,
7300 increment the GR, store the GR into MEM, and finally reload
7301 the FP register from MEM from within the branch's delay slot. */
7302 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7303 operands);
7304 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7305 if (length == 24)
7306 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7307 else if (length == 28)
7308 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7309 else
7311 operands[5] = GEN_INT (length - 16);
7312 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7313 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7314 return pa_output_lbranch (operands[3], insn, 0);
7317 /* Deal with gross reload from memory case. */
7318 else
7320 /* Reload loop counter from memory, the store back to memory
7321 happens in the branch's delay slot. */
7322 output_asm_insn ("ldw %0,%4", operands);
7323 if (length == 12)
7324 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7325 else if (length == 16)
7326 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7327 else
7329 operands[5] = GEN_INT (length - 4);
7330 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7331 return pa_output_lbranch (operands[3], insn, 0);
7336 /* Return the output template for emitting a movb type insn.
7338 Note it may perform some output operations on its own before
7339 returning the final output string. */
7340 const char *
7341 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7342 int reverse_comparison)
7344 int length = get_attr_length (insn);
7346 /* A conditional branch to the following instruction (e.g. the delay slot) is
7347 asking for a disaster. Be prepared! */
7349 if (branch_to_delay_slot_p (insn))
7351 if (which_alternative == 0)
7352 return "copy %1,%0";
7353 else if (which_alternative == 1)
7355 output_asm_insn ("stw %1,-16(%%r30)", operands);
7356 return "{fldws|fldw} -16(%%r30),%0";
7358 else if (which_alternative == 2)
7359 return "stw %1,%0";
7360 else
7361 return "mtsar %r1";
7364 /* Support the second variant. */
7365 if (reverse_comparison)
7366 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7368 if (which_alternative == 0)
7370 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7371 int xdelay;
7373 /* If this is a long branch with its delay slot unfilled, set `nullify'
7374 as it can nullify the delay slot and save a nop. */
7375 if (length == 8 && dbr_sequence_length () == 0)
7376 nullify = 1;
7378 /* If this is a short forward conditional branch which did not get
7379 its delay slot filled, the delay slot can still be nullified. */
7380 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7381 nullify = forward_branch_p (insn);
7383 switch (length)
7385 case 4:
7386 if (nullify)
7388 if (branch_needs_nop_p (insn))
7389 return "movb,%C2,n %1,%0,%3%#";
7390 else
7391 return "movb,%C2,n %1,%0,%3";
7393 else
7394 return "movb,%C2 %1,%0,%3";
7396 case 8:
7397 /* Handle weird backwards branch with a filled delay slot
7398 which is nullified. */
7399 if (dbr_sequence_length () != 0
7400 && ! forward_branch_p (insn)
7401 && nullify)
7402 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7404 /* Handle short backwards branch with an unfilled delay slot.
7405 Using a movb;nop rather than or;bl saves 1 cycle for both
7406 taken and untaken branches. */
7407 else if (dbr_sequence_length () == 0
7408 && ! forward_branch_p (insn)
7409 && INSN_ADDRESSES_SET_P ()
7410 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7411 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7412 return "movb,%C2 %1,%0,%3%#";
7413 /* Handle normal cases. */
7414 if (nullify)
7415 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7416 else
7417 return "or,%N2 %1,%%r0,%0\n\tb %3";
7419 default:
7420 /* The reversed conditional branch must branch over one additional
7421 instruction if the delay slot is filled and needs to be extracted
7422 by pa_output_lbranch. If the delay slot is empty or this is a
7423 nullified forward branch, the instruction after the reversed
7424 condition branch must be nullified. */
7425 if (dbr_sequence_length () == 0
7426 || (nullify && forward_branch_p (insn)))
7428 nullify = 1;
7429 xdelay = 0;
7430 operands[4] = GEN_INT (length);
7432 else
7434 xdelay = 1;
7435 operands[4] = GEN_INT (length + 4);
7438 if (nullify)
7439 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7440 else
7441 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7443 return pa_output_lbranch (operands[3], insn, xdelay);
7446 /* Deal with gross reload for FP destination register case. */
7447 else if (which_alternative == 1)
7449 /* Move source register to MEM, perform the branch test, then
7450 finally load the FP register from MEM from within the branch's
7451 delay slot. */
7452 output_asm_insn ("stw %1,-16(%%r30)", operands);
7453 if (length == 12)
7454 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7455 else if (length == 16)
7456 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7457 else
7459 operands[4] = GEN_INT (length - 4);
7460 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7461 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7462 return pa_output_lbranch (operands[3], insn, 0);
7465 /* Deal with gross reload from memory case. */
7466 else if (which_alternative == 2)
7468 /* Reload loop counter from memory, the store back to memory
7469 happens in the branch's delay slot. */
7470 if (length == 8)
7471 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7472 else if (length == 12)
7473 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7474 else
7476 operands[4] = GEN_INT (length);
7477 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7478 operands);
7479 return pa_output_lbranch (operands[3], insn, 0);
7482 /* Handle SAR as a destination. */
7483 else
7485 if (length == 8)
7486 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7487 else if (length == 12)
7488 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7489 else
7491 operands[4] = GEN_INT (length);
7492 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7493 operands);
7494 return pa_output_lbranch (operands[3], insn, 0);
7499 /* Copy any FP arguments in INSN into integer registers. */
7500 static void
7501 copy_fp_args (rtx_insn *insn)
7503 rtx link;
7504 rtx xoperands[2];
7506 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7508 int arg_mode, regno;
7509 rtx use = XEXP (link, 0);
7511 if (! (GET_CODE (use) == USE
7512 && GET_CODE (XEXP (use, 0)) == REG
7513 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7514 continue;
7516 arg_mode = GET_MODE (XEXP (use, 0));
7517 regno = REGNO (XEXP (use, 0));
7519 /* Is it a floating point register? */
7520 if (regno >= 32 && regno <= 39)
7522 /* Copy the FP register into an integer register via memory. */
7523 if (arg_mode == SFmode)
7525 xoperands[0] = XEXP (use, 0);
7526 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7527 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7528 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7530 else
7532 xoperands[0] = XEXP (use, 0);
7533 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7534 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7535 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7536 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7542 /* Compute length of the FP argument copy sequence for INSN. */
7543 static int
7544 length_fp_args (rtx_insn *insn)
7546 int length = 0;
7547 rtx link;
7549 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7551 int arg_mode, regno;
7552 rtx use = XEXP (link, 0);
7554 if (! (GET_CODE (use) == USE
7555 && GET_CODE (XEXP (use, 0)) == REG
7556 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7557 continue;
7559 arg_mode = GET_MODE (XEXP (use, 0));
7560 regno = REGNO (XEXP (use, 0));
7562 /* Is it a floating point register? */
7563 if (regno >= 32 && regno <= 39)
7565 if (arg_mode == SFmode)
7566 length += 8;
7567 else
7568 length += 12;
7572 return length;
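/* The byte counts mirror copy_fp_args: an SFmode copy is fstw + ldw
   (2 insns = 8 bytes) and a DFmode copy is fstd + two ldws
   (3 insns = 12 bytes).  */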
7575 /* Return the attribute length for the millicode call instruction INSN.
7576 The length must match the code generated by pa_output_millicode_call.
7577 We include the delay slot in the returned length as it is better to
7578 overestimate the length than to underestimate it. */
7580 int
7581 pa_attr_length_millicode_call (rtx_insn *insn)
7583 unsigned long distance = -1;
7584 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7586 if (INSN_ADDRESSES_SET_P ())
7588 distance = (total + insn_current_reference_address (insn));
7589 if (distance < total)
7590 distance = -1;
7593 if (TARGET_64BIT)
7595 if (!TARGET_LONG_CALLS && distance < 7600000)
7596 return 8;
7598 return 20;
7600 else if (TARGET_PORTABLE_RUNTIME)
7601 return 24;
7602 else
7604 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7605 return 8;
7607 if (!flag_pic)
7608 return 12;
7610 return 24;
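/* Standalone sketch (hypothetical addresses, not part of this file's
   build) of the saturating distance computation above: on unsigned
   overflow the distance collapses to -1, i.e. "too far".  */
#if 0
#include <assert.h>
#include <limits.h>

static unsigned long
branch_distance (unsigned long total, unsigned long ref_addr)
{
  unsigned long distance = total + ref_addr;
  if (distance < total)		/* wrapped around: treat as unreachable  */
    distance = -1;
  return distance;
}

int
main (void)
{
  assert (branch_distance (1000, 2000) == 3000);
  assert (branch_distance (ULONG_MAX - 1, 2) == (unsigned long) -1);
  return 0;
}
#endif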
7614 /* INSN is a function call.
7616 CALL_DEST is the routine we are calling. */
7618 const char *
7619 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7621 int attr_length = get_attr_length (insn);
7622 int seq_length = dbr_sequence_length ();
7623 rtx xoperands[3];
7625 xoperands[0] = call_dest;
7626 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7628 /* Handle the common case where we are sure that the branch will
7629 reach the beginning of the $CODE$ subspace. The within-reach
7630 form of the $$sh_func_adrs call has a length of 28. Because it
7631 has an attribute type of sh_func_adrs, it never has a nonzero
7632 sequence length (i.e., the delay slot is never filled). */
7633 if (!TARGET_LONG_CALLS
7634 && (attr_length == 8
7635 || (attr_length == 28
7636 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7638 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7640 else
7642 if (TARGET_64BIT)
7644 /* It might seem that one insn could be saved by accessing
7645 the millicode function using the linkage table. However,
7646 this doesn't work in shared libraries and other dynamically
7647 loaded objects. Using a pc-relative sequence also avoids
7648 problems related to the implicit use of the gp register. */
7649 output_asm_insn ("b,l .+8,%%r1", xoperands);
7651 if (TARGET_GAS)
7653 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7654 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7656 else
7658 xoperands[1] = gen_label_rtx ();
7659 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7660 targetm.asm_out.internal_label (asm_out_file, "L",
7661 CODE_LABEL_NUMBER (xoperands[1]));
7662 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7665 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7667 else if (TARGET_PORTABLE_RUNTIME)
7669 /* Pure portable runtime doesn't allow be/ble; we also don't
7670 have PIC support in the assembler/linker, so this sequence
7671 is needed. */
7673 /* Get the address of our target into %r1. */
7674 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7675 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7677 /* Get our return address into %r31. */
7678 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7679 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7681 /* Jump to our target address in %r1. */
7682 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7684 else if (!flag_pic)
7686 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7687 if (TARGET_PA_20)
7688 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7689 else
7690 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7692 else
7694 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7695 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7697 if (TARGET_SOM || !TARGET_GAS)
7699 /* The HP assembler can generate relocations for the
7700 difference of two symbols. GAS can do this for a
7701 millicode symbol but not an arbitrary external
7702 symbol when generating SOM output. */
7703 xoperands[1] = gen_label_rtx ();
7704 targetm.asm_out.internal_label (asm_out_file, "L",
7705 CODE_LABEL_NUMBER (xoperands[1]));
7706 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7707 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7709 else
7711 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7712 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7713 xoperands);
7716 /* Jump to our target address in %r1. */
7717 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7721 if (seq_length == 0)
7722 output_asm_insn ("nop", xoperands);
7724 return "";
7727 /* Return the attribute length of the call instruction INSN. The SIBCALL
7728 flag indicates whether INSN is a regular call or a sibling call. The
7729 length returned must be longer than the code actually generated by
7730 pa_output_call. Since branch shortening is done before delay branch
7731 sequencing, there is no way to determine whether or not the delay
7732 slot will be filled during branch shortening. Even when the delay
7733 slot is filled, we may have to add a nop if the delay slot contains
7734 a branch that can't reach its target. Thus, we always have to include
7735 the delay slot in the length estimate. This used to be done in
7736 pa_adjust_insn_length but we do it here now as some sequences always
7737 fill the delay slot and we can save four bytes in the estimate for
7738 these sequences. */
7741 pa_attr_length_call (rtx_insn *insn, int sibcall)
7743 int local_call;
7744 rtx call, call_dest;
7745 tree call_decl;
7746 int length = 0;
7747 rtx pat = PATTERN (insn);
7748 unsigned long distance = -1;
7750 gcc_assert (CALL_P (insn));
7752 if (INSN_ADDRESSES_SET_P ())
7754 unsigned long total;
7756 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7757 distance = (total + insn_current_reference_address (insn));
7758 if (distance < total)
7759 distance = -1;
7762 gcc_assert (GET_CODE (pat) == PARALLEL);
7764 /* Get the call rtx. */
7765 call = XVECEXP (pat, 0, 0);
7766 if (GET_CODE (call) == SET)
7767 call = SET_SRC (call);
7769 gcc_assert (GET_CODE (call) == CALL);
7771 /* Determine if this is a local call. */
7772 call_dest = XEXP (XEXP (call, 0), 0);
7773 call_decl = SYMBOL_REF_DECL (call_dest);
7774 local_call = call_decl && targetm.binds_local_p (call_decl);
7776 /* pc-relative branch. */
7777 if (!TARGET_LONG_CALLS
7778 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7779 || distance < MAX_PCREL17F_OFFSET))
7780 length += 8;
7782 /* 64-bit plabel sequence. */
7783 else if (TARGET_64BIT && !local_call)
7784 length += sibcall ? 28 : 24;
7786 /* non-pic long absolute branch sequence. */
7787 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7788 length += 12;
7790 /* long pc-relative branch sequence. */
7791 else if (TARGET_LONG_PIC_SDIFF_CALL
7792 || (TARGET_GAS && !TARGET_SOM
7793 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7795 length += 20;
7797 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7798 length += 8;
7801 /* 32-bit plabel sequence. */
7802 else
7804 length += 32;
7806 if (TARGET_SOM)
7807 length += length_fp_args (insn);
7809 if (flag_pic)
7810 length += 4;
7812 if (!TARGET_PA_20)
7814 if (!sibcall)
7815 length += 8;
7817 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7818 length += 8;
7822 return length;
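/* Illustrative sketch (not built): for a hypothetical external, PIC,
   non-PA2.0, non-sibling call on SOM, the 32-bit plabel estimate above
   adds up as follows.  */
#if 0
int example_len = 32			  /* base plabel sequence */
		  + length_fp_args (insn) /* SOM FP argument copies */
		  + 4			  /* PIC */
		  + 8			  /* !sibcall and !TARGET_PA_20 */
		  + 8;			  /* space register save/restore */
#endif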
7825 /* INSN is a function call.
7827 CALL_DEST is the routine we are calling. */
7829 const char *
7830 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7832 int seq_length = dbr_sequence_length ();
7833 tree call_decl = SYMBOL_REF_DECL (call_dest);
7834 int local_call = call_decl && targetm.binds_local_p (call_decl);
7835 rtx xoperands[2];
7837 xoperands[0] = call_dest;
7839 /* Handle the common case where we're sure that the branch will reach
7840 the beginning of the "$CODE$" subspace. This is the beginning of
7841 the current function if we are in a named section. */
7842 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7844 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7845 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7847 else
7849 if (TARGET_64BIT && !local_call)
7851 /* ??? As far as I can tell, the HP linker doesn't support the
7852 long pc-relative sequence described in the 64-bit runtime
7853 architecture. So, we use a slightly longer indirect call. */
7854 xoperands[0] = pa_get_deferred_plabel (call_dest);
7855 xoperands[1] = gen_label_rtx ();
7857 /* If this isn't a sibcall, we put the load of %r27 into the
7858 delay slot. We can't do this in a sibcall as we don't
7859 have a second call-clobbered scratch register available.
7860 We don't need to do anything when generating fast indirect
7861 calls. */
7862 if (seq_length != 0 && !sibcall)
7864 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7865 optimize, 0, NULL);
7867 /* Now delete the delay insn. */
7868 SET_INSN_DELETED (NEXT_INSN (insn));
7869 seq_length = 0;
7872 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7873 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7874 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7876 if (sibcall)
7878 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7879 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7880 output_asm_insn ("bve (%%r1)", xoperands);
7882 else
7884 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7885 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7886 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7887 seq_length = 1;
7890 else
7892 int indirect_call = 0;
7894 /* Emit a long call. There are several different sequences
7895 of increasing length and complexity. In most cases,
7896 they don't allow an instruction in the delay slot. */
7897 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7898 && !TARGET_LONG_PIC_SDIFF_CALL
7899 && !(TARGET_GAS && !TARGET_SOM
7900 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7901 && !TARGET_64BIT)
7902 indirect_call = 1;
7904 if (seq_length != 0
7905 && !sibcall
7906 && (!TARGET_PA_20
7907 || indirect_call
7908 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7910 /* A non-jump insn in the delay slot. By definition we can
7911 emit this insn before the call (and in fact before argument
7912 relocating). */
7913 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7914 NULL);
7916 /* Now delete the delay insn. */
7917 SET_INSN_DELETED (NEXT_INSN (insn));
7918 seq_length = 0;
7921 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7923 /* This is the best sequence for making long calls in
7924 non-pic code. Unfortunately, GNU ld doesn't provide
7925 the stub needed for external calls, and GAS's support
7926 for this with the SOM linker is buggy. It is safe
7927 to use this for local calls. */
7928 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7929 if (sibcall)
7930 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7931 else
7933 if (TARGET_PA_20)
7934 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7935 xoperands);
7936 else
7937 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7939 output_asm_insn ("copy %%r31,%%r2", xoperands);
7940 seq_length = 1;
7943 else
7945 if (TARGET_LONG_PIC_SDIFF_CALL)
7947 /* The HP assembler and linker can handle relocations
7948 for the difference of two symbols. The HP assembler
7949 recognizes the sequence as a pc-relative call and
7950 the linker provides stubs when needed. */
7951 xoperands[1] = gen_label_rtx ();
7952 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7953 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7954 targetm.asm_out.internal_label (asm_out_file, "L",
7955 CODE_LABEL_NUMBER (xoperands[1]));
7956 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7958 else if (TARGET_GAS && !TARGET_SOM
7959 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7961 /* GAS currently can't generate the relocations that
7962 are needed for the SOM linker under HP-UX using this
7963 sequence. The GNU linker doesn't generate the stubs
7964 that are needed for external calls on TARGET_ELF32
7965 with this sequence. For now, we have to use a
7966 longer plabel sequence when using GAS. */
7967 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7968 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7969 xoperands);
7970 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7971 xoperands);
7973 else
7975 /* Emit a long plabel-based call sequence. This is
7976 essentially an inline implementation of $$dyncall.
7977 We don't actually try to call $$dyncall as this is
7978 as difficult as calling the function itself. */
7979 xoperands[0] = pa_get_deferred_plabel (call_dest);
7980 xoperands[1] = gen_label_rtx ();
7982 /* Since the call is indirect, FP arguments in registers
7983 need to be copied to the general registers. Then, the
7984 argument relocation stub will copy them back. */
7985 if (TARGET_SOM)
7986 copy_fp_args (insn);
7988 if (flag_pic)
7990 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7991 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7992 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7994 else
7996 output_asm_insn ("addil LR'%0-$global$,%%r27",
7997 xoperands);
7998 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7999 xoperands);
8002 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
8003 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8004 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8005 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
8007 if (!sibcall && !TARGET_PA_20)
8009 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8010 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8011 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8012 else
8013 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8017 if (TARGET_PA_20)
8019 if (sibcall)
8020 output_asm_insn ("bve (%%r1)", xoperands);
8021 else
8023 if (indirect_call)
8025 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8026 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8027 seq_length = 1;
8029 else
8030 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8033 else
8035 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8036 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8037 xoperands);
8039 if (sibcall)
8041 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8042 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8043 else
8044 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8046 else
8048 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8049 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8050 else
8051 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8053 if (indirect_call)
8054 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8055 else
8056 output_asm_insn ("copy %%r31,%%r2", xoperands);
8057 seq_length = 1;
8064 if (seq_length == 0)
8065 output_asm_insn ("nop", xoperands);
8067 return "";
8070 /* Return the attribute length of the indirect call instruction INSN.
8071 The length must match the code generated by pa_output_indirect_call.
8072 The returned length includes the delay slot. Currently, the delay
8073 slot of an indirect call sequence is not exposed and it is used by
8074 the sequence itself. */
8077 pa_attr_length_indirect_call (rtx_insn *insn)
8079 unsigned long distance = -1;
8080 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8082 if (INSN_ADDRESSES_SET_P ())
8084 distance = (total + insn_current_reference_address (insn));
8085 if (distance < total)
8086 distance = -1;
8089 if (TARGET_64BIT)
8090 return 12;
8092 if (TARGET_FAST_INDIRECT_CALLS
8093 || (!TARGET_LONG_CALLS
8094 && !TARGET_PORTABLE_RUNTIME
8095 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8096 || distance < MAX_PCREL17F_OFFSET)))
8097 return 8;
8099 if (flag_pic)
8100 return 20;
8102 if (TARGET_PORTABLE_RUNTIME)
8103 return 16;
8105 /* Out of reach, can use ble. */
8106 return 12;
8109 const char *
8110 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8112 rtx xoperands[1];
8114 if (TARGET_64BIT)
8116 xoperands[0] = call_dest;
8117 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8118 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8119 return "";
8122 /* First the special case for kernels, level 0 systems, etc. */
8123 if (TARGET_FAST_INDIRECT_CALLS)
8124 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8126 /* Now the normal case -- we can reach $$dyncall directly or
8127 we're sure that we can get there via a long-branch stub.
8129 No need to check target flags as the length uniquely identifies
8130 the remaining cases. */
8131 if (pa_attr_length_indirect_call (insn) == 8)
8133 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8134 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8135 variant of the B,L instruction can't be used on the SOM target. */
8136 if (TARGET_PA_20 && !TARGET_SOM)
8137 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8138 else
8139 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8142 /* Long millicode call, but we are not generating PIC or portable runtime
8143 code. */
8144 if (pa_attr_length_indirect_call (insn) == 12)
8145 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8147 /* Long millicode call for portable runtime. */
8148 if (pa_attr_length_indirect_call (insn) == 16)
8149 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8151 /* We need a long PIC call to $$dyncall. */
8152 xoperands[0] = NULL_RTX;
8153 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8154 if (TARGET_SOM || !TARGET_GAS)
8156 xoperands[0] = gen_label_rtx ();
8157 output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands);
8158 targetm.asm_out.internal_label (asm_out_file, "L",
8159 CODE_LABEL_NUMBER (xoperands[0]));
8160 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8162 else
8164 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands);
8165 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8166 xoperands);
8168 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8169 output_asm_insn ("ldo 12(%%r2),%%r2", xoperands);
8170 return "";
8173 /* In HPUX 8.0's shared library scheme, special relocations are needed
8174 for function labels if they might be passed to a function
8175 in a shared library (because shared libraries don't live in code
8176 space), and special magic is needed to construct their address. */
8178 void
8179 pa_encode_label (rtx sym)
8181 const char *str = XSTR (sym, 0);
8182 int len = strlen (str) + 1;
8183 char *newstr, *p;
8185 p = newstr = XALLOCAVEC (char, len + 1);
8186 *p++ = '@';
8187 strcpy (p, str);
8189 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8192 static void
8193 pa_encode_section_info (tree decl, rtx rtl, int first)
8195 int old_referenced = 0;
8197 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8198 old_referenced
8199 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8201 default_encode_section_info (decl, rtl, first);
8203 if (first && TEXT_SPACE_P (decl))
8205 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8206 if (TREE_CODE (decl) == FUNCTION_DECL)
8207 pa_encode_label (XEXP (rtl, 0));
8209 else if (old_referenced)
8210 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8213 /* This is roughly the inverse of pa_encode_section_info. */
8215 static const char *
8216 pa_strip_name_encoding (const char *str)
8218 str += (*str == '@');
8219 str += (*str == '*');
8220 return str;
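/* For instance (illustrative, not built), stripping undoes the '@'
   prefix added by pa_encode_label.  */
#if 0
const char *plain = pa_strip_name_encoding ("@foo");	/* "foo" */
#endif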
8223 /* Returns 1 if OP is a function label involved in a simple addition
8224 with a constant. Used to keep certain patterns from matching
8225 during instruction combination. */
8227 pa_is_function_label_plus_const (rtx op)
8229 /* Strip off any CONST. */
8230 if (GET_CODE (op) == CONST)
8231 op = XEXP (op, 0);
8233 return (GET_CODE (op) == PLUS
8234 && function_label_operand (XEXP (op, 0), VOIDmode)
8235 && GET_CODE (XEXP (op, 1)) == CONST_INT);
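/* Illustrative sketch (not built): the operands accepted above have the
   shape (const (plus (symbol_ref "f") (const_int 4))).  The symbol name
   here is hypothetical.  */
#if 0
rtx sym = gen_rtx_SYMBOL_REF (Pmode, "f");
rtx op = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, sym, GEN_INT (4)));
/* pa_is_function_label_plus_const (op) is nonzero iff SYM satisfies
   function_label_operand.  */
#endif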
8238 /* Output assembly code for a thunk to FUNCTION. */
8240 static void
8241 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8242 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8243 tree function)
8245 static unsigned int current_thunk_number;
8246 int val_14 = VAL_14_BITS_P (delta);
8247 unsigned int old_last_address = last_address, nbytes = 0;
8248 char label[16];
8249 rtx xoperands[4];
8251 xoperands[0] = XEXP (DECL_RTL (function), 0);
8252 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8253 xoperands[2] = GEN_INT (delta);
8255 final_start_function (emit_barrier (), file, 1);
8257 /* Output the thunk. We know that the function is in the same
8258 translation unit (i.e., the same space) as the thunk, and that
8259 thunks are output after their method. Thus, we don't need an
8260 external branch to reach the function. With SOM and GAS,
8261 functions and thunks are effectively in different sections.
8262 Thus, we can always use an IA-relative branch and the linker
8263 will add a long branch stub if necessary.
8265 However, we have to be careful when generating PIC code on the
8266 SOM port to ensure that the sequence does not transfer to an
8267 import stub for the target function as this could clobber the
8268 return value saved at SP-24. This would also apply to the
8269 32-bit linux port if the multi-space model is implemented. */
8270 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8271 && !(flag_pic && TREE_PUBLIC (function))
8272 && (TARGET_GAS || last_address < 262132))
8273 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8274 && ((targetm_common.have_named_sections
8275 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8276 /* The GNU 64-bit linker has rather poor stub management.
8277 So, we use a long branch from thunks that aren't in
8278 the same section as the target function. */
8279 && ((!TARGET_64BIT
8280 && (DECL_SECTION_NAME (thunk_fndecl)
8281 != DECL_SECTION_NAME (function)))
8282 || ((DECL_SECTION_NAME (thunk_fndecl)
8283 == DECL_SECTION_NAME (function))
8284 && last_address < 262132)))
8285 /* In this case, we need to be able to reach the start of
8286 the stub table even though the function is likely closer
8287 and can be jumped to directly. */
8288 || (targetm_common.have_named_sections
8289 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8290 && DECL_SECTION_NAME (function) == NULL
8291 && total_code_bytes < MAX_PCREL17F_OFFSET)
8292 /* Likewise. */
8293 || (!targetm_common.have_named_sections
8294 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8296 if (!val_14)
8297 output_asm_insn ("addil L'%2,%%r26", xoperands);
8299 output_asm_insn ("b %0", xoperands);
8301 if (val_14)
8303 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8304 nbytes += 8;
8306 else
8308 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8309 nbytes += 12;
8312 else if (TARGET_64BIT)
8314 /* We only have one call-clobbered scratch register, so we can't
8315 make use of the delay slot if delta doesn't fit in 14 bits. */
8316 if (!val_14)
8318 output_asm_insn ("addil L'%2,%%r26", xoperands);
8319 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8322 output_asm_insn ("b,l .+8,%%r1", xoperands);
8324 if (TARGET_GAS)
8326 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8327 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8329 else
8331 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8332 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8335 if (val_14)
8337 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8338 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8339 nbytes += 20;
8341 else
8343 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8344 nbytes += 24;
8347 else if (TARGET_PORTABLE_RUNTIME)
8349 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8350 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8352 if (!val_14)
8353 output_asm_insn ("addil L'%2,%%r26", xoperands);
8355 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8357 if (val_14)
8359 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8360 nbytes += 16;
8362 else
8364 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8365 nbytes += 20;
8368 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8370 /* The function is accessible from outside this module. The only
8371 way to avoid an import stub between the thunk and function is to
8372 call the function directly with an indirect sequence similar to
8373 that used by $$dyncall. This is possible because $$dyncall acts
8374 as the import stub in an indirect call. */
8375 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8376 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8377 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8378 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8379 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8380 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8381 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8382 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8383 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8385 if (!val_14)
8387 output_asm_insn ("addil L'%2,%%r26", xoperands);
8388 nbytes += 4;
8391 if (TARGET_PA_20)
8393 output_asm_insn ("bve (%%r22)", xoperands);
8394 nbytes += 36;
8396 else if (TARGET_NO_SPACE_REGS)
8398 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8399 nbytes += 36;
8401 else
8403 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8404 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8405 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8406 nbytes += 44;
8409 if (val_14)
8410 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8411 else
8412 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8414 else if (flag_pic)
8416 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8418 if (TARGET_SOM || !TARGET_GAS)
8420 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8421 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8423 else
8425 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8426 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8429 if (!val_14)
8430 output_asm_insn ("addil L'%2,%%r26", xoperands);
8432 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8434 if (val_14)
8436 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8437 nbytes += 20;
8439 else
8441 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8442 nbytes += 24;
8445 else
8447 if (!val_14)
8448 output_asm_insn ("addil L'%2,%%r26", xoperands);
8450 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8451 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8453 if (val_14)
8455 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8456 nbytes += 12;
8458 else
8460 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8461 nbytes += 16;
8465 final_end_function ();
8467 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8469 switch_to_section (data_section);
8470 output_asm_insn (".align 4", xoperands);
8471 ASM_OUTPUT_LABEL (file, label);
8472 output_asm_insn (".word P'%0", xoperands);
8475 current_thunk_number++;
8476 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8477 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8478 last_address += nbytes;
8479 if (old_last_address > last_address)
8480 last_address = UINT_MAX;
8481 update_total_code_bytes (nbytes);
8484 /* Only direct calls to static functions are allowed to be sibling (tail)
8485 call optimized.
8487 This restriction is necessary because in some cases linker-generated
8488 stubs store the return pointer into rp', which might clobber a
8489 live value already in rp'.
8491 In a sibcall the current function and the target function share stack
8492 space. Thus if the path to the current function and the path to the
8493 target function save a value in rp', they save the value into the
8494 same stack slot, which has undesirable consequences.
8496 Because of the deferred binding nature of shared libraries any function
8497 with external scope could be in a different load module and thus require
8498 rp' to be saved when calling that function. So sibcall optimizations
8499 can only be safe for static functions.
8501 Note that GCC never needs return value relocations, so we don't have to
8502 worry about static calls with return value relocations (which require
8503 saving rp').
8505 It is safe to perform a sibcall optimization when the target function
8506 will never return. */
8507 static bool
8508 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8510 if (TARGET_PORTABLE_RUNTIME)
8511 return false;
8513 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8514 single subspace mode and the call is not indirect. As far as I know,
8515 there is no operating system support for the multiple subspace mode.
8516 It might be possible to support indirect calls if we didn't use
8517 $$dyncall (see the indirect sequence generated in pa_output_call). */
8518 if (TARGET_ELF32)
8519 return (decl != NULL_TREE);
8521 /* Sibcalls are not ok because the arg pointer register is not a fixed
8522 register. This prevents the sibcall optimization from occurring. In
8523 addition, there are problems with stub placement using GNU ld. This
8524 is because a normal sibcall branch uses a 17-bit relocation while
8525 a regular call branch uses a 22-bit relocation. As a result, more
8526 care needs to be taken in the placement of long-branch stubs. */
8527 if (TARGET_64BIT)
8528 return false;
8530 /* Sibcalls are only ok within a translation unit. */
8531 return (decl && !TREE_PUBLIC (decl));
8534 /* ??? Addition is not commutative on the PA due to the weird implicit
8535 space register selection rules for memory addresses. Therefore, we
8536 don't consider a + b == b + a, as this might be inside a MEM. */
8537 static bool
8538 pa_commutative_p (const_rtx x, int outer_code)
8540 return (COMMUTATIVE_P (x)
8541 && (TARGET_NO_SPACE_REGS
8542 || (outer_code != UNKNOWN && outer_code != MEM)
8543 || GET_CODE (x) != PLUS));
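/* Illustrative sketch (not built), with a hypothetical PLUS rtx X: the
   PLUS is treated as commutative in ordinary contexts but not where it
   may form a memory address, since the space register is selected from
   the base operand.  */
#if 0
bool in_set = pa_commutative_p (x, SET);	/* true */
bool in_mem = pa_commutative_p (x, MEM);	/* false unless
						   TARGET_NO_SPACE_REGS */
#endif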
8546 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8547 use in fmpyadd instructions. */
8549 pa_fmpyaddoperands (rtx *operands)
8551 machine_mode mode = GET_MODE (operands[0]);
8553 /* Must be a floating point mode. */
8554 if (mode != SFmode && mode != DFmode)
8555 return 0;
8557 /* All modes must be the same. */
8558 if (! (mode == GET_MODE (operands[1])
8559 && mode == GET_MODE (operands[2])
8560 && mode == GET_MODE (operands[3])
8561 && mode == GET_MODE (operands[4])
8562 && mode == GET_MODE (operands[5])))
8563 return 0;
8565 /* All operands must be registers. */
8566 if (! (GET_CODE (operands[1]) == REG
8567 && GET_CODE (operands[2]) == REG
8568 && GET_CODE (operands[3]) == REG
8569 && GET_CODE (operands[4]) == REG
8570 && GET_CODE (operands[5]) == REG))
8571 return 0;
8573 /* Only 2 real operands to the addition. One of the input operands must
8574 be the same as the output operand. */
8575 if (! rtx_equal_p (operands[3], operands[4])
8576 && ! rtx_equal_p (operands[3], operands[5]))
8577 return 0;
8579 /* Inout operand of add cannot conflict with any operands from multiply. */
8580 if (rtx_equal_p (operands[3], operands[0])
8581 || rtx_equal_p (operands[3], operands[1])
8582 || rtx_equal_p (operands[3], operands[2]))
8583 return 0;
8585 /* multiply cannot feed into addition operands. */
8586 if (rtx_equal_p (operands[4], operands[0])
8587 || rtx_equal_p (operands[5], operands[0]))
8588 return 0;
8590 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8591 if (mode == SFmode
8592 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8593 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8594 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8595 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8596 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8597 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8598 return 0;
8600 /* Passed. Operands are suitable for fmpyadd. */
8601 return 1;
8604 #if !defined(USE_COLLECT2)
8605 static void
8606 pa_asm_out_constructor (rtx symbol, int priority)
8608 if (!function_label_operand (symbol, VOIDmode))
8609 pa_encode_label (symbol);
8611 #ifdef CTORS_SECTION_ASM_OP
8612 default_ctor_section_asm_out_constructor (symbol, priority);
8613 #else
8614 # ifdef TARGET_ASM_NAMED_SECTION
8615 default_named_section_asm_out_constructor (symbol, priority);
8616 # else
8617 default_stabs_asm_out_constructor (symbol, priority);
8618 # endif
8619 #endif
8622 static void
8623 pa_asm_out_destructor (rtx symbol, int priority)
8625 if (!function_label_operand (symbol, VOIDmode))
8626 pa_encode_label (symbol);
8628 #ifdef DTORS_SECTION_ASM_OP
8629 default_dtor_section_asm_out_destructor (symbol, priority);
8630 #else
8631 # ifdef TARGET_ASM_NAMED_SECTION
8632 default_named_section_asm_out_destructor (symbol, priority);
8633 # else
8634 default_stabs_asm_out_destructor (symbol, priority);
8635 # endif
8636 #endif
8638 #endif
8640 /* This function places uninitialized global data in the bss section.
8641 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8642 function on the SOM port to prevent uninitialized global data from
8643 being placed in the data section. */
8645 void
8646 pa_asm_output_aligned_bss (FILE *stream,
8647 const char *name,
8648 unsigned HOST_WIDE_INT size,
8649 unsigned int align)
8651 switch_to_section (bss_section);
8652 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8654 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8655 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8656 #endif
8658 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8659 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8660 #endif
8662 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8663 ASM_OUTPUT_LABEL (stream, name);
8664 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8667 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8668 that doesn't allow the alignment of global common storage to be directly
8669 specified. The SOM linker aligns common storage based on the rounded
8670 value of the NUM_BYTES parameter in the .comm directive. It's not
8671 possible to use the .align directive as it doesn't affect the alignment
8672 of the label associated with a .comm directive. */
8674 void
8675 pa_asm_output_aligned_common (FILE *stream,
8676 const char *name,
8677 unsigned HOST_WIDE_INT size,
8678 unsigned int align)
8680 unsigned int max_common_align;
8682 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8683 if (align > max_common_align)
8685 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8686 "for global common data. Using %u",
8687 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8688 align = max_common_align;
8691 switch_to_section (bss_section);
8693 assemble_name (stream, name);
8694 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8695 MAX (size, align / BITS_PER_UNIT));
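/* Illustrative sketch (not built): a 2-byte object requesting 64-bit
   alignment is emitted as ".comm name,8".  Inflating NUM_BYTES to
   MAX (size, align / BITS_PER_UNIT) makes the SOM linker's size-based
   rounding produce the requested alignment.  */
#if 0
unsigned HOST_WIDE_INT nbytes
  = MAX ((unsigned HOST_WIDE_INT) 2, 64 / BITS_PER_UNIT);	/* 8 */
#endif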
8698 /* We can't use .comm for local common storage as the SOM linker effectively
8699 treats the symbol as universal and uses the same storage for local symbols
8700 with the same name in different object files. The .block directive
8701 reserves an uninitialized block of storage. However, it's not common
8702 storage. Fortunately, GCC never requests common storage with the same
8703 name in any given translation unit. */
8705 void
8706 pa_asm_output_aligned_local (FILE *stream,
8707 const char *name,
8708 unsigned HOST_WIDE_INT size,
8709 unsigned int align)
8711 switch_to_section (bss_section);
8712 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8714 #ifdef LOCAL_ASM_OP
8715 fprintf (stream, "%s", LOCAL_ASM_OP);
8716 assemble_name (stream, name);
8717 fprintf (stream, "\n");
8718 #endif
8720 ASM_OUTPUT_LABEL (stream, name);
8721 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8724 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8725 use in fmpysub instructions. */
8727 pa_fmpysuboperands (rtx *operands)
8729 machine_mode mode = GET_MODE (operands[0]);
8731 /* Must be a floating point mode. */
8732 if (mode != SFmode && mode != DFmode)
8733 return 0;
8735 /* All modes must be the same. */
8736 if (! (mode == GET_MODE (operands[1])
8737 && mode == GET_MODE (operands[2])
8738 && mode == GET_MODE (operands[3])
8739 && mode == GET_MODE (operands[4])
8740 && mode == GET_MODE (operands[5])))
8741 return 0;
8743 /* All operands must be registers. */
8744 if (! (GET_CODE (operands[1]) == REG
8745 && GET_CODE (operands[2]) == REG
8746 && GET_CODE (operands[3]) == REG
8747 && GET_CODE (operands[4]) == REG
8748 && GET_CODE (operands[5]) == REG))
8749 return 0;
8751 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8752 operation, so operands[4] must be the same as operands[3]. */
8753 if (! rtx_equal_p (operands[3], operands[4]))
8754 return 0;
8756 /* multiply cannot feed into subtraction. */
8757 if (rtx_equal_p (operands[5], operands[0]))
8758 return 0;
8760 /* Inout operand of sub cannot conflict with any operands from multiply. */
8761 if (rtx_equal_p (operands[3], operands[0])
8762 || rtx_equal_p (operands[3], operands[1])
8763 || rtx_equal_p (operands[3], operands[2]))
8764 return 0;
8766 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8767 if (mode == SFmode
8768 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8769 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8770 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8771 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8772 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8773 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8774 return 0;
8776 /* Passed. Operands are suitable for fmpysub. */
8777 return 1;
8780 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8781 constants for a MULT embedded inside a memory address. */
8783 pa_mem_shadd_constant_p (int val)
8785 if (val == 2 || val == 4 || val == 8)
8786 return 1;
8787 else
8788 return 0;
8791 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8792 constants for shadd instructions. */
8794 pa_shadd_constant_p (int val)
8796 if (val == 1 || val == 2 || val == 3)
8797 return 1;
8798 else
8799 return 0;
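/* Illustrative sketch (not built): the two predicates describe the same
   scaled index facility.  A MULT by 2, 4 or 8 inside an address
   corresponds to a sh1add, sh2add or sh3add shift count of 1, 2 or 3;
   that is, the shadd constant is the base-2 log of the mem shadd
   constant.  */
#if 0
int shift = exact_log2 (4);	/* 2 */
gcc_assert (pa_shadd_constant_p (shift)
	    && pa_mem_shadd_constant_p (1 << shift));
#endif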
8802 /* Return TRUE if INSN branches forward. */
8804 static bool
8805 forward_branch_p (rtx_insn *insn)
8807 rtx lab = JUMP_LABEL (insn);
8809 /* The INSN must have a jump label. */
8810 gcc_assert (lab != NULL_RTX);
8812 if (INSN_ADDRESSES_SET_P ())
8813 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8815 while (insn)
8817 if (insn == lab)
8818 return true;
8819 else
8820 insn = NEXT_INSN (insn);
8823 return false;
8826 /* Output an unconditional move and branch insn. */
8828 const char *
8829 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8831 int length = get_attr_length (insn);
8833 /* These are the cases in which we win. */
8834 if (length == 4)
8835 return "mov%I1b,tr %1,%0,%2";
8837 /* None of the following cases win, but they don't lose either. */
8838 if (length == 8)
8840 if (dbr_sequence_length () == 0)
8842 /* Nothing in the delay slot, fake it by putting the combined
8843 insn (the copy or add) in the delay slot of a bl. */
8844 if (GET_CODE (operands[1]) == CONST_INT)
8845 return "b %2\n\tldi %1,%0";
8846 else
8847 return "b %2\n\tcopy %1,%0";
8849 else
8851 /* Something in the delay slot, but we've got a long branch. */
8852 if (GET_CODE (operands[1]) == CONST_INT)
8853 return "ldi %1,%0\n\tb %2";
8854 else
8855 return "copy %1,%0\n\tb %2";
8859 if (GET_CODE (operands[1]) == CONST_INT)
8860 output_asm_insn ("ldi %1,%0", operands);
8861 else
8862 output_asm_insn ("copy %1,%0", operands);
8863 return pa_output_lbranch (operands[2], insn, 1);
8866 /* Output an unconditional add and branch insn. */
8868 const char *
8869 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8871 int length = get_attr_length (insn);
8873 /* To make life easy we want operand0 to be the shared input/output
8874 operand and operand1 to be the readonly operand. */
8875 if (operands[0] == operands[1])
8876 operands[1] = operands[2];
8878 /* These are the cases in which we win. */
8879 if (length == 4)
8880 return "add%I1b,tr %1,%0,%3";
8882 /* None of the following cases win, but they don't lose either. */
8883 if (length == 8)
8885 if (dbr_sequence_length () == 0)
8886 /* Nothing in the delay slot, fake it by putting the combined
8887 insn (the copy or add) in the delay slot of a bl. */
8888 return "b %3\n\tadd%I1 %1,%0,%0";
8889 else
8890 /* Something in the delay slot, but we've got a long branch. */
8891 return "add%I1 %1,%0,%0\n\tb %3";
8894 output_asm_insn ("add%I1 %1,%0,%0", operands);
8895 return pa_output_lbranch (operands[3], insn, 1);
8898 /* We use this hook to perform a PA specific optimization which is difficult
8899 to do in earlier passes. */
8901 static void
8902 pa_reorg (void)
8904 remove_useless_addtr_insns (1);
8906 if (pa_cpu < PROCESSOR_8000)
8907 pa_combine_instructions ();
8910 /* The PA has a number of odd instructions which can perform multiple
8911 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8912 it may be profitable to combine two instructions into one instruction
8913 with two outputs. It's not profitable on PA2.0 machines because the
8914 two outputs would take two slots in the reorder buffers.
8916 This routine finds instructions which can be combined and combines
8917 them. We only support some of the potential combinations, and we
8918 only try common ways to find suitable instructions.
8920 * addb can add two registers or a register and a small integer
8921 and jump to a nearby (+-8k) location. Normally the jump to the
8922 nearby location is conditional on the result of the add, but by
8923 using the "true" condition we can make the jump unconditional.
8924 Thus addb can perform two independent operations in one insn.
8926 * movb is similar to addb in that it can perform a reg->reg
8927 or small immediate->reg copy and jump to a nearby (+-8k) location.
8929 * fmpyadd and fmpysub can perform a FP multiply and either an
8930 FP add or FP sub if the operands of the multiply and add/sub are
8931 independent (there are other minor restrictions). Note both
8932 the fmpy and fadd/fsub can in theory move to better spots according
8933 to data dependencies, but for now we require the fmpy stay at a
8934 fixed location.
8936 * Many of the memory operations can perform pre & post updates
8937 of index registers. GCC's pre/post increment/decrement addressing
8938 is far too simple to take advantage of all the possibilities. This
8939 pass may not be suitable since those insns may not be independent.
8941 * comclr can compare two ints or an int and a register, nullify
8942 the following instruction and zero some other register. This
8943 is more difficult to use as it's harder to find an insn which
8944 will generate a comclr than finding something like an unconditional
8945 branch. (conditional moves & long branches create comclr insns).
8947 * Most arithmetic operations can conditionally skip the next
8948 instruction. They can be viewed as "perform this operation
8949 and conditionally jump to this nearby location" (where nearby
8950 is a few insns away). These are difficult to use due to the
8951 branch length restrictions. */
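/* Illustrative sketch (not built), with hypothetical registers and
   label: the pass below can fuse

	add %r4,%r3,%r3
	b L$17

   into the single two-output insn

	addb,tr %r4,%r3,L$17

   by wrapping both patterns in a PARALLEL and asking recog whether the
   combined pattern matches, as pa_can_combine_p does.  */
#if 0
rtx example_par
  = gen_rtx_PARALLEL (VOIDmode,
		      gen_rtvec (2, PATTERN (anchor), PATTERN (floater)));
#endif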
8953 static void
8954 pa_combine_instructions (void)
8956 rtx_insn *anchor;
8958 /* This can get expensive since the basic algorithm is on the
8959 order of O(n^2) (or worse). Only do it for -O2 or higher
8960 levels of optimization. */
8961 if (optimize < 2)
8962 return;
8964 /* Walk down the list of insns looking for "anchor" insns which
8965 may be combined with "floating" insns. As the name implies,
8966 "anchor" instructions don't move, while "floating" insns may
8967 move around. */
8968 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8969 rtx_insn *new_rtx = make_insn_raw (par);
8971 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8973 enum attr_pa_combine_type anchor_attr;
8974 enum attr_pa_combine_type floater_attr;
8976 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8977 Also ignore any special USE insns. */
8978 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
8979 || GET_CODE (PATTERN (anchor)) == USE
8980 || GET_CODE (PATTERN (anchor)) == CLOBBER)
8981 continue;
8983 anchor_attr = get_attr_pa_combine_type (anchor);
8984 /* See if anchor is an insn suitable for combination. */
8985 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8986 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8987 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8988 && ! forward_branch_p (anchor)))
8990 rtx_insn *floater;
8992 for (floater = PREV_INSN (anchor);
8993 floater;
8994 floater = PREV_INSN (floater))
8996 if (NOTE_P (floater)
8997 || (NONJUMP_INSN_P (floater)
8998 && (GET_CODE (PATTERN (floater)) == USE
8999 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9000 continue;
9002 /* Anything except a regular INSN will stop our search. */
9003 if (! NONJUMP_INSN_P (floater))
9005 floater = NULL;
9006 break;
9009 /* See if FLOATER is suitable for combination with the
9010 anchor. */
9011 floater_attr = get_attr_pa_combine_type (floater);
9012 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9013 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9014 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9015 && floater_attr == PA_COMBINE_TYPE_FMPY))
9017 /* If ANCHOR and FLOATER can be combined, then we're
9018 done with this pass. */
9019 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9020 SET_DEST (PATTERN (floater)),
9021 XEXP (SET_SRC (PATTERN (floater)), 0),
9022 XEXP (SET_SRC (PATTERN (floater)), 1)))
9023 break;
9026 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9027 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9029 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9031 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9032 SET_DEST (PATTERN (floater)),
9033 XEXP (SET_SRC (PATTERN (floater)), 0),
9034 XEXP (SET_SRC (PATTERN (floater)), 1)))
9035 break;
9037 else
9039 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9040 SET_DEST (PATTERN (floater)),
9041 SET_SRC (PATTERN (floater)),
9042 SET_SRC (PATTERN (floater))))
9043 break;
9048 /* If we didn't find anything on the backwards scan, try forwards. */
9049 if (!floater
9050 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9051 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9053 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9055 if (NOTE_P (floater)
9056 || (NONJUMP_INSN_P (floater)
9057 && (GET_CODE (PATTERN (floater)) == USE
9058 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9060 continue;
9062 /* Anything except a regular INSN will stop our search. */
9063 if (! NONJUMP_INSN_P (floater))
9065 floater = NULL;
9066 break;
9069 /* See if FLOATER is suitable for combination with the
9070 anchor. */
9071 floater_attr = get_attr_pa_combine_type (floater);
9072 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9073 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9074 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9075 && floater_attr == PA_COMBINE_TYPE_FMPY))
9077 /* If ANCHOR and FLOATER can be combined, then we're
9078 done with this pass. */
9079 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9080 SET_DEST (PATTERN (floater)),
9081 XEXP (SET_SRC (PATTERN (floater)),
9083 XEXP (SET_SRC (PATTERN (floater)),
9084 1)))
9085 break;
9090 /* FLOATER will be nonzero if we found a suitable floating
9091 insn for combination with ANCHOR. */
9092 if (floater
9093 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9094 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9096 /* Emit the new instruction and delete the old anchor. */
9097 emit_insn_before (gen_rtx_PARALLEL
9098 (VOIDmode,
9099 gen_rtvec (2, PATTERN (anchor),
9100 PATTERN (floater))),
9101 anchor);
9103 SET_INSN_DELETED (anchor);
9105 /* Emit a special USE insn for FLOATER, then delete
9106 the floating insn. */
9107 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9108 delete_insn (floater);
9110 continue;
9112 else if (floater
9113 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9115 rtx temp;
9116 /* Emit the new_jump instruction and delete the old anchor. */
9117 temp
9118 = emit_jump_insn_before (gen_rtx_PARALLEL
9119 (VOIDmode,
9120 gen_rtvec (2, PATTERN (anchor),
9121 PATTERN (floater))),
9122 anchor);
9124 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9125 SET_INSN_DELETED (anchor);
9127 /* Emit a special USE insn for FLOATER, then delete
9128 the floating insn. */
9129 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9130 delete_insn (floater);
9131 continue;
9137 static int
9138 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9139 int reversed, rtx dest,
9140 rtx src1, rtx src2)
9142 int insn_code_number;
9143 rtx_insn *start, *end;
9145 /* Create a PARALLEL with the patterns of ANCHOR and
9146 FLOATER, try to recognize it, then test constraints
9147 for the resulting pattern.
9149 If the pattern doesn't match or the constraints
9150 aren't met keep searching for a suitable floater
9151 insn. */
9152 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9153 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9154 INSN_CODE (new_rtx) = -1;
9155 insn_code_number = recog_memoized (new_rtx);
9156 basic_block bb = BLOCK_FOR_INSN (anchor);
9157 if (insn_code_number < 0
9158 || (extract_insn (new_rtx),
9159 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9160 return 0;
9162 if (reversed)
9164 start = anchor;
9165 end = floater;
9167 else
9169 start = floater;
9170 end = anchor;
9173 /* There are up to three operands to consider: one
9174 output and two inputs.
9176 The output must not be used between FLOATER & ANCHOR
9177 exclusive. The inputs must not be set between
9178 FLOATER and ANCHOR exclusive. */
9180 if (reg_used_between_p (dest, start, end))
9181 return 0;
9183 if (reg_set_between_p (src1, start, end))
9184 return 0;
9186 if (reg_set_between_p (src2, start, end))
9187 return 0;
9189 /* If we get here, then everything is good. */
9190 return 1;
9193 /* Return nonzero if references for INSN are delayed.
9195 Millicode insns are actually function calls with some special
9196 constraints on arguments and register usage.
9198 Millicode calls always expect their arguments in the integer argument
9199 registers, and always return their result in %r29 (ret1). They
9200 are expected to clobber their arguments, %r1, %r29, and the return
9201 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9203 This function tells reorg that the references to arguments by
9204 millicode calls do not appear to happen until after the millicode call.
9205 This allows reorg to put insns which set the argument registers into the
9206 delay slot of the millicode call -- thus they act more like traditional
9207 CALL_INSNs.
9209 Note we cannot consider side effects of the insn to be delayed because
9210 the branch and link insn will clobber the return pointer. If we happened
9211 to use the return pointer in the delay slot of the call, then we lose.
9213 get_attr_type will try to recognize the given insn, so make sure to
9214 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9215 in particular. */
9217 pa_insn_refs_are_delayed (rtx_insn *insn)
9219 return ((NONJUMP_INSN_P (insn)
9220 && GET_CODE (PATTERN (insn)) != SEQUENCE
9221 && GET_CODE (PATTERN (insn)) != USE
9222 && GET_CODE (PATTERN (insn)) != CLOBBER
9223 && get_attr_type (insn) == TYPE_MILLI));
9226 /* Promote the return value, but not the arguments. */
9228 static machine_mode
9229 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9230 machine_mode mode,
9231 int *punsignedp ATTRIBUTE_UNUSED,
9232 const_tree fntype ATTRIBUTE_UNUSED,
9233 int for_return)
9235 if (for_return == 0)
9236 return mode;
9237 return promote_mode (type, mode, punsignedp);
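/* Illustrative sketch (not built), with hypothetical TYPE, UNSIGNEDP and
   FNTYPE: a QImode return value is widened to word_mode by promote_mode,
   while a QImode argument (FOR_RETURN == 0) keeps its mode.  */
#if 0
machine_mode ret_mode
  = pa_promote_function_mode (type, QImode, &unsignedp, fntype, 1);
machine_mode arg_mode
  = pa_promote_function_mode (type, QImode, &unsignedp, fntype, 0);
/* ret_mode == word_mode, arg_mode == QImode.  */
#endif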
9240 /* On the HP-PA the value is found in register(s) 28(-29), unless
9241 the mode is SF or DF. Then the value is returned in fr4 (32).
9243 This must perform the same promotions as PROMOTE_MODE, else promoting
9244 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9246 Small structures must be returned in a PARALLEL on PA64 in order
9247 to match the HP Compiler ABI. */
9249 static rtx
9250 pa_function_value (const_tree valtype,
9251 const_tree func ATTRIBUTE_UNUSED,
9252 bool outgoing ATTRIBUTE_UNUSED)
9254 machine_mode valmode;
9256 if (AGGREGATE_TYPE_P (valtype)
9257 || TREE_CODE (valtype) == COMPLEX_TYPE
9258 || TREE_CODE (valtype) == VECTOR_TYPE)
9260 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9262 /* Handle aggregates that fit exactly in a word or double word. */
9263 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9264 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9266 if (TARGET_64BIT)
9268 /* Aggregates with a size less than or equal to 128 bits are
9269 returned in GR 28(-29). They are left justified. The pad
9270 bits are undefined. Larger aggregates are returned in
9271 memory. */
9272 rtx loc[2];
9273 int i, offset = 0;
9274 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9276 for (i = 0; i < ub; i++)
9278 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9279 gen_rtx_REG (DImode, 28 + i),
9280 GEN_INT (offset));
9281 offset += 8;
9284 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9286 else if (valsize > UNITS_PER_WORD)
9288 /* Aggregates 5 to 8 bytes in size are returned in general
9289 registers r28-r29 in the same manner as other non
9290 floating-point objects. The data is right-justified and
9291 zero-extended to 64 bits. This is opposite to the normal
9292 justification used on big endian targets and requires
9293 special treatment. */
9294 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9295 gen_rtx_REG (DImode, 28), const0_rtx);
9296 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9300 if ((INTEGRAL_TYPE_P (valtype)
9301 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9302 || POINTER_TYPE_P (valtype))
9303 valmode = word_mode;
9304 else
9305 valmode = TYPE_MODE (valtype);
9307 if (TREE_CODE (valtype) == REAL_TYPE
9308 && !AGGREGATE_TYPE_P (valtype)
9309 && TYPE_MODE (valtype) != TFmode
9310 && !TARGET_SOFT_FLOAT)
9311 return gen_rtx_REG (valmode, 32);
9313 return gen_rtx_REG (valmode, 28);
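/* Illustrative sketch (not built): a scalar double comes back in fr4
   (hard register 32) unless soft float is in effect, while an int
   comes back in %r28.  */
#if 0
rtx fp_val = pa_function_value (double_type_node, NULL_TREE, false);
rtx gr_val = pa_function_value (integer_type_node, NULL_TREE, false);
/* fp_val == (reg:DF 32); gr_val == (reg:SI 28) on the 32-bit ABI and
   (reg:DI 28) under TARGET_64BIT, where int is narrower than a word.  */
#endif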
9316 /* Implement the TARGET_LIBCALL_VALUE hook. */
9318 static rtx
9319 pa_libcall_value (machine_mode mode,
9320 const_rtx fun ATTRIBUTE_UNUSED)
9322 if (! TARGET_SOFT_FLOAT
9323 && (mode == SFmode || mode == DFmode))
9324 return gen_rtx_REG (mode, 32);
9325 else
9326 return gen_rtx_REG (mode, 28);
9329 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9331 static bool
9332 pa_function_value_regno_p (const unsigned int regno)
9334 if (regno == 28
9335 || (! TARGET_SOFT_FLOAT && regno == 32))
9336 return true;
9338 return false;
9341 /* Update the data in CUM to advance over an argument
9342 of mode MODE and data type TYPE.
9343 (TYPE is null for libcalls where that information may not be available.) */
9345 static void
9346 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9347 const_tree type, bool named ATTRIBUTE_UNUSED)
9349 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9350 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9352 cum->nargs_prototype--;
9353 cum->words += (arg_size
9354 + ((cum->words & 01)
9355 && type != NULL_TREE
9356 && arg_size > 1));
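/* Illustrative sketch (not built): starting at an odd word offset, a
   two-word argument such as DFmode on the 32-bit ABI first skips one
   pad word, so the word count advances by three.  */
#if 0
int words = 1;		/* currently at an odd argument slot */
int arg_size = 2;	/* two-word argument */
words += arg_size + ((words & 01) && arg_size > 1);	/* 1 -> 4 */
#endif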
9359 /* Return the location of a parameter that is passed in a register or NULL
9360 if the parameter has any component that is passed in memory.
9362 This is new code and will be pushed into the net sources after
9363 further testing.
9365 ??? We might want to restructure this so that it looks more like other
9366 ports. */
9367 static rtx
9368 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9369 const_tree type, bool named ATTRIBUTE_UNUSED)
9371 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9372 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9373 int alignment = 0;
9374 int arg_size;
9375 int fpr_reg_base;
9376 int gpr_reg_base;
9377 rtx retval;
9379 if (mode == VOIDmode)
9380 return NULL_RTX;
9382 arg_size = FUNCTION_ARG_SIZE (mode, type);
9384 /* If this arg would be passed partially or totally on the stack, then
9385 this routine should return zero. pa_arg_partial_bytes will
9386 handle arguments which are split between regs and stack slots if
9387 the ABI mandates split arguments. */
9388 if (!TARGET_64BIT)
9390 /* The 32-bit ABI does not split arguments. */
9391 if (cum->words + arg_size > max_arg_words)
9392 return NULL_RTX;
9394 else
9396 if (arg_size > 1)
9397 alignment = cum->words & 1;
9398 if (cum->words + alignment >= max_arg_words)
9399 return NULL_RTX;
9402 /* The 32bit ABIs and the 64bit ABIs are rather different,
9403 particularly in their handling of FP registers. We might
9404 be able to cleverly share code between them, but I'm not
9405 going to bother in the hope that splitting them up results
9406 in code that is more easily understood. */
9408 if (TARGET_64BIT)
9410 /* Advance the base registers to their current locations.
9412 Remember, gprs grow towards smaller register numbers while
9413 fprs grow to higher register numbers. Also remember that
9414 although FP regs are 32-bit addressable, we pretend that
9415 the registers are 64-bits wide. */
9416 gpr_reg_base = 26 - cum->words;
9417 fpr_reg_base = 32 + cum->words;
9419 /* Arguments wider than one word and small aggregates need special
9420 treatment. */
9421 if (arg_size > 1
9422 || mode == BLKmode
9423 || (type && (AGGREGATE_TYPE_P (type)
9424 || TREE_CODE (type) == COMPLEX_TYPE
9425 || TREE_CODE (type) == VECTOR_TYPE)))
9427 /* Double-extended precision (80-bit), quad-precision (128-bit)
9428 and aggregates including complex numbers are aligned on
9429 128-bit boundaries. The first eight 64-bit argument slots
9430 are associated one-to-one, with general registers r26
9431 through r19, and also with floating-point registers fr4
9432 through fr11. Arguments larger than one word are always
9433 passed in general registers.
9435 Using a PARALLEL with a word mode register results in left
9436 justified data on a big-endian target. */
9438 rtx loc[8];
9439 int i, offset = 0, ub = arg_size;
9441 /* Align the base register. */
9442 gpr_reg_base -= alignment;
9444 ub = MIN (ub, max_arg_words - cum->words - alignment);
9445 for (i = 0; i < ub; i++)
9447 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9448 gen_rtx_REG (DImode, gpr_reg_base),
9449 GEN_INT (offset));
9450 gpr_reg_base -= 1;
9451 offset += 8;
9454 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9457 else
9459 /* If the argument is larger than a word, then we know precisely
9460 which registers we must use. */
9461 if (arg_size > 1)
9463 if (cum->words)
9465 gpr_reg_base = 23;
9466 fpr_reg_base = 38;
9468 else
9470 gpr_reg_base = 25;
9471 fpr_reg_base = 34;
9474 /* Structures 5 to 8 bytes in size are passed in the general
9475 registers in the same manner as other non floating-point
9476 objects. The data is right-justified and zero-extended
9477 to 64 bits. This is opposite to the normal justification
9478 used on big endian targets and requires special treatment.
9479 We now define BLOCK_REG_PADDING to pad these objects.
9480 Aggregates, complex and vector types are passed in the same
9481 manner as structures. */
9482 if (mode == BLKmode
9483 || (type && (AGGREGATE_TYPE_P (type)
9484 || TREE_CODE (type) == COMPLEX_TYPE
9485 || TREE_CODE (type) == VECTOR_TYPE)))
9487 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9488 gen_rtx_REG (DImode, gpr_reg_base),
9489 const0_rtx);
9490 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9493 else
9495 /* We have a single word (32 bits). A simple computation
9496 will get us the register #s we need. */
9497 gpr_reg_base = 26 - cum->words;
9498 fpr_reg_base = 32 + 2 * cum->words;
9502 /* Determine if the argument needs to be passed in both general and
9503 floating point registers. */
9504 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9505 /* If we are doing soft-float with portable runtime, then there
9506 is no need to worry about FP regs. */
9507 && !TARGET_SOFT_FLOAT
9508 /* The parameter must be some kind of scalar float, else we just
9509 pass it in integer registers. */
9510 && GET_MODE_CLASS (mode) == MODE_FLOAT
9511 /* The target function must not have a prototype. */
9512 && cum->nargs_prototype <= 0
9513 /* libcalls do not need to pass items in both FP and general
9514 registers. */
9515 && type != NULL_TREE
9516 /* All this hair applies to "outgoing" args only. This includes
9517 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9518 && !cum->incoming)
9519 /* Also pass outgoing floating arguments in both registers in indirect
9520 calls with the 32-bit ABI and the HP assembler since there is no
9521 way to specify argument locations in static functions. */
9522 || (!TARGET_64BIT
9523 && !TARGET_GAS
9524 && !cum->incoming
9525 && cum->indirect
9526 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9528 retval
9529 = gen_rtx_PARALLEL
9530 (mode,
9531 gen_rtvec (2,
9532 gen_rtx_EXPR_LIST (VOIDmode,
9533 gen_rtx_REG (mode, fpr_reg_base),
9534 const0_rtx),
9535 gen_rtx_EXPR_LIST (VOIDmode,
9536 gen_rtx_REG (mode, gpr_reg_base),
9537 const0_rtx)));
9539 else
9541 /* See if we should pass this parameter in a general register. */
9542 if (TARGET_SOFT_FLOAT
9543 /* Indirect calls in the normal 32bit ABI require all arguments
9544 to be passed in general registers. */
9545 || (!TARGET_PORTABLE_RUNTIME
9546 && !TARGET_64BIT
9547 && !TARGET_ELF32
9548 && cum->indirect)
9549 /* If the parameter is not a scalar floating-point parameter,
9550 then it belongs in GPRs. */
9551 || GET_MODE_CLASS (mode) != MODE_FLOAT
9552 /* Structure with single SFmode field belongs in GPR. */
9553 || (type && AGGREGATE_TYPE_P (type)))
9554 retval = gen_rtx_REG (mode, gpr_reg_base);
9555 else
9556 retval = gen_rtx_REG (mode, fpr_reg_base);
9558 return retval;
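/* Illustrative shapes of the RTL returned above (a sketch; the
   register numbers follow the comments in this function and are not
   a normative ABI statement):

     32-bit ABI, SImode arg in the second slot (cum->words == 1):
       (reg:SI 25)

     32-bit ELF port, unprototyped callee, DFmode arg with
     cum->words != 0 -- passed in both FP and general registers:
       (parallel [(expr_list (reg:DF 38) (const_int 0))
                  (expr_list (reg:DF 23) (const_int 0))])

     64-bit ABI, 16-byte aggregate starting in the first slot:
       (parallel [(expr_list (reg:DI 26) (const_int 0))
                  (expr_list (reg:DI 25) (const_int 8))])  */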
9561 /* Arguments larger than one word are double word aligned. */
9563 static unsigned int
9564 pa_function_arg_boundary (machine_mode mode, const_tree type)
9566 bool singleword = (type
9567 ? (integer_zerop (TYPE_SIZE (type))
9568 || !TREE_CONSTANT (TYPE_SIZE (type))
9569 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9570 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9572 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
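/* For instance (illustrative, assuming the 32-bit port where
   UNITS_PER_WORD is 4): an 'int' or a 4-byte struct yields
   PARM_BOUNDARY, while a 'double' or an 8-byte struct yields
   MAX_PARM_BOUNDARY.  Zero-sized and variable-sized types are
   deliberately treated as single-word so they get the smaller
   alignment.  */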
9575 /* If this arg would be passed totally in registers or totally on the stack,
9576 then this routine should return zero. */
9578 static int
9579 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9580 tree type, bool named ATTRIBUTE_UNUSED)
9582 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9583 unsigned int max_arg_words = 8;
9584 unsigned int offset = 0;
9586 if (!TARGET_64BIT)
9587 return 0;
9589 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9590 offset = 1;
9592 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9593 /* Arg fits fully into registers. */
9594 return 0;
9595 else if (cum->words + offset >= max_arg_words)
9596 /* Arg fully on the stack. */
9597 return 0;
9598 else
9599 /* Arg is split. */
9600 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
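/* Worked instances of the three cases above (illustrative arithmetic;
   the 64-bit ABI has eight argument slots of UNITS_PER_WORD == 8):

     cum->words == 6, 2-word arg:  6 + 0 + 2 <= 8, fits fully -> 0
     cum->words == 7, 2-word arg:  offset = 1, 7 + 1 >= 8, all stack -> 0
     cum->words == 6, 3-word arg:  split; (8 - 6 - 0) * 8 = 16 bytes
                                   go in registers, the rest on stack.  */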
9604 /* A get_unnamed_section callback for switching to the text section.
9606 This function is only used with SOM. Because we don't support
9607 named subspaces, we can only create a new subspace or switch back
9608 to the default text subspace. */
9610 static void
9611 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9613 gcc_assert (TARGET_SOM);
9614 if (TARGET_GAS)
9616 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9618 /* We only want to emit a .nsubspa directive once at the
9619 start of the function. */
9620 cfun->machine->in_nsubspa = 1;
9622 /* Create a new subspace for the text. This provides
9623 better stub placement and one-only functions. */
9624 if (cfun->decl
9625 && DECL_ONE_ONLY (cfun->decl)
9626 && !DECL_WEAK (cfun->decl))
9628 output_section_asm_op ("\t.SPACE $TEXT$\n"
9629 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9630 "ACCESS=44,SORT=24,COMDAT");
9631 return;
9634 else
9636 /* There isn't a current function or the body of the current
9637 function has been completed. So, we are changing to the
9638 text section to output debugging information. Thus, we
9639 need to forget that we are in the text section so that
9640 varasm.c will call us when text_section is selected again. */
9641 gcc_assert (!cfun || !cfun->machine
9642 || cfun->machine->in_nsubspa == 2);
9643 in_section = NULL;
9645 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9646 return;
9648 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9651 /* A get_unnamed_section callback for switching to comdat data
9652 sections. This function is only used with SOM. */
9654 static void
9655 som_output_comdat_data_section_asm_op (const void *data)
9657 in_section = NULL;
9658 output_section_asm_op (data);
9661 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9663 static void
9664 pa_som_asm_init_sections (void)
9666 text_section
9667 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9669 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9670 is not being generated. */
9671 som_readonly_data_section
9672 = get_unnamed_section (0, output_section_asm_op,
9673 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9675 /* When secondary definitions are not supported, SOM makes readonly
9676 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9677 the comdat flag. */
9678 som_one_only_readonly_data_section
9679 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9680 "\t.SPACE $TEXT$\n"
9681 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9682 "ACCESS=0x2c,SORT=16,COMDAT");
9685 /* When secondary definitions are not supported, SOM makes data one-only
9686 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9687 som_one_only_data_section
9688 = get_unnamed_section (SECTION_WRITE,
9689 som_output_comdat_data_section_asm_op,
9690 "\t.SPACE $PRIVATE$\n"
9691 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9692 "ACCESS=31,SORT=24,COMDAT");
9694 if (flag_tm)
9695 som_tm_clone_table_section
9696 = get_unnamed_section (0, output_section_asm_op,
9697 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9699 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9700 which reference data within the $TEXT$ space (for example constant
9701 strings in the $LIT$ subspace).
9703 The assemblers (GAS and HP as) both have problems with handling
9704 the difference of two symbols which is the other correct way to
9705 reference constant data during PIC code generation.
9707 So, there's no way to reference constant data which is in the
9708 $TEXT$ space during PIC generation. Instead place all constant
9709 data into the $PRIVATE$ subspace (this reduces sharing, but it
9710 works correctly). */
9711 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9713 /* We must not have a reference to an external symbol defined in a
9714 shared library in a readonly section, else the SOM linker will
9715 complain.
9717 So, we force exception information into the data section. */
9718 exception_section = data_section;
9721 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9723 static section *
9724 pa_som_tm_clone_table_section (void)
9726 return som_tm_clone_table_section;
9729 /* On hpux10, the linker will give an error if we have a reference
9730 in the read-only data section to a symbol defined in a shared
9731 library. Therefore, expressions that might require a reloc can
9732 not be placed in the read-only data section. */
9734 static section *
9735 pa_select_section (tree exp, int reloc,
9736 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9738 if (TREE_CODE (exp) == VAR_DECL
9739 && TREE_READONLY (exp)
9740 && !TREE_THIS_VOLATILE (exp)
9741 && DECL_INITIAL (exp)
9742 && (DECL_INITIAL (exp) == error_mark_node
9743 || TREE_CONSTANT (DECL_INITIAL (exp)))
9744 && !reloc)
9746 if (TARGET_SOM
9747 && DECL_ONE_ONLY (exp)
9748 && !DECL_WEAK (exp))
9749 return som_one_only_readonly_data_section;
9750 else
9751 return readonly_data_section;
9753 else if (CONSTANT_CLASS_P (exp) && !reloc)
9754 return readonly_data_section;
9755 else if (TARGET_SOM
9756 && TREE_CODE (exp) == VAR_DECL
9757 && DECL_ONE_ONLY (exp)
9758 && !DECL_WEAK (exp))
9759 return som_one_only_data_section;
9760 else
9761 return data_section;
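/* An illustrative mapping (a sketch, assuming SOM with one-only
   support and no relocs in the initializers):

     constant-initialized readonly decl  -> readonly_data_section
     same, but one-only and not weak     -> som_one_only_readonly_data_section
     initializer needing a reloc         -> data_section
     writable one-only, not weak decl    -> som_one_only_data_section  */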
9764 /* Implement pa_reloc_rw_mask. */
9766 static int
9767 pa_reloc_rw_mask (void)
9769 /* We force (const (plus (symbol) (const_int))) to memory when the
9770 const_int doesn't fit in a 14-bit integer. The SOM linker can't
9771 handle this construct in read-only memory and we want to avoid
9772 this for ELF. So, we always force an RTX needing relocation to
9773 the data section. */
9774 return 3;
9777 static void
9778 pa_globalize_label (FILE *stream, const char *name)
9780 /* We only handle DATA objects here, functions are globalized in
9781 ASM_DECLARE_FUNCTION_NAME. */
9782 if (! FUNCTION_NAME_P (name))
9784 fputs ("\t.EXPORT ", stream);
9785 assemble_name (stream, name);
9786 fputs (",DATA\n", stream);
9790 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9792 static rtx
9793 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9794 int incoming ATTRIBUTE_UNUSED)
9796 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9799 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9801 bool
9802 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9804 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9805 PA64 ABI says that objects larger than 128 bits are returned in memory.
9806 Note, int_size_in_bytes can return -1 if the size of the object is
9807 variable or larger than the maximum value that can be expressed as
9808 a HOST_WIDE_INT. It can also return zero for an empty type. The
9809 simplest way to handle variable and empty types is to pass them in
9810 memory. This avoids problems in defining the boundaries of argument
9811 slots, allocating registers, etc. */
9812 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9813 || int_size_in_bytes (type) <= 0);
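/* For example (illustrative, under the size rules above):

     struct s8  { int a, b; };        8 bytes: registers on both ports
     struct s12 { int a, b, c; };    12 bytes: memory on the 32-bit port,
                                               registers on the 64-bit port
     struct s24 { double a, b, c; }; 24 bytes: memory on both ports

   Variable-sized and empty types also land in memory, since
   int_size_in_bytes returns -1 or 0 for them.  */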
9816 /* Structure to hold declaration and name of external symbols that are
9817 emitted by GCC. We generate a vector of these symbols and output them
9818 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9819 This avoids putting out names that are never really used. */
9821 typedef struct GTY(()) extern_symbol
9823 tree decl;
9824 const char *name;
9825 } extern_symbol;
9827 /* Define gc'd vector type for extern_symbol. */
9829 /* Vector of extern_symbol pointers. */
9830 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9832 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9833 /* Mark DECL (name NAME) as an external reference (assembler output
9834 file FILE). This saves the names to output at the end of the file
9835 if actually referenced. */
9837 void
9838 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9840 gcc_assert (file == asm_out_file);
9841 extern_symbol p = {decl, name};
9842 vec_safe_push (extern_symbols, p);
9845 /* Output text required at the end of an assembler file.
9846 This includes deferred plabels and .import directives for
9847 all external symbols that were actually referenced. */
9849 static void
9850 pa_hpux_file_end (void)
9852 unsigned int i;
9853 extern_symbol *p;
9855 if (!NO_DEFERRED_PROFILE_COUNTERS)
9856 output_deferred_profile_counters ();
9858 output_deferred_plabels ();
9860 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9862 tree decl = p->decl;
9864 if (!TREE_ASM_WRITTEN (decl)
9865 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9866 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9869 vec_free (extern_symbols);
9871 #endif
9873 /* Return true if a change from mode FROM to mode TO for a register
9874 in register class RCLASS is invalid. */
9876 bool
9877 pa_cannot_change_mode_class (machine_mode from, machine_mode to,
9878 enum reg_class rclass)
9880 if (from == to)
9881 return false;
9883 /* Reject changes to/from complex and vector modes. */
9884 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9885 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9886 return true;
9888 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9889 return false;
9891 /* There is no way to load QImode or HImode values directly from
9892 memory. SImode loads to the FP registers are not zero extended.
9893 On the 64-bit target, this conflicts with the definition of
9894 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9895 with different sizes in the floating-point registers. */
9896 if (MAYBE_FP_REG_CLASS_P (rclass))
9897 return true;
9899 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9900 in specific sets of registers. Thus, we cannot allow changing
9901 to a larger mode when it's larger than a word. */
9902 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9903 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9904 return true;
9906 return false;
9909 /* Returns TRUE if it is a good idea to tie two pseudo registers
9910 when one has mode MODE1 and one has mode MODE2.
9911 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9912 for any hard reg, then this must be FALSE for correct output.
9914 We should return FALSE for QImode and HImode because these modes
9915 are not ok in the floating-point registers. However, this prevents
9916 tying these modes to SImode and DImode in the general registers.
9917 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9918 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9919 in the floating-point registers. */
9921 bool
9922 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9924 /* Don't tie modes in different classes. */
9925 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9926 return false;
9928 return true;
9932 /* Length in units of the trampoline instruction code. */
9934 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9937 /* Output assembler code for a block containing the constant parts
9938 of a trampoline, leaving space for the variable parts.
9940 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9941 and then branches to the specified routine.
9943 This code template is copied from text segment to stack location
9944 and then patched with pa_trampoline_init to contain valid values,
9945 and then entered as a subroutine.
9947 It is best to keep this as small as possible to avoid having to
9948 flush multiple lines in the cache. */
9950 static void
9951 pa_asm_trampoline_template (FILE *f)
9953 if (!TARGET_64BIT)
9955 fputs ("\tldw 36(%r22),%r21\n", f);
9956 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
9957 if (ASSEMBLER_DIALECT == 0)
9958 fputs ("\tdepi 0,31,2,%r21\n", f);
9959 else
9960 fputs ("\tdepwi 0,31,2,%r21\n", f);
9961 fputs ("\tldw 4(%r21),%r19\n", f);
9962 fputs ("\tldw 0(%r21),%r21\n", f);
9963 if (TARGET_PA_20)
9965 fputs ("\tbve (%r21)\n", f);
9966 fputs ("\tldw 40(%r22),%r29\n", f);
9967 fputs ("\t.word 0\n", f);
9968 fputs ("\t.word 0\n", f);
9970 else
9972 fputs ("\tldsid (%r21),%r1\n", f);
9973 fputs ("\tmtsp %r1,%sr0\n", f);
9974 fputs ("\tbe 0(%sr0,%r21)\n", f);
9975 fputs ("\tldw 40(%r22),%r29\n", f);
9977 fputs ("\t.word 0\n", f);
9978 fputs ("\t.word 0\n", f);
9979 fputs ("\t.word 0\n", f);
9980 fputs ("\t.word 0\n", f);
9982 else
9984 fputs ("\t.dword 0\n", f);
9985 fputs ("\t.dword 0\n", f);
9986 fputs ("\t.dword 0\n", f);
9987 fputs ("\t.dword 0\n", f);
9988 fputs ("\tmfia %r31\n", f);
9989 fputs ("\tldd 24(%r31),%r1\n", f);
9990 fputs ("\tldd 24(%r1),%r27\n", f);
9991 fputs ("\tldd 16(%r1),%r1\n", f);
9992 fputs ("\tbve (%r1)\n", f);
9993 fputs ("\tldd 32(%r31),%r31\n", f);
9994 fputs ("\t.dword 0 ; fptr\n", f);
9995 fputs ("\t.dword 0 ; static link\n", f);
9999 /* Emit RTL insns to initialize the variable parts of a trampoline.
10000 FNADDR is an RTX for the address of the function's pure code.
10001 CXT is an RTX for the static chain value for the function.
10003 Move the function address to the trampoline template at offset 36.
10004 Move the static chain value to trampoline template at offset 40.
10005 Move the trampoline address to trampoline template at offset 44.
10006 Move r19 to trampoline template at offset 48. The latter two
10007 words create a plabel for the indirect call to the trampoline.
10009 A similar sequence is used for the 64-bit port but the plabel is
10010 at the beginning of the trampoline.
10012 Finally, the cache entries for the trampoline code are flushed.
10013 This is necessary to ensure that the trampoline instruction sequence
10014 is written to memory prior to any attempts at prefetching the code
10015 sequence. */
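/* A sketch of the resulting 32-bit trampoline layout implied by the
   offsets above (illustrative; the code words come from
   pa_asm_trampoline_template):

     bytes  0-35   instruction template, padded with zero words
     byte  36      function address    (patched by pa_trampoline_init)
     byte  40      static chain value  (patched by pa_trampoline_init)
     byte  44      trampoline address  \ together these form the plabel
     byte  48      global pointer r19  / for the indirect call  */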
10017 static void
10018 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10020 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10021 rtx start_addr = gen_reg_rtx (Pmode);
10022 rtx end_addr = gen_reg_rtx (Pmode);
10023 rtx line_length = gen_reg_rtx (Pmode);
10024 rtx r_tramp, tmp;
10026 emit_block_move (m_tramp, assemble_trampoline_template (),
10027 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10028 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10030 if (!TARGET_64BIT)
10032 tmp = adjust_address (m_tramp, Pmode, 36);
10033 emit_move_insn (tmp, fnaddr);
10034 tmp = adjust_address (m_tramp, Pmode, 40);
10035 emit_move_insn (tmp, chain_value);
10037 /* Create a fat pointer for the trampoline. */
10038 tmp = adjust_address (m_tramp, Pmode, 44);
10039 emit_move_insn (tmp, r_tramp);
10040 tmp = adjust_address (m_tramp, Pmode, 48);
10041 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10043 /* fdc and fic only use registers for the address to flush,
10044 they do not accept integer displacements. We align the
10045 start and end addresses to the beginning of their respective
10046 cache lines to minimize the number of lines flushed. */
10047 emit_insn (gen_andsi3 (start_addr, r_tramp,
10048 GEN_INT (-MIN_CACHELINE_SIZE)));
10049 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10050 TRAMPOLINE_CODE_SIZE-1));
10051 emit_insn (gen_andsi3 (end_addr, tmp,
10052 GEN_INT (-MIN_CACHELINE_SIZE)));
10053 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10054 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10055 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10056 gen_reg_rtx (Pmode),
10057 gen_reg_rtx (Pmode)));
10059 else
10061 tmp = adjust_address (m_tramp, Pmode, 56);
10062 emit_move_insn (tmp, fnaddr);
10063 tmp = adjust_address (m_tramp, Pmode, 64);
10064 emit_move_insn (tmp, chain_value);
10066 /* Create a fat pointer for the trampoline. */
10067 tmp = adjust_address (m_tramp, Pmode, 16);
10068 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10069 r_tramp, 32)));
10070 tmp = adjust_address (m_tramp, Pmode, 24);
10071 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10073 /* fdc and fic only use registers for the address to flush,
10074 they do not accept integer displacements. We align the
10075 start and end addresses to the beginning of their respective
10076 cache lines to minimize the number of lines flushed. */
10077 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10078 emit_insn (gen_anddi3 (start_addr, tmp,
10079 GEN_INT (-MIN_CACHELINE_SIZE)));
10080 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10081 TRAMPOLINE_CODE_SIZE - 1));
10082 emit_insn (gen_anddi3 (end_addr, tmp,
10083 GEN_INT (-MIN_CACHELINE_SIZE)));
10084 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10085 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10086 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10087 gen_reg_rtx (Pmode),
10088 gen_reg_rtx (Pmode)));
10091 #ifdef HAVE_ENABLE_EXECUTE_STACK
10092 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10093 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
10094 #endif
10097 /* Perform any machine-specific adjustment in the address of the trampoline.
10098 ADDR contains the address that was passed to pa_trampoline_init.
10099 Adjust the trampoline address to point to the plabel at offset 44. */
10101 static rtx
10102 pa_trampoline_adjust_address (rtx addr)
10104 if (!TARGET_64BIT)
10105 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10106 return addr;
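/* Illustrative user-level code (GNU C, not part of this file) that
   makes GCC materialize a trampoline -- the nested function's address
   escapes and the function needs the static chain to reach I:

     extern void apply (int (*) (int));

     void
     caller (int i)
     {
       int add (int j) { return i + j; }
       apply (add);
     }

   Note the offset of 46 above: it is the plabel offset 44 plus 2; the
   extra bit appears to mark the pointer as a plabel, which the
   template's "bb,>=,n %r21,30" test and "depwi 0,31,2,%r21" in
   pa_asm_trampoline_template examine and clear.  */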
10109 static rtx
10110 pa_delegitimize_address (rtx orig_x)
10112 rtx x = delegitimize_mem_from_attrs (orig_x);
10114 if (GET_CODE (x) == LO_SUM
10115 && GET_CODE (XEXP (x, 1)) == UNSPEC
10116 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10117 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10118 return x;
10121 static rtx
10122 pa_internal_arg_pointer (void)
10124 /* The argument pointer and the hard frame pointer are the same in
10125 the 32-bit runtime, so we don't need a copy. */
10126 if (TARGET_64BIT)
10127 return copy_to_reg (virtual_incoming_args_rtx);
10128 else
10129 return virtual_incoming_args_rtx;
10132 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10133 Frame pointer elimination is automatically handled. */
10135 static bool
10136 pa_can_eliminate (const int from, const int to)
10138 /* The argument cannot be eliminated in the 64-bit runtime. */
10139 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10140 return false;
10142 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10143 ? ! frame_pointer_needed
10144 : true);
10147 /* Define the offset between two registers, FROM to be eliminated and its
10148 replacement TO, at the start of a routine. */
10149 HOST_WIDE_INT
10150 pa_initial_elimination_offset (int from, int to)
10152 HOST_WIDE_INT offset;
10154 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10155 && to == STACK_POINTER_REGNUM)
10156 offset = -pa_compute_frame_size (get_frame_size (), 0);
10157 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10158 offset = 0;
10159 else
10160 gcc_unreachable ();
10162 return offset;
10165 static void
10166 pa_conditional_register_usage (void)
10168 int i;
10170 if (!TARGET_64BIT && !TARGET_PA_11)
10172 for (i = 56; i <= FP_REG_LAST; i++)
10173 fixed_regs[i] = call_used_regs[i] = 1;
10174 for (i = 33; i < 56; i += 2)
10175 fixed_regs[i] = call_used_regs[i] = 1;
10177 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10179 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10180 fixed_regs[i] = call_used_regs[i] = 1;
10182 if (flag_pic)
10183 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10186 /* Target hook for c_mode_for_suffix. */
10188 static machine_mode
10189 pa_c_mode_for_suffix (char suffix)
10191 if (HPUX_LONG_DOUBLE_LIBRARY)
10193 if (suffix == 'q')
10194 return TFmode;
10197 return VOIDmode;
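/* For example (illustrative): when an HP-UX long-double library is in
   use, a C literal such as 1.0q gets TFmode, the 128-bit long double
   format; otherwise the 'q' suffix is rejected because VOIDmode is
   returned.  */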
10200 /* Target hook for function_section. */
10202 static section *
10203 pa_function_section (tree decl, enum node_frequency freq,
10204 bool startup, bool exit)
10206 /* Put functions in text section if target doesn't have named sections. */
10207 if (!targetm_common.have_named_sections)
10208 return text_section;
10210 /* Force nested functions into the same section as the containing
10211 function. */
10212 if (decl
10213 && DECL_SECTION_NAME (decl) == NULL
10214 && DECL_CONTEXT (decl) != NULL_TREE
10215 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10216 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10217 return function_section (DECL_CONTEXT (decl));
10219 /* Otherwise, use the default function section. */
10220 return default_function_section (decl, freq, startup, exit);
10223 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10225 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10226 that need more than three instructions to load prior to reload. This
10227 limit is somewhat arbitrary. It takes three instructions to load a
10228 CONST_INT from memory but two are memory accesses. It may be better
10229 to increase the allowed range for CONST_INTS. We may also be able
10230 to handle CONST_DOUBLES. */
10232 static bool
10233 pa_legitimate_constant_p (machine_mode mode, rtx x)
10235 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10236 return false;
10238 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10239 return false;
10241 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10242 legitimate constants. The other variants can't be handled by
10243 the move patterns after reload starts. */
10244 if (tls_referenced_p (x))
10245 return false;
10247 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10248 return false;
10250 if (TARGET_64BIT
10251 && HOST_BITS_PER_WIDE_INT > 32
10252 && GET_CODE (x) == CONST_INT
10253 && !reload_in_progress
10254 && !reload_completed
10255 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10256 && !pa_cint_ok_for_move (INTVAL (x)))
10257 return false;
10259 if (function_label_operand (x, mode))
10260 return false;
10262 return true;
10265 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10267 static unsigned int
10268 pa_section_type_flags (tree decl, const char *name, int reloc)
10270 unsigned int flags;
10272 flags = default_section_type_flags (decl, name, reloc);
10274 /* Function labels are placed in the constant pool. This can
10275 cause a section conflict if decls are put in ".data.rel.ro"
10276 or ".data.rel.ro.local" using the __attribute__ construct. */
10277 if (strcmp (name, ".data.rel.ro") == 0
10278 || strcmp (name, ".data.rel.ro.local") == 0)
10279 flags |= SECTION_WRITE | SECTION_RELRO;
10281 return flags;
10284 /* pa_legitimate_address_p recognizes an RTL expression that is a
10285 valid memory address for an instruction. The MODE argument is the
10286 machine mode for the MEM expression that wants to use this address.
10288 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10289 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10290 available with floating point loads and stores, and integer loads.
10291 We get better code by allowing indexed addresses in the initial
10292 RTL generation.
10294 The acceptance of indexed addresses as legitimate implies that we
10295 must provide patterns for doing indexed integer stores, or the move
10296 expanders must force the address of an indexed store to a register.
10297 We have adopted the latter approach.
10299 Another function of pa_legitimate_address_p is to ensure that
10300 the base register is a valid pointer for indexed instructions.
10301 On targets that have non-equivalent space registers, we have to
10302 know at the time of assembler output which register in a REG+REG
10303 pair is the base register. The REG_POINTER flag is sometimes lost
10304 in reload and the following passes, so it can't be relied on during
10305 code generation. Thus, we either have to canonicalize the order
10306 of the registers in REG+REG indexed addresses, or treat REG+REG
10307 addresses separately and provide patterns for both permutations.
10309 The latter approach requires several hundred additional lines of
10310 code in pa.md. The downside to canonicalizing is that a PLUS
10311 in the wrong order can't combine to form a scaled indexed
10312 memory operand. As we won't need to canonicalize the operands if
10313 the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10315 We initially break out scaled indexed addresses in canonical order
10316 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10317 scaled indexed addresses during RTL generation. However, fold_rtx
10318 has its own opinion on how the operands of a PLUS should be ordered.
10319 If one of the operands is equivalent to a constant, it will make
10320 that operand the second operand. As the base register is likely to
10321 be equivalent to a SYMBOL_REF, we have made it the second operand.
10323 pa_legitimate_address_p accepts REG+REG as legitimate when the
10324 operands are in the order INDEX+BASE on targets with non-equivalent
10325 space registers, and in any order on targets with equivalent space
10326 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10328 We treat a SYMBOL_REF as legitimate if it is part of the current
10329 function's constant-pool, because such addresses can actually be
10330 output as REG+SMALLINT. */
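/* Some concrete address shapes accepted below (illustrative, for an
   SImode access):

     (reg)                              register indirect
     (plus (reg) (const_int 12))        REG+SMALLINT
     (plus (reg) (reg))                 REG+REG; index + base order on
                                        targets with non-equivalent
                                        space registers
     (plus (mult (reg) (const_int 4))
           (reg))                       scaled index, scale == mode size
     (lo_sum (reg) (symbol_ref))        low half of a symbolic address  */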
10332 static bool
10333 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10335 if ((REG_P (x)
10336 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10337 : REG_OK_FOR_BASE_P (x)))
10338 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10339 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10340 && REG_P (XEXP (x, 0))
10341 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10342 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10343 return true;
10345 if (GET_CODE (x) == PLUS)
10347 rtx base, index;
10349 /* For REG+REG, the base register should be in XEXP (x, 1),
10350 so check it first. */
10351 if (REG_P (XEXP (x, 1))
10352 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10353 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10354 base = XEXP (x, 1), index = XEXP (x, 0);
10355 else if (REG_P (XEXP (x, 0))
10356 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10357 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10358 base = XEXP (x, 0), index = XEXP (x, 1);
10359 else
10360 return false;
10362 if (GET_CODE (index) == CONST_INT)
10364 if (INT_5_BITS (index))
10365 return true;
10367 /* When INT14_OK_STRICT is false, a secondary reload is needed
10368 to adjust the displacement of SImode and DImode floating point
10369 instructions but this may fail when the register also needs
10370 reloading. So, we return false when STRICT is true. We
10371 also reject long displacements for float mode addresses since
10372 the majority of accesses will use floating point instructions
10373 that don't support 14-bit offsets. */
10374 if (!INT14_OK_STRICT
10375 && (strict || !(reload_in_progress || reload_completed))
10376 && mode != QImode
10377 && mode != HImode)
10378 return false;
10380 return base14_operand (index, mode);
10383 if (!TARGET_DISABLE_INDEXING
10384 /* Only accept the "canonical" INDEX+BASE operand order
10385 on targets with non-equivalent space registers. */
10386 && (TARGET_NO_SPACE_REGS
10387 ? REG_P (index)
10388 : (base == XEXP (x, 1) && REG_P (index)
10389 && (reload_completed
10390 || (reload_in_progress && HARD_REGISTER_P (base))
10391 || REG_POINTER (base))
10392 && (reload_completed
10393 || (reload_in_progress && HARD_REGISTER_P (index))
10394 || !REG_POINTER (index))))
10395 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10396 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10397 : REG_OK_FOR_INDEX_P (index))
10398 && borx_reg_operand (base, Pmode)
10399 && borx_reg_operand (index, Pmode))
10400 return true;
10402 if (!TARGET_DISABLE_INDEXING
10403 && GET_CODE (index) == MULT
10404 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10405 && REG_P (XEXP (index, 0))
10406 && GET_MODE (XEXP (index, 0)) == Pmode
10407 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10408 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10409 && GET_CODE (XEXP (index, 1)) == CONST_INT
10410 && INTVAL (XEXP (index, 1))
10411 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10412 && borx_reg_operand (base, Pmode))
10413 return true;
10415 return false;
10418 if (GET_CODE (x) == LO_SUM)
10420 rtx y = XEXP (x, 0);
10422 if (GET_CODE (y) == SUBREG)
10423 y = SUBREG_REG (y);
10425 if (REG_P (y)
10426 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10427 : REG_OK_FOR_BASE_P (y)))
10429 /* Needed for -fPIC */
10430 if (mode == Pmode
10431 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10432 return true;
10434 if (!INT14_OK_STRICT
10435 && (strict || !(reload_in_progress || reload_completed))
10436 && mode != QImode
10437 && mode != HImode)
10438 return false;
10440 if (CONSTANT_P (XEXP (x, 1)))
10441 return true;
10443 return false;
10446 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10447 return true;
10449 return false;
10452 /* Look for machine dependent ways to make the invalid address AD a
10453 valid address.
10455 For the PA, transform:
10457 memory(X + <large int>)
10459 into:
10461 if (<large int> & mask) >= 16
10462 Y = (<large int> & ~mask) + mask + 1 Round up.
10463 else
10464 Y = (<large int> & ~mask) Round down.
10465 Z = X + Y
10466 memory (Z + (<large int> - Y));
10468 This makes reload inheritance and reload_cse work better since Z
10469 can be reused.
10471 There may be more opportunities to improve code with this hook. */
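/* A worked instance of the transform (illustrative numbers): for an
   SFmode access when INT14_OK_STRICT is false, mask is 0x1f.  Given
   (plus (reg X) (const_int 100)), 100 & 0x1f = 4 is below the halfway
   point 16, so we round down: Y = 100 & ~0x1f = 96.  The address is
   rewritten as

     (plus (plus (reg X) (const_int 96)) (const_int 4))

   the inner PLUS is pushed as a reload, and the residual displacement
   of 4 fits the 5-bit field of the FP load and store instructions.  */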
10473 rtx
10474 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10475 int opnum, int type,
10476 int ind_levels ATTRIBUTE_UNUSED)
10478 long offset, newoffset, mask;
10479 rtx new_rtx, temp = NULL_RTX;
10481 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10482 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10484 if (optimize && GET_CODE (ad) == PLUS)
10485 temp = simplify_binary_operation (PLUS, Pmode,
10486 XEXP (ad, 0), XEXP (ad, 1));
10488 new_rtx = temp ? temp : ad;
10490 if (optimize
10491 && GET_CODE (new_rtx) == PLUS
10492 && GET_CODE (XEXP (new_rtx, 0)) == REG
10493 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10495 offset = INTVAL (XEXP ((new_rtx), 1));
10497 /* Choose rounding direction. Round up if we are >= halfway. */
10498 if ((offset & mask) >= ((mask + 1) / 2))
10499 newoffset = (offset & ~mask) + mask + 1;
10500 else
10501 newoffset = offset & ~mask;
10503 /* Ensure that long displacements are aligned. */
10504 if (mask == 0x3fff
10505 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10506 || (TARGET_64BIT && (mode) == DImode)))
10507 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10509 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10511 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10512 GEN_INT (newoffset));
10513 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10514 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10515 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10516 opnum, (enum reload_type) type);
10517 return ad;
10521 return NULL_RTX;
10524 /* Output address vector. */
10526 void
10527 pa_output_addr_vec (rtx lab, rtx body)
10529 int idx, vlen = XVECLEN (body, 0);
10531 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10532 if (TARGET_GAS)
10533 fputs ("\t.begin_brtab\n", asm_out_file);
10534 for (idx = 0; idx < vlen; idx++)
10536 ASM_OUTPUT_ADDR_VEC_ELT
10537 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10539 if (TARGET_GAS)
10540 fputs ("\t.end_brtab\n", asm_out_file);
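/* The output looks roughly like this (illustrative; the exact label
   syntax comes from the internal_label hook and
   ASM_OUTPUT_ADDR_VEC_ELT):

     L$0012:
             .begin_brtab
             .word L$0013
             .word L$0014
             .word L$0015
             .end_brtab

   The .begin_brtab/.end_brtab markers, emitted only for GAS, identify
   the enclosed words as a branch table.  */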
10543 /* Output address difference vector. */
10545 void
10546 pa_output_addr_diff_vec (rtx lab, rtx body)
10548 rtx base = XEXP (XEXP (body, 0), 0);
10549 int idx, vlen = XVECLEN (body, 1);
10551 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10552 if (TARGET_GAS)
10553 fputs ("\t.begin_brtab\n", asm_out_file);
10554 for (idx = 0; idx < vlen; idx++)
10556 ASM_OUTPUT_ADDR_DIFF_ELT
10557 (asm_out_file,
10558 body,
10559 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10560 CODE_LABEL_NUMBER (base));
10562 if (TARGET_GAS)
10563 fputs ("\t.end_brtab\n", asm_out_file);
10566 #include "gt-pa.h"