/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2015 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "stringpool.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "dbxout.h"
#include "except.h"
#include "function.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "reload.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "df.h"
#include "opts.h"
#include "builtins.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
				 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
				     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
			    const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
					      machine_mode, int *,
					      const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}
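/* For example, 0x1fff fits in a signed 14-bit immediate (ldo),
   0xc5000 has its low 11 bits clear (ldil), and 0x3e0 is a contiguous
   5-bit field shifted into place (zdepi), so all three are accepted.
   0x12345 matches none of the three patterns and is rejected.  */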
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
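/* For example, pa_ldil_cint_p (0x12345000) is true: the low 11 bits
   are zero and bit 31 is clear, so the value is unchanged by 32->64
   bit sign extension.  pa_ldil_cint_p (0x12345678) is false because
   the low 11 bits (0x678) are nonzero.  */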
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
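/* A worked example of the test in pa_zdepi_cint_p: for x = 0x1f0
   (five contiguous ones), lsb_mask = 0x10, (x >> 4) + lsb_mask = 0x2f,
   and masking with ~(lsb_mask - 1) gives 0x20, a power of two, so the
   value is accepted.  For x = 0x1010 the same steps give 0x110, which
   is not a power of two, so it is rejected: no sign-extended 5-bit
   field deposited into zeros can produce that bit pattern.  */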
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
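/* The trick in both predicates above: adding the lowest set bit to
   the mask collapses a contiguous run of ones into a single carry
   bit, so the result is a power of two (or zero) exactly when the
   ones were contiguous.  E.g. 0x00ff0000 + 0x00010000 = 0x01000000
   (accepted), while 0x00ff00f0 + 0x00000010 = 0x00ff0100
   (rejected).  */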
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
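/* For instance, both (ashift (reg) (const_int 3)) and
   (mult (reg) (const_int 8)) satisfy this predicate; they are the
   shift-add and scaled-index spellings of the same address term.  */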
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
static rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = factor * factor * COSTS_N_INSNS (8);
      else
	*total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A size N times larger than UNITS_PER_WORD needs N times as
	 many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
      if (factor == 0)
	factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}
1608 /* Emit insns to move operands[1] into operands[0].
1610 Return 1 if we have written out everything that needs to be done to
1611 do the move. Otherwise, return 0 and the caller will emit the move
1612 normally.
1614 Note SCRATCH_REG may not be in the proper mode depending on how it
1615 will be used. This routine is responsible for creating a new copy
1616 of SCRATCH_REG in the proper mode. */
1619 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1621 register rtx operand0 = operands[0];
1622 register rtx operand1 = operands[1];
1623 register rtx tem;
1625 /* We can only handle indexed addresses in the destination operand
1626 of floating point stores. Thus, we need to break out indexed
1627 addresses from the destination operand. */
1628 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1630 gcc_assert (can_create_pseudo_p ());
1632 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1633 operand0 = replace_equiv_address (operand0, tem);
1636 /* On targets with non-equivalent space registers, break out unscaled
1637 indexed addresses from the source operand before the final CSE.
1638 We have to do this because the REG_POINTER flag is not correctly
1639 carried through various optimization passes and CSE may substitute
1640 a pseudo without the pointer set for one with the pointer set. As
1641 a result, we loose various opportunities to create insns with
1642 unscaled indexed addresses. */
1643 if (!TARGET_NO_SPACE_REGS
1644 && !cse_not_expected
1645 && GET_CODE (operand1) == MEM
1646 && GET_CODE (XEXP (operand1, 0)) == PLUS
1647 && REG_P (XEXP (XEXP (operand1, 0), 0))
1648 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1649 operand1
1650 = replace_equiv_address (operand1,
1651 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1653 if (scratch_reg
1654 && reload_in_progress && GET_CODE (operand0) == REG
1655 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1656 operand0 = reg_equiv_mem (REGNO (operand0));
1657 else if (scratch_reg
1658 && reload_in_progress && GET_CODE (operand0) == SUBREG
1659 && GET_CODE (SUBREG_REG (operand0)) == REG
1660 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1662 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1663 the code which tracks sets/uses for delete_output_reload. */
1664 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1665 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1666 SUBREG_BYTE (operand0));
1667 operand0 = alter_subreg (&temp, true);
1670 if (scratch_reg
1671 && reload_in_progress && GET_CODE (operand1) == REG
1672 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1673 operand1 = reg_equiv_mem (REGNO (operand1));
1674 else if (scratch_reg
1675 && reload_in_progress && GET_CODE (operand1) == SUBREG
1676 && GET_CODE (SUBREG_REG (operand1)) == REG
1677 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1679 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1680 the code which tracks sets/uses for delete_output_reload. */
1681 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1682 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1683 SUBREG_BYTE (operand1));
1684 operand1 = alter_subreg (&temp, true);
1687 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1688 && ((tem = find_replacement (&XEXP (operand0, 0)))
1689 != XEXP (operand0, 0)))
1690 operand0 = replace_equiv_address (operand0, tem);
1692 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1693 && ((tem = find_replacement (&XEXP (operand1, 0)))
1694 != XEXP (operand1, 0)))
1695 operand1 = replace_equiv_address (operand1, tem);
1697 /* Handle secondary reloads for loads/stores of FP registers from
1698 REG+D addresses where D does not fit in 5 or 14 bits, including
1699 (subreg (mem (addr))) cases. */
1700 if (scratch_reg
1701 && fp_reg_operand (operand0, mode)
1702 && (MEM_P (operand1)
1703 || (GET_CODE (operand1) == SUBREG
1704 && MEM_P (XEXP (operand1, 0))))
1705 && !floating_point_store_memory_operand (operand1, mode))
1707 if (GET_CODE (operand1) == SUBREG)
1708 operand1 = XEXP (operand1, 0);
1710 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1711 it in WORD_MODE regardless of what mode it was originally given
1712 to us. */
1713 scratch_reg = force_mode (word_mode, scratch_reg);
1715 /* D might not fit in 14 bits either; for such cases load D into
1716 scratch reg. */
1717 if (reg_plus_base_memory_operand (operand1, mode)
1718 && !(TARGET_PA_20
1719 && !TARGET_ELF32
1720 && INT_14_BITS (XEXP (XEXP (operand1, 0), 1))))
1722 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1723 emit_move_insn (scratch_reg,
1724 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1725 Pmode,
1726 XEXP (XEXP (operand1, 0), 0),
1727 scratch_reg));
1729 else
1730 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1731 emit_insn (gen_rtx_SET (operand0,
1732 replace_equiv_address (operand1, scratch_reg)));
1733 return 1;
1735 else if (scratch_reg
1736 && fp_reg_operand (operand1, mode)
1737 && (MEM_P (operand0)
1738 || (GET_CODE (operand0) == SUBREG
1739 && MEM_P (XEXP (operand0, 0))))
1740 && !floating_point_store_memory_operand (operand0, mode))
1742 if (GET_CODE (operand0) == SUBREG)
1743 operand0 = XEXP (operand0, 0);
1745 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1746 it in WORD_MODE regardless of what mode it was originally given
1747 to us. */
1748 scratch_reg = force_mode (word_mode, scratch_reg);
1750 /* D might not fit in 14 bits either; for such cases load D into
1751 scratch reg. */
1752 if (reg_plus_base_memory_operand (operand0, mode)
1753 && !(TARGET_PA_20
1754 && !TARGET_ELF32
1755 && INT_14_BITS (XEXP (XEXP (operand0, 0), 1))))
1757 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1758 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1759 0)),
1760 Pmode,
1761 XEXP (XEXP (operand0, 0),
1763 scratch_reg));
1765 else
1766 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1767 emit_insn (gen_rtx_SET (replace_equiv_address (operand0, scratch_reg),
1768 operand1));
1769 return 1;
1771 /* Handle secondary reloads for loads of FP registers from constant
1772 expressions by forcing the constant into memory. For the most part,
1773 this is only necessary for SImode and DImode.
1775 Use scratch_reg to hold the address of the memory location. */
1776 else if (scratch_reg
1777 && CONSTANT_P (operand1)
1778 && fp_reg_operand (operand0, mode))
1780 rtx const_mem, xoperands[2];
1782 if (operand1 == CONST0_RTX (mode))
1784 emit_insn (gen_rtx_SET (operand0, operand1));
1785 return 1;
1788 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1789 it in WORD_MODE regardless of what mode it was originally given
1790 to us. */
1791 scratch_reg = force_mode (word_mode, scratch_reg);
1793 /* Force the constant into memory and put the address of the
1794 memory location into scratch_reg. */
1795 const_mem = force_const_mem (mode, operand1);
1796 xoperands[0] = scratch_reg;
1797 xoperands[1] = XEXP (const_mem, 0);
1798 pa_emit_move_sequence (xoperands, Pmode, 0);
1800 /* Now load the destination register. */
1801 emit_insn (gen_rtx_SET (operand0,
1802 replace_equiv_address (const_mem, scratch_reg)));
1803 return 1;
1805 /* Handle secondary reloads for SAR. These occur when trying to load
1806 the SAR from memory or a constant. */
1807 else if (scratch_reg
1808 && GET_CODE (operand0) == REG
1809 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1810 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1811 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1813 /* D might not fit in 14 bits either; for such cases load D into
1814 scratch reg. */
1815 if (GET_CODE (operand1) == MEM
1816 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1818 /* We are reloading the address into the scratch register, so we
1819 want to make sure the scratch register is a full register. */
1820 scratch_reg = force_mode (word_mode, scratch_reg);
1822 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1823 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1824 0)),
1825 Pmode,
1826 XEXP (XEXP (operand1, 0),
1827 0),
1828 scratch_reg));
1830 /* Now we are going to load the scratch register from memory,
1831 we want to load it in the same width as the original MEM,
1832 which must be the same as the width of the ultimate destination,
1833 OPERAND0. */
1834 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1836 emit_move_insn (scratch_reg,
1837 replace_equiv_address (operand1, scratch_reg));
1839 else
1841 /* We want to load the scratch register using the same mode as
1842 the ultimate destination. */
1843 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1845 emit_move_insn (scratch_reg, operand1);
1848 /* And emit the insn to set the ultimate destination. We know that
1849 the scratch register has the same mode as the destination at this
1850 point. */
1851 emit_move_insn (operand0, scratch_reg);
1852 return 1;
1854 /* Handle the most common case: storing into a register. */
1855 else if (register_operand (operand0, mode))
1857 /* Legitimize TLS symbol references. This happens for references
1858 that aren't legitimate constants. */
1859 if (PA_SYMBOL_REF_TLS_P (operand1))
1860 operand1 = legitimize_tls_address (operand1);
1862 if (register_operand (operand1, mode)
1863 || (GET_CODE (operand1) == CONST_INT
1864 && pa_cint_ok_for_move (INTVAL (operand1)))
1865 || (operand1 == CONST0_RTX (mode))
1866 || (GET_CODE (operand1) == HIGH
1867 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1868 /* Only `general_operands' can come here, so MEM is ok. */
1869 || GET_CODE (operand1) == MEM)
1871 /* Various sets are created during RTL generation which don't
1872 have the REG_POINTER flag correctly set. After the CSE pass,
1873 instruction recognition can fail if we don't consistently
1874 set this flag when performing register copies. This should
1875 also improve the opportunities for creating insns that use
1876 unscaled indexing. */
1877 if (REG_P (operand0) && REG_P (operand1))
1879 if (REG_POINTER (operand1)
1880 && !REG_POINTER (operand0)
1881 && !HARD_REGISTER_P (operand0))
1882 copy_reg_pointer (operand0, operand1);
1885 /* When MEMs are broken out, the REG_POINTER flag doesn't
1886 get set. In some cases, we can set the REG_POINTER flag
1887 from the declaration for the MEM. */
1888 if (REG_P (operand0)
1889 && GET_CODE (operand1) == MEM
1890 && !REG_POINTER (operand0))
1892 tree decl = MEM_EXPR (operand1);
1894 /* Set the register pointer flag and register alignment
1895 if the declaration for this memory reference is a
1896 pointer type. */
1897 if (decl)
1899 tree type;
1901 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1902 tree operand 1. */
1903 if (TREE_CODE (decl) == COMPONENT_REF)
1904 decl = TREE_OPERAND (decl, 1);
1906 type = TREE_TYPE (decl);
1907 type = strip_array_types (type);
1909 if (POINTER_TYPE_P (type))
1911 int align;
1913 type = TREE_TYPE (type);
1914 /* Using TYPE_ALIGN_OK is rather conservative as
1915 only the Ada front end actually sets it. */
1916 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1917 : BITS_PER_UNIT);
1918 mark_reg_pointer (operand0, align);
1923 emit_insn (gen_rtx_SET (operand0, operand1));
1924 return 1;
1927 else if (GET_CODE (operand0) == MEM)
1929 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1930 && !(reload_in_progress || reload_completed))
1932 rtx temp = gen_reg_rtx (DFmode);
1934 emit_insn (gen_rtx_SET (temp, operand1));
1935 emit_insn (gen_rtx_SET (operand0, temp));
1936 return 1;
1938 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1940 /* Run this case quickly. */
1941 emit_insn (gen_rtx_SET (operand0, operand1));
1942 return 1;
1944 if (! (reload_in_progress || reload_completed))
1946 operands[0] = validize_mem (operand0);
1947 operands[1] = operand1 = force_reg (mode, operand1);
1951 /* Simplify the source if we need to.
1952 Note we do have to handle function labels here, even though we do
1953 not consider them legitimate constants. Loop optimizations can
1954 call the emit_move_xxx routines with one as a source. */
1955 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1956 || (GET_CODE (operand1) == HIGH
1957 && symbolic_operand (XEXP (operand1, 0), mode))
1958 || function_label_operand (operand1, VOIDmode)
1959 || tls_referenced_p (operand1))
1961 int ishighonly = 0;
1963 if (GET_CODE (operand1) == HIGH)
1965 ishighonly = 1;
1966 operand1 = XEXP (operand1, 0);
1968 if (symbolic_operand (operand1, mode))
1970 /* Argh. The assembler and linker can't handle arithmetic
1971 involving plabels.
1973 So we force the plabel into memory, load operand0 from
1974 the memory location, then add in the constant part. */
1975 if ((GET_CODE (operand1) == CONST
1976 && GET_CODE (XEXP (operand1, 0)) == PLUS
1977 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1978 VOIDmode))
1979 || function_label_operand (operand1, VOIDmode))
1981 rtx temp, const_part;
1983 /* Figure out what (if any) scratch register to use. */
1984 if (reload_in_progress || reload_completed)
1986 scratch_reg = scratch_reg ? scratch_reg : operand0;
1987 /* SCRATCH_REG will hold an address and maybe the actual
1988 data. We want it in WORD_MODE regardless of what mode it
1989 was originally given to us. */
1990 scratch_reg = force_mode (word_mode, scratch_reg);
1992 else if (flag_pic)
1993 scratch_reg = gen_reg_rtx (Pmode);
1995 if (GET_CODE (operand1) == CONST)
1997 /* Save away the constant part of the expression. */
1998 const_part = XEXP (XEXP (operand1, 0), 1);
1999 gcc_assert (GET_CODE (const_part) == CONST_INT);
2001 /* Force the function label into memory. */
2002 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2004 else
2006 /* No constant part. */
2007 const_part = NULL_RTX;
2009 /* Force the function label into memory. */
2010 temp = force_const_mem (mode, operand1);
2014 /* Get the address of the memory location. PIC-ify it if
2015 necessary. */
2016 temp = XEXP (temp, 0);
2017 if (flag_pic)
2018 temp = legitimize_pic_address (temp, mode, scratch_reg);
2020 /* Put the address of the memory location into our destination
2021 register. */
2022 operands[1] = temp;
2023 pa_emit_move_sequence (operands, mode, scratch_reg);
2025 /* Now load from the memory location into our destination
2026 register. */
2027 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2028 pa_emit_move_sequence (operands, mode, scratch_reg);
2030 /* And add back in the constant part. */
2031 if (const_part != NULL_RTX)
2032 expand_inc (operand0, const_part);
2034 return 1;
2037 if (flag_pic)
2039 rtx_insn *insn;
2040 rtx temp;
2042 if (reload_in_progress || reload_completed)
2044 temp = scratch_reg ? scratch_reg : operand0;
2045 /* TEMP will hold an address and maybe the actual
2046 data. We want it in WORD_MODE regardless of what mode it
2047 was originally given to us. */
2048 temp = force_mode (word_mode, temp);
2050 else
2051 temp = gen_reg_rtx (Pmode);
2053 /* Force (const (plus (symbol) (const_int))) to memory
2054 if the const_int will not fit in 14 bits. Although
2055 this requires a relocation, the instruction sequence
2056 needed to load the value is shorter. */
2057 if (GET_CODE (operand1) == CONST
2058 && GET_CODE (XEXP (operand1, 0)) == PLUS
2059 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2060 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2062 rtx x, m = force_const_mem (mode, operand1);
2064 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2065 x = replace_equiv_address (m, x);
2066 insn = emit_move_insn (operand0, x);
2068 else
2070 operands[1] = legitimize_pic_address (operand1, mode, temp);
2071 if (REG_P (operand0) && REG_P (operands[1]))
2072 copy_reg_pointer (operand0, operands[1]);
2073 insn = emit_move_insn (operand0, operands[1]);
2076 /* Put a REG_EQUAL note on this insn. */
2077 set_unique_reg_note (insn, REG_EQUAL, operand1);
2079 /* On the HPPA, references to data space are supposed to use dp,
2080 register 27, but showing it in the RTL inhibits various cse
2081 and loop optimizations. */
2082 else
2084 rtx temp, set;
2086 if (reload_in_progress || reload_completed)
2088 temp = scratch_reg ? scratch_reg : operand0;
2089 /* TEMP will hold an address and maybe the actual
2090 data. We want it in WORD_MODE regardless of what mode it
2091 was originally given to us. */
2092 temp = force_mode (word_mode, temp);
2094 else
2095 temp = gen_reg_rtx (mode);
2097 /* Loading a SYMBOL_REF into a register makes that register
2098 safe to be used as the base in an indexed address.
2100 Don't mark hard registers though. That loses. */
2101 if (GET_CODE (operand0) == REG
2102 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2103 mark_reg_pointer (operand0, BITS_PER_UNIT);
2104 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2105 mark_reg_pointer (temp, BITS_PER_UNIT);
2107 if (ishighonly)
2108 set = gen_rtx_SET (operand0, temp);
2109 else
2110 set = gen_rtx_SET (operand0,
2111 gen_rtx_LO_SUM (mode, temp, operand1));
2113 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2114 emit_insn (set);
2117 return 1;
2119 else if (tls_referenced_p (operand1))
2121 rtx tmp = operand1;
2122 rtx addend = NULL;
2124 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2126 addend = XEXP (XEXP (tmp, 0), 1);
2127 tmp = XEXP (XEXP (tmp, 0), 0);
2130 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2131 tmp = legitimize_tls_address (tmp);
2132 if (addend)
2134 tmp = gen_rtx_PLUS (mode, tmp, addend);
2135 tmp = force_operand (tmp, operands[0]);
2137 operands[1] = tmp;
2139 else if (GET_CODE (operand1) != CONST_INT
2140 || !pa_cint_ok_for_move (INTVAL (operand1)))
2142 rtx temp;
2143 rtx_insn *insn;
2144 rtx op1 = operand1;
2145 HOST_WIDE_INT value = 0;
2146 HOST_WIDE_INT insv = 0;
2147 int insert = 0;
2149 if (GET_CODE (operand1) == CONST_INT)
2150 value = INTVAL (operand1);
2152 if (TARGET_64BIT
2153 && GET_CODE (operand1) == CONST_INT
2154 && HOST_BITS_PER_WIDE_INT > 32
2155 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2157 HOST_WIDE_INT nval;
2159 /* Extract the low order 32 bits of the value and sign extend.
2160 If the new value is the same as the original value, we
2161 can use the original value as-is. If the new value is
2162 different, we use it and insert the most-significant 32-bits
2163 of the original value into the final result. */
2164 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2165 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
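          /* The XOR/subtract pair above is the usual branch-free sign
             extension from 32 bits.  Sketch: value == 0x123456789 masks
             to 0x23456789, which differs from the original, so INSERT
             gets set and INSV receives the upper 32 bits (here 0x1).  */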
2166 if (value != nval)
2168 #if HOST_BITS_PER_WIDE_INT > 32
2169 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2170 #endif
2171 insert = 1;
2172 value = nval;
2173 operand1 = GEN_INT (nval);
2177 if (reload_in_progress || reload_completed)
2178 temp = scratch_reg ? scratch_reg : operand0;
2179 else
2180 temp = gen_reg_rtx (mode);
2182 /* We don't directly split DImode constants on 32-bit targets
2183 because PLUS uses an 11-bit immediate and the insn sequence
2184 generated is not as efficient as the one using HIGH/LO_SUM. */
2185 if (GET_CODE (operand1) == CONST_INT
2186 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2187 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2188 && !insert)
2190 /* Directly break constant into high and low parts. This
2191 provides better optimization opportunities because various
2192 passes recognize constants split with PLUS but not LO_SUM.
2193 We use a 14-bit signed low part except when the addition
2194 of 0x4000 to the high part might change the sign of the
2195 high part. */
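          /* Worked example (hypothetical value): value == 0x2345 (9029)
             gives high == 0 and low == 0x2345; low >= 0x2000, so high
             becomes 0x4000 and low == 9029 - 16384 == -7355, which fits
             the signed 14-bit range.  The emitted sequence is then
             temp = 0x4000; dest = temp + -7355.  */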
2196 HOST_WIDE_INT low = value & 0x3fff;
2197 HOST_WIDE_INT high = value & ~ 0x3fff;
2199 if (low >= 0x2000)
2201 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2202 high += 0x2000;
2203 else
2204 high += 0x4000;
2207 low = value - high;
2209 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2210 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2212 else
2214 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2215 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2218 insn = emit_move_insn (operands[0], operands[1]);
2220 /* Now insert the most significant 32 bits of the value
2221 into the register. When we don't have a second register
2222 available, it could take up to nine instructions to load
2223 a 64-bit integer constant. Prior to reload, we force
2224 constants that would take more than three instructions
2225 to load to the constant pool. During and after reload,
2226 we have to handle all possible values. */
2227 if (insert)
2229 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2230 register and the value to be inserted is outside the
2231 range that can be loaded with three depdi instructions. */
2232 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2234 operand1 = GEN_INT (insv);
2236 emit_insn (gen_rtx_SET (temp,
2237 gen_rtx_HIGH (mode, operand1)));
2238 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2239 if (mode == DImode)
2240 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2241 const0_rtx, temp));
2242 else
2243 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2244 const0_rtx, temp));
2246 else
2248 int len = 5, pos = 27;
2250 /* Insert the bits using the depdi instruction. */
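              /* A sketch of one collapse the loop performs: when
                 insv == -1, v5 == -1 and sign == 1, so the left-extension
                 loop below runs POS down to 0 while LEN grows to 32, and
                 a single 32-bit-wide deposit of -1 covers the whole
                 field.  */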
2251 while (pos >= 0)
2253 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2254 HOST_WIDE_INT sign = v5 < 0;
2256 /* Left extend the insertion. */
2257 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2258 while (pos > 0 && (insv & 1) == sign)
2260 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2261 len += 1;
2262 pos -= 1;
2265 if (mode == DImode)
2266 insn = emit_insn (gen_insvdi (operand0,
2267 GEN_INT (len),
2268 GEN_INT (pos),
2269 GEN_INT (v5)));
2270 else
2271 insn = emit_insn (gen_insvsi (operand0,
2272 GEN_INT (len),
2273 GEN_INT (pos),
2274 GEN_INT (v5)));
2276 len = pos > 0 && pos < 5 ? pos : 5;
2277 pos -= len;
2282 set_unique_reg_note (insn, REG_EQUAL, op1);
2284 return 1;
2287 /* Now have insn-emit do whatever it normally does. */
2288 return 0;
2291 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2292 it will need a link/runtime reloc). */
2295 pa_reloc_needed (tree exp)
2297 int reloc = 0;
2299 switch (TREE_CODE (exp))
2301 case ADDR_EXPR:
2302 return 1;
2304 case POINTER_PLUS_EXPR:
2305 case PLUS_EXPR:
2306 case MINUS_EXPR:
2307 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2308 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2309 break;
2311 CASE_CONVERT:
2312 case NON_LVALUE_EXPR:
2313 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2314 break;
2316 case CONSTRUCTOR:
2318 tree value;
2319 unsigned HOST_WIDE_INT ix;
2321 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2322 if (value)
2323 reloc |= pa_reloc_needed (value);
2325 break;
2327 case ERROR_MARK:
2328 break;
2330 default:
2331 break;
2333 return reloc;
2337 /* Return the best assembler insn template
2338 for moving operands[1] into operands[0] as a fullword. */
2339 const char *
2340 pa_singlemove_string (rtx *operands)
2342 HOST_WIDE_INT intval;
2344 if (GET_CODE (operands[0]) == MEM)
2345 return "stw %r1,%0";
2346 if (GET_CODE (operands[1]) == MEM)
2347 return "ldw %1,%0";
2348 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2350 long i;
2351 REAL_VALUE_TYPE d;
2353 gcc_assert (GET_MODE (operands[1]) == SFmode);
2355 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2356 bit pattern. */
2357 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2358 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2360 operands[1] = GEN_INT (i);
2361 /* Fall through to CONST_INT case. */
2363 if (GET_CODE (operands[1]) == CONST_INT)
2365 intval = INTVAL (operands[1]);
2367 if (VAL_14_BITS_P (intval))
2368 return "ldi %1,%0";
2369 else if ((intval & 0x7ff) == 0)
2370 return "ldil L'%1,%0";
2371 else if (pa_zdepi_cint_p (intval))
2372 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2373 else
2374 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2376 return "copy %1,%0";
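   /* Illustrative constant cases for the code above (values
      hypothetical): 7 fits in 14 bits and yields "ldi"; 0x12345000 has
      its low 11 bits clear and yields "ldil"; 0x3c00 == 15 << 10 is a
      shifted 5-bit value and yields the zdepi form; something like
      0x12345 needs the two-insn "ldil"/"ldo" pair.  */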
2380 /* Compute position (in OP[1]) and width (in OP[2])
2381 useful for copying IMM to a register using the zdepi
2382 instructions. Store the immediate value to insert in OP[0]. */
2383 static void
2384 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2386 int lsb, len;
2388 /* Find the least significant set bit in IMM. */
2389 for (lsb = 0; lsb < 32; lsb++)
2391 if ((imm & 1) != 0)
2392 break;
2393 imm >>= 1;
2396 /* Choose variants based on the *sign* of the 5-bit field. */
2397 if ((imm & 0x10) == 0)
2398 len = (lsb <= 28) ? 4 : 32 - lsb;
2399 else
2401 /* Find the width of the bitstring in IMM. */
2402 for (len = 5; len < 32 - lsb; len++)
2404 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2405 break;
2408 /* Sign extend IMM as a 5-bit value. */
2409 imm = (imm & 0xf) - 0x10;
2412 op[0] = imm;
2413 op[1] = 31 - lsb;
2414 op[2] = len;
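   /* Worked example for the code above (hypothetical immediate):
      imm == 0x00ff0000 gives lsb == 16 and an 8-bit field whose low
      five bits sign extend to -1, so op == {-1, 15, 8}; the insn
      "zdepi -1,15,8" deposits eight ones ending at (big-endian) bit
      position 15, recreating 0x00ff0000.  */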
2417 /* Compute position (in OP[1]) and width (in OP[2])
2418 useful for copying IMM to a register using the depdi,z
2419 instructions. Store the immediate value to insert in OP[0]. */
2421 static void
2422 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2424 int lsb, len, maxlen;
2426 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2428 /* Find the least significant set bit in IMM. */
2429 for (lsb = 0; lsb < maxlen; lsb++)
2431 if ((imm & 1) != 0)
2432 break;
2433 imm >>= 1;
2436 /* Choose variants based on the *sign* of the 5-bit field. */
2437 if ((imm & 0x10) == 0)
2438 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2439 else
2441 /* Find the width of the bitstring in IMM. */
2442 for (len = 5; len < maxlen - lsb; len++)
2444 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2445 break;
2448 /* Extend length if host is narrow and IMM is negative. */
2449 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2450 len += 32;
2452 /* Sign extend IMM as a 5-bit value. */
2453 imm = (imm & 0xf) - 0x10;
2456 op[0] = imm;
2457 op[1] = 63 - lsb;
2458 op[2] = len;
2461 /* Output assembler code to perform a doubleword move insn
2462 with operands OPERANDS. */
2464 const char *
2465 pa_output_move_double (rtx *operands)
2467 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2468 rtx latehalf[2];
2469 rtx addreg0 = 0, addreg1 = 0;
2471 /* First classify both operands. */
2473 if (REG_P (operands[0]))
2474 optype0 = REGOP;
2475 else if (offsettable_memref_p (operands[0]))
2476 optype0 = OFFSOP;
2477 else if (GET_CODE (operands[0]) == MEM)
2478 optype0 = MEMOP;
2479 else
2480 optype0 = RNDOP;
2482 if (REG_P (operands[1]))
2483 optype1 = REGOP;
2484 else if (CONSTANT_P (operands[1]))
2485 optype1 = CNSTOP;
2486 else if (offsettable_memref_p (operands[1]))
2487 optype1 = OFFSOP;
2488 else if (GET_CODE (operands[1]) == MEM)
2489 optype1 = MEMOP;
2490 else
2491 optype1 = RNDOP;
2493 /* Check for the cases that the operand constraints are not
2494 supposed to allow. */
2495 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2497 /* Handle copies between general and floating registers. */
2499 if (optype0 == REGOP && optype1 == REGOP
2500 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2502 if (FP_REG_P (operands[0]))
2504 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2505 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2506 return "{fldds|fldd} -16(%%sp),%0";
2508 else
2510 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2511 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2512 return "{ldws|ldw} -12(%%sp),%R0";
2516 /* Handle auto decrementing and incrementing loads and stores
2517 specifically, since the structure of the function doesn't work
2518 for them without major modification. Do it better when we teach
2519 this port about the general inc/dec addressing of PA.
2520 (This was written by tege. Chide him if it doesn't work.) */
2522 if (optype0 == MEMOP)
2524 /* We have to output the address syntax ourselves, since print_operand
2525 doesn't deal with the addresses we want to use. Fix this later. */
2527 rtx addr = XEXP (operands[0], 0);
2528 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2530 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2532 operands[0] = XEXP (addr, 0);
2533 gcc_assert (GET_CODE (operands[1]) == REG
2534 && GET_CODE (operands[0]) == REG);
2536 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2538 /* No overlap between high target register and address
2539 register. (We do this in a non-obvious way to
2540 save a register file writeback) */
2541 if (GET_CODE (addr) == POST_INC)
2542 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2543 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2545 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2547 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2549 operands[0] = XEXP (addr, 0);
2550 gcc_assert (GET_CODE (operands[1]) == REG
2551 && GET_CODE (operands[0]) == REG);
2553 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2554 /* No overlap between high target register and address
2555 register. (We do this in a non-obvious way to save a
2556 register file writeback) */
2557 if (GET_CODE (addr) == PRE_INC)
2558 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2559 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2562 if (optype1 == MEMOP)
2564 /* We have to output the address syntax ourselves, since print_operand
2565 doesn't deal with the addresses we want to use. Fix this later. */
2567 rtx addr = XEXP (operands[1], 0);
2568 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2570 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2572 operands[1] = XEXP (addr, 0);
2573 gcc_assert (GET_CODE (operands[0]) == REG
2574 && GET_CODE (operands[1]) == REG);
2576 if (!reg_overlap_mentioned_p (high_reg, addr))
2578 /* No overlap between high target register and address
2579 register. (We do this in a non-obvious way to
2580 save a register file writeback) */
2581 if (GET_CODE (addr) == POST_INC)
2582 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2583 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2585 else
2587 /* This is an undefined situation. We should load into the
2588 address register *and* update that register. Probably
2589 we don't need to handle this at all. */
2590 if (GET_CODE (addr) == POST_INC)
2591 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2592 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2595 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2597 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2599 operands[1] = XEXP (addr, 0);
2600 gcc_assert (GET_CODE (operands[0]) == REG
2601 && GET_CODE (operands[1]) == REG);
2603 if (!reg_overlap_mentioned_p (high_reg, addr))
2605 /* No overlap between high target register and address
2606 register. (We do this in a non-obvious way to
2607 save a register file writeback) */
2608 if (GET_CODE (addr) == PRE_INC)
2609 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2610 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2612 else
2614 /* This is an undefined situation. We should load into the
2615 address register *and* update that register. Probably
2616 we don't need to handle this at all. */
2617 if (GET_CODE (addr) == PRE_INC)
2618 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2619 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2622 else if (GET_CODE (addr) == PLUS
2623 && GET_CODE (XEXP (addr, 0)) == MULT)
2625 rtx xoperands[4];
2627 /* Load address into left half of destination register. */
2628 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2629 xoperands[1] = XEXP (addr, 1);
2630 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2631 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2632 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2633 xoperands);
2634 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2636 else if (GET_CODE (addr) == PLUS
2637 && REG_P (XEXP (addr, 0))
2638 && REG_P (XEXP (addr, 1)))
2640 rtx xoperands[3];
2642 /* Load address into left half of destination register. */
2643 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2644 xoperands[1] = XEXP (addr, 0);
2645 xoperands[2] = XEXP (addr, 1);
2646 output_asm_insn ("{addl|add,l} %1,%2,%0",
2647 xoperands);
2648 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2652 /* If an operand is an unoffsettable memory ref, find a register
2653 we can increment temporarily to make it refer to the second word. */
2655 if (optype0 == MEMOP)
2656 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2658 if (optype1 == MEMOP)
2659 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2661 /* Ok, we can do one word at a time.
2662 Normally we do the low-numbered word first; when the first move
2663 would clobber the source of the second, we do the high-numbered word first.
2664 In either case, set up in LATEHALF the operands to use
2665 for the high-numbered word and in some cases alter the
2666 operands in OPERANDS to be suitable for the low-numbered word. */
2668 if (optype0 == REGOP)
2669 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2670 else if (optype0 == OFFSOP)
2671 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2672 else
2673 latehalf[0] = operands[0];
2675 if (optype1 == REGOP)
2676 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2677 else if (optype1 == OFFSOP)
2678 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2679 else if (optype1 == CNSTOP)
2680 split_double (operands[1], &operands[1], &latehalf[1]);
2681 else
2682 latehalf[1] = operands[1];
2684 /* If the first move would clobber the source of the second one,
2685 do them in the other order.
2687 This can happen in two cases:
2689 mem -> register where the first half of the destination register
2690 is the same register used in the memory's address. Reload
2691 can create such insns.
2693 mem in this case will be either register indirect or register
2694 indirect plus a valid offset.
2696 register -> register move where REGNO(dst) == REGNO(src + 1)
2697 someone (Tim/Tege?) claimed this can happen for parameter loads.
2699 Handle mem -> register case first. */
2700 if (optype0 == REGOP
2701 && (optype1 == MEMOP || optype1 == OFFSOP)
2702 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2704 /* Do the late half first. */
2705 if (addreg1)
2706 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2707 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2709 /* Then clobber. */
2710 if (addreg1)
2711 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2712 return pa_singlemove_string (operands);
2715 /* Now handle register -> register case. */
2716 if (optype0 == REGOP && optype1 == REGOP
2717 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2719 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2720 return pa_singlemove_string (operands);
2723 /* Normal case: do the two words, low-numbered first. */
2725 output_asm_insn (pa_singlemove_string (operands), operands);
2727 /* Make any unoffsettable addresses point at high-numbered word. */
2728 if (addreg0)
2729 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2730 if (addreg1)
2731 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2733 /* Do that word. */
2734 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2736 /* Undo the adds we just did. */
2737 if (addreg0)
2738 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2739 if (addreg1)
2740 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2742 return "";
2745 const char *
2746 pa_output_fp_move_double (rtx *operands)
2748 if (FP_REG_P (operands[0]))
2750 if (FP_REG_P (operands[1])
2751 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2752 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2753 else
2754 output_asm_insn ("fldd%F1 %1,%0", operands);
2756 else if (FP_REG_P (operands[1]))
2758 output_asm_insn ("fstd%F0 %1,%0", operands);
2760 else
2762 rtx xoperands[2];
2764 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2766 /* This is a pain. You have to be prepared to deal with an
2767 arbitrary address here, including pre/post increment/decrement,
2769 so avoid this in the MD. */
2770 gcc_assert (GET_CODE (operands[0]) == REG);
2772 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2773 xoperands[0] = operands[0];
2774 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2776 return "";
2779 /* Return a REG that occurs in ADDR with coefficient 1.
2780 ADDR can be effectively incremented by incrementing REG. */
2782 static rtx
2783 find_addr_reg (rtx addr)
2785 while (GET_CODE (addr) == PLUS)
2787 if (GET_CODE (XEXP (addr, 0)) == REG)
2788 addr = XEXP (addr, 0);
2789 else if (GET_CODE (XEXP (addr, 1)) == REG)
2790 addr = XEXP (addr, 1);
2791 else if (CONSTANT_P (XEXP (addr, 0)))
2792 addr = XEXP (addr, 1);
2793 else if (CONSTANT_P (XEXP (addr, 1)))
2794 addr = XEXP (addr, 0);
2795 else
2796 gcc_unreachable ();
2798 gcc_assert (GET_CODE (addr) == REG);
2799 return addr;
2802 /* Emit code to perform a block move.
2804 OPERANDS[0] is the destination pointer as a REG, clobbered.
2805 OPERANDS[1] is the source pointer as a REG, clobbered.
2806 OPERANDS[2] is a register for temporary storage.
2807 OPERANDS[3] is a register for temporary storage.
2808 OPERANDS[4] is the size as a CONST_INT
2809 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2810 OPERANDS[6] is another temporary register. */
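/* As an illustration (a sketch, not the literal emitted text; the
   register names are placeholders), the align == 4 case below produces
   roughly:

        ldi     N-8,cnt           ; pre-adjusted loop counter
   L:   ldw,ma  4(src),t1         ; load two words, post-modifying src
        ldw,ma  4(src),t2
        stw,ma  t1,4(dst)         ; first store, post-modifying dst
        addib,>= -8,cnt,L         ; ".-12" branches back three insns
        stw,ma  t2,4(dst)         ; second store sits in the delay slot

   followed by the residual handling.  */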
2812 const char *
2813 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2815 int align = INTVAL (operands[5]);
2816 unsigned long n_bytes = INTVAL (operands[4]);
2818 /* We can't move more than a word at a time because the PA
2819 has no integer move insns longer than a word. (Could use fp mem ops?) */
2820 if (align > (TARGET_64BIT ? 8 : 4))
2821 align = (TARGET_64BIT ? 8 : 4);
2823 /* Note that we know each loop below will execute at least twice
2824 (else we would have open-coded the copy). */
2825 switch (align)
2827 case 8:
2828 /* Pre-adjust the loop counter. */
2829 operands[4] = GEN_INT (n_bytes - 16);
2830 output_asm_insn ("ldi %4,%2", operands);
2832 /* Copying loop. */
2833 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2834 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2835 output_asm_insn ("std,ma %3,8(%0)", operands);
2836 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2837 output_asm_insn ("std,ma %6,8(%0)", operands);
2839 /* Handle the residual. There could be up to 15 bytes of
2840 residual to copy! */
2841 if (n_bytes % 16 != 0)
2843 operands[4] = GEN_INT (n_bytes % 8);
2844 if (n_bytes % 16 >= 8)
2845 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2846 if (n_bytes % 8 != 0)
2847 output_asm_insn ("ldd 0(%1),%6", operands);
2848 if (n_bytes % 16 >= 8)
2849 output_asm_insn ("std,ma %3,8(%0)", operands);
2850 if (n_bytes % 8 != 0)
2851 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2853 return "";
2855 case 4:
2856 /* Pre-adjust the loop counter. */
2857 operands[4] = GEN_INT (n_bytes - 8);
2858 output_asm_insn ("ldi %4,%2", operands);
2860 /* Copying loop. */
2861 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2862 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2863 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2864 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2865 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2867 /* Handle the residual. There could be up to 7 bytes of
2868 residual to copy! */
2869 if (n_bytes % 8 != 0)
2871 operands[4] = GEN_INT (n_bytes % 4);
2872 if (n_bytes % 8 >= 4)
2873 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2874 if (n_bytes % 4 != 0)
2875 output_asm_insn ("ldw 0(%1),%6", operands);
2876 if (n_bytes % 8 >= 4)
2877 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2878 if (n_bytes % 4 != 0)
2879 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2881 return "";
2883 case 2:
2884 /* Pre-adjust the loop counter. */
2885 operands[4] = GEN_INT (n_bytes - 4);
2886 output_asm_insn ("ldi %4,%2", operands);
2888 /* Copying loop. */
2889 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2890 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2891 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2892 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2893 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2895 /* Handle the residual. */
2896 if (n_bytes % 4 != 0)
2898 if (n_bytes % 4 >= 2)
2899 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2900 if (n_bytes % 2 != 0)
2901 output_asm_insn ("ldb 0(%1),%6", operands);
2902 if (n_bytes % 4 >= 2)
2903 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2904 if (n_bytes % 2 != 0)
2905 output_asm_insn ("stb %6,0(%0)", operands);
2907 return "";
2909 case 1:
2910 /* Pre-adjust the loop counter. */
2911 operands[4] = GEN_INT (n_bytes - 2);
2912 output_asm_insn ("ldi %4,%2", operands);
2914 /* Copying loop. */
2915 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2916 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2917 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2918 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2919 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2921 /* Handle the residual. */
2922 if (n_bytes % 2 != 0)
2924 output_asm_insn ("ldb 0(%1),%3", operands);
2925 output_asm_insn ("stb %3,0(%0)", operands);
2927 return "";
2929 default:
2930 gcc_unreachable ();
2934 /* Count the number of insns necessary to handle this block move.
2936 Basic structure is the same as pa_output_block_move, except that we
2937 count insns rather than emit them. */
2939 static int
2940 compute_movmem_length (rtx_insn *insn)
2942 rtx pat = PATTERN (insn);
2943 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2944 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2945 unsigned int n_insns = 0;
2947 /* We can't move more than a word at a time because the PA
2948 has no integer move insns longer than a word. (Could use fp mem ops?) */
2949 if (align > (TARGET_64BIT ? 8 : 4))
2950 align = (TARGET_64BIT ? 8 : 4);
2952 /* The basic copying loop. */
2953 n_insns = 6;
2955 /* Residuals. */
2956 if (n_bytes % (2 * align) != 0)
2958 if ((n_bytes % (2 * align)) >= align)
2959 n_insns += 2;
2961 if ((n_bytes % align) != 0)
2962 n_insns += 2;
2965 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2966 return n_insns * 4;
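/* Worked example for the length computation above: align == 4 and
   n_bytes == 22 gives the 6-insn loop, plus 2 insns for the word
   residual (22 % 8 >= 4) and 2 more for the sub-word residual
   (22 % 4 != 0), i.e. 10 insns and a length of 40 bytes.  */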
2969 /* Emit code to perform a block clear.
2971 OPERANDS[0] is the destination pointer as a REG, clobbered.
2972 OPERANDS[1] is a register for temporary storage.
2973 OPERANDS[2] is the size as a CONST_INT
2974 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2976 const char *
2977 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2979 int align = INTVAL (operands[3]);
2980 unsigned long n_bytes = INTVAL (operands[2]);
2982 /* We can't clear more than a word at a time because the PA
2983 has no integer move insns longer than a word. */
2984 if (align > (TARGET_64BIT ? 8 : 4))
2985 align = (TARGET_64BIT ? 8 : 4);
2987 /* Note that we know each loop below will execute at least twice
2988 (else we would have open-coded the copy). */
2989 switch (align)
2991 case 8:
2992 /* Pre-adjust the loop counter. */
2993 operands[2] = GEN_INT (n_bytes - 16);
2994 output_asm_insn ("ldi %2,%1", operands);
2996 /* Loop. */
2997 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2998 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2999 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3001 /* Handle the residual. There could be up to 15 bytes of
3002 residual to clear! */
3003 if (n_bytes % 16 != 0)
3005 operands[2] = GEN_INT (n_bytes % 8);
3006 if (n_bytes % 16 >= 8)
3007 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3008 if (n_bytes % 8 != 0)
3009 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3011 return "";
3013 case 4:
3014 /* Pre-adjust the loop counter. */
3015 operands[2] = GEN_INT (n_bytes - 8);
3016 output_asm_insn ("ldi %2,%1", operands);
3018 /* Loop. */
3019 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3020 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3021 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3023 /* Handle the residual. There could be up to 7 bytes of
3024 residual to clear! */
3025 if (n_bytes % 8 != 0)
3027 operands[2] = GEN_INT (n_bytes % 4);
3028 if (n_bytes % 8 >= 4)
3029 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3030 if (n_bytes % 4 != 0)
3031 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3033 return "";
3035 case 2:
3036 /* Pre-adjust the loop counter. */
3037 operands[2] = GEN_INT (n_bytes - 4);
3038 output_asm_insn ("ldi %2,%1", operands);
3040 /* Loop. */
3041 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3042 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3043 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3045 /* Handle the residual. */
3046 if (n_bytes % 4 != 0)
3048 if (n_bytes % 4 >= 2)
3049 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3050 if (n_bytes % 2 != 0)
3051 output_asm_insn ("stb %%r0,0(%0)", operands);
3053 return "";
3055 case 1:
3056 /* Pre-adjust the loop counter. */
3057 operands[2] = GEN_INT (n_bytes - 2);
3058 output_asm_insn ("ldi %2,%1", operands);
3060 /* Loop. */
3061 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3062 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3063 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3065 /* Handle the residual. */
3066 if (n_bytes % 2 != 0)
3067 output_asm_insn ("stb %%r0,0(%0)", operands);
3069 return "";
3071 default:
3072 gcc_unreachable ();
3076 /* Count the number of insns necessary to handle this block clear.
3078 Basic structure is the same as pa_output_block_clear, except that we
3079 count insns rather than emit them. */
3081 static int
3082 compute_clrmem_length (rtx_insn *insn)
3084 rtx pat = PATTERN (insn);
3085 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3086 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3087 unsigned int n_insns = 0;
3089 /* We can't clear more than a word at a time because the PA
3090 has no integer move insns longer than a word. */
3091 if (align > (TARGET_64BIT ? 8 : 4))
3092 align = (TARGET_64BIT ? 8 : 4);
3094 /* The basic loop. */
3095 n_insns = 4;
3097 /* Residuals. */
3098 if (n_bytes % (2 * align) != 0)
3100 if ((n_bytes % (2 * align)) >= align)
3101 n_insns++;
3103 if ((n_bytes % align) != 0)
3104 n_insns++;
3107 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3108 return n_insns * 4;
3112 const char *
3113 pa_output_and (rtx *operands)
3115 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3117 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3118 int ls0, ls1, ms0, p, len;
3120 for (ls0 = 0; ls0 < 32; ls0++)
3121 if ((mask & (1 << ls0)) == 0)
3122 break;
3124 for (ls1 = ls0; ls1 < 32; ls1++)
3125 if ((mask & (1 << ls1)) != 0)
3126 break;
3128 for (ms0 = ls1; ms0 < 32; ms0++)
3129 if ((mask & (1 << ms0)) == 0)
3130 break;
3132 gcc_assert (ms0 == 32);
3134 if (ls1 == 32)
3136 len = ls0;
3138 gcc_assert (len);
3140 operands[2] = GEN_INT (len);
3141 return "{extru|extrw,u} %1,31,%2,%0";
3143 else
3145 /* We could use this `depi' for the case above as well, but `depi'
3146 requires one more register file access than an `extru'. */
3148 p = 31 - ls0;
3149 len = ls1 - ls0;
3151 operands[2] = GEN_INT (p);
3152 operands[3] = GEN_INT (len);
3153 return "{depi|depwi} 0,%2,%3,%0";
3156 else
3157 return "and %1,%2,%0";
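   /* Two illustrative masks for the code above (constants
      hypothetical): 0x000000ff has ls1 == 32, so we emit
      "extru %1,31,8,%0" and keep just the low byte; 0xffff00ff has
      ls0 == 8 and ls1 == 16, so we emit "depwi 0,23,8,%0" to clear
      bits 8-15 (PA numbers bit positions from the MSB, hence
      31 - 8 == 23).  */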
3160 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3161 storing the result in operands[0]. */
3162 const char *
3163 pa_output_64bit_and (rtx *operands)
3165 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3167 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3168 int ls0, ls1, ms0, p, len;
3170 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3171 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3172 break;
3174 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3175 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3176 break;
3178 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3179 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3180 break;
3182 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3184 if (ls1 == HOST_BITS_PER_WIDE_INT)
3186 len = ls0;
3188 gcc_assert (len);
3190 operands[2] = GEN_INT (len);
3191 return "extrd,u %1,63,%2,%0";
3193 else
3195 /* We could use this `depi' for the case above as well, but `depi'
3196 requires one more register file access than an `extru'. */
3198 p = 63 - ls0;
3199 len = ls1 - ls0;
3201 operands[2] = GEN_INT (p);
3202 operands[3] = GEN_INT (len);
3203 return "depdi 0,%2,%3,%0";
3206 else
3207 return "and %1,%2,%0";
3210 const char *
3211 pa_output_ior (rtx *operands)
3213 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3214 int bs0, bs1, p, len;
3216 if (INTVAL (operands[2]) == 0)
3217 return "copy %1,%0";
3219 for (bs0 = 0; bs0 < 32; bs0++)
3220 if ((mask & (1 << bs0)) != 0)
3221 break;
3223 for (bs1 = bs0; bs1 < 32; bs1++)
3224 if ((mask & (1 << bs1)) == 0)
3225 break;
3227 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3229 p = 31 - bs0;
3230 len = bs1 - bs0;
3232 operands[2] = GEN_INT (p);
3233 operands[3] = GEN_INT (len);
3234 return "{depi|depwi} -1,%2,%3,%0";
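   /* Illustrative mask for the code above (constant hypothetical):
      operands[2] == 0x00000f00 gives bs0 == 8 and bs1 == 12, so we
      emit "depwi -1,23,4,%0" to set bits 8-11; depi modifies its
      target in place, so the insn's constraints presumably tie
      operands[0] to operands[1].  */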
3237 /* Return a string to perform a bitwise inclusive-or of operands[1] with
3238 operands[2], storing the result in operands[0]. */
3239 const char *
3240 pa_output_64bit_ior (rtx *operands)
3242 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3243 int bs0, bs1, p, len;
3245 if (INTVAL (operands[2]) == 0)
3246 return "copy %1,%0";
3248 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3249 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3250 break;
3252 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3253 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3254 break;
3256 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3257 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3259 p = 63 - bs0;
3260 len = bs1 - bs0;
3262 operands[2] = GEN_INT (p);
3263 operands[3] = GEN_INT (len);
3264 return "depdi -1,%2,%3,%0";
3267 /* Target hook for assembling integer objects. This code handles
3268 aligned SI and DI integers specially since function references
3269 must be preceded by P%. */
3271 static bool
3272 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3274 if (size == UNITS_PER_WORD
3275 && aligned_p
3276 && function_label_operand (x, VOIDmode))
3278 fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3280 /* We don't want an OPD when generating fast indirect calls. */
3281 if (!TARGET_FAST_INDIRECT_CALLS)
3282 fputs ("P%", asm_out_file);
3284 output_addr_const (asm_out_file, x);
3285 fputc ('\n', asm_out_file);
3286 return true;
3288 return default_assemble_integer (x, size, aligned_p);
3291 /* Output an ascii string. */
3292 void
3293 pa_output_ascii (FILE *file, const char *p, int size)
3295 int i;
3296 int chars_output;
3297 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3299 /* The HP assembler can only take strings of 256 characters at one
3300 time. This is a limitation on input line length, *not* the
3301 length of the string. Sigh. Even worse, it seems that the
3302 restriction is in number of input characters (see \xnn &
3303 \whatever). So we have to do this very carefully. */
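   /* The 243-character budget used below appears chosen so that a
      flushed line always fits: at most 243 string characters plus the
      tab, ".STRING " and the two quotes is 254 characters, inside the
      256-character limit; each group of four input characters can add
      at most 16 output characters (see partial_output).  */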
3305 fputs ("\t.STRING \"", file);
3307 chars_output = 0;
3308 for (i = 0; i < size; i += 4)
3310 int co = 0;
3311 int io = 0;
3312 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3314 register unsigned int c = (unsigned char) p[i + io];
3316 if (c == '\"' || c == '\\')
3317 partial_output[co++] = '\\';
3318 if (c >= ' ' && c < 0177)
3319 partial_output[co++] = c;
3320 else
3322 unsigned int hexd;
3323 partial_output[co++] = '\\';
3324 partial_output[co++] = 'x';
3325 hexd = c / 16 - 0 + '0';
3326 if (hexd > '9')
3327 hexd -= '9' - 'a' + 1;
3328 partial_output[co++] = hexd;
3329 hexd = c % 16 - 0 + '0';
3330 if (hexd > '9')
3331 hexd -= '9' - 'a' + 1;
3332 partial_output[co++] = hexd;
3335 if (chars_output + co > 243)
3337 fputs ("\"\n\t.STRING \"", file);
3338 chars_output = 0;
3340 fwrite (partial_output, 1, (size_t) co, file);
3341 chars_output += co;
3342 co = 0;
3344 fputs ("\"\n", file);
3347 /* Try to rewrite floating point comparisons & branches to avoid
3348 useless add,tr insns.
3350 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3351 to see if CCFP is dead. CHECK_NOTES is nonzero for the
3352 first attempt to remove useless add,tr insns. It is zero
3353 for the second pass as reorg sometimes leaves bogus REG_DEAD
3354 notes lying around.
3356 When CHECK_NOTES is zero we can only eliminate add,tr insns
3357 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3358 instructions. */
3359 static void
3360 remove_useless_addtr_insns (int check_notes)
3362 rtx_insn *insn;
3363 static int pass = 0;
3365 /* This is fairly cheap, so always run it when optimizing. */
3366 if (optimize > 0)
3368 int fcmp_count = 0;
3369 int fbranch_count = 0;
3371 /* Walk all the insns in this function looking for fcmp & fbranch
3372 instructions. Keep track of how many of each we find. */
3373 for (insn = get_insns (); insn; insn = next_insn (insn))
3375 rtx tmp;
3377 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3378 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3379 continue;
3381 tmp = PATTERN (insn);
3383 /* It must be a set. */
3384 if (GET_CODE (tmp) != SET)
3385 continue;
3387 /* If the destination is CCFP, then we've found an fcmp insn. */
3388 tmp = SET_DEST (tmp);
3389 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3391 fcmp_count++;
3392 continue;
3395 tmp = PATTERN (insn);
3396 /* If this is an fbranch instruction, bump the fbranch counter. */
3397 if (GET_CODE (tmp) == SET
3398 && SET_DEST (tmp) == pc_rtx
3399 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3400 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3401 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3402 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3404 fbranch_count++;
3405 continue;
3410 /* Find all floating point compare + branch insns. If possible,
3411 reverse the comparison & the branch to avoid add,tr insns. */
3412 for (insn = get_insns (); insn; insn = next_insn (insn))
3414 rtx tmp;
3415 rtx_insn *next;
3417 /* Ignore anything that isn't an INSN. */
3418 if (! NONJUMP_INSN_P (insn))
3419 continue;
3421 tmp = PATTERN (insn);
3423 /* It must be a set. */
3424 if (GET_CODE (tmp) != SET)
3425 continue;
3427 /* The destination must be CCFP, which is register zero. */
3428 tmp = SET_DEST (tmp);
3429 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3430 continue;
3432 /* INSN should be a set of CCFP.
3434 See if the result of this insn is used in a reversed FP
3435 conditional branch. If so, reverse our condition and
3436 the branch. Doing so avoids useless add,tr insns. */
3437 next = next_insn (insn);
3438 while (next)
3440 /* Jumps, calls and labels stop our search. */
3441 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3442 break;
3444 /* As does another fcmp insn. */
3445 if (NONJUMP_INSN_P (next)
3446 && GET_CODE (PATTERN (next)) == SET
3447 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3448 && REGNO (SET_DEST (PATTERN (next))) == 0)
3449 break;
3451 next = next_insn (next);
3454 /* Is NEXT_INSN a branch? */
3455 if (next && JUMP_P (next))
3457 rtx pattern = PATTERN (next);
3459 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3460 and CCFP dies, then reverse our conditional and the branch
3461 to avoid the add,tr. */
3462 if (GET_CODE (pattern) == SET
3463 && SET_DEST (pattern) == pc_rtx
3464 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3465 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3466 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3467 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3468 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3469 && (fcmp_count == fbranch_count
3470 || (check_notes
3471 && find_regno_note (next, REG_DEAD, 0))))
3473 /* Reverse the branch. */
3474 tmp = XEXP (SET_SRC (pattern), 1);
3475 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3476 XEXP (SET_SRC (pattern), 2) = tmp;
3477 INSN_CODE (next) = -1;
3479 /* Reverse our condition. */
3480 tmp = PATTERN (insn);
3481 PUT_CODE (XEXP (tmp, 1),
3482 (reverse_condition_maybe_unordered
3483 (GET_CODE (XEXP (tmp, 1)))));
3489 pass = !pass;
3493 /* You may have trouble believing this, but this is the 32 bit HP-PA
3494 stack layout. Wow.
3496 Offset Contents
3498 Variable arguments (optional; any number may be allocated)
3500 SP-(4*(N+9)) arg word N
3502 SP-56 arg word 5
3503 SP-52 arg word 4
3505 Fixed arguments (must be allocated; may remain unused)
3507 SP-48 arg word 3
3508 SP-44 arg word 2
3509 SP-40 arg word 1
3510 SP-36 arg word 0
3512 Frame Marker
3514 SP-32 External Data Pointer (DP)
3515 SP-28 External sr4
3516 SP-24 External/stub RP (RP')
3517 SP-20 Current RP
3518 SP-16 Static Link
3519 SP-12 Clean up
3520 SP-8 Calling Stub RP (RP'')
3521 SP-4 Previous SP
3523 Top of Frame
3525 SP-0 Stack Pointer (points to next available address)
3529 /* This function saves registers as follows. Registers marked with ' are
3530 this function's registers (as opposed to the previous function's).
3531 If a frame_pointer isn't needed, r4 is saved as a general register;
3532 the space for the frame pointer is still allocated, though, to keep
3533 things simple.
3536 Top of Frame
3538 SP (FP') Previous FP
3539 SP + 4 Alignment filler (sigh)
3540 SP + 8 Space for locals reserved here.
3544 SP + n All call saved register used.
3548 SP + o All call saved fp registers used.
3552 SP + p (SP') points to next available address.
3556 /* Global variables set by output_function_prologue(). */
3557 /* Size of frame. Need to know this to emit return insns from
3558 leaf procedures. */
3559 static HOST_WIDE_INT actual_fsize, local_fsize;
3560 static int save_fregs;
3562 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3563 Handle case where DISP > 8k by using the add_high_const patterns.
3565 Note in DISP > 8k case, we will leave the high part of the address
3566 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
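/* Illustrative sketch of the common DISP > 8k 32-bit path below (not
   the literal emitted text): the high-part add leaves %r1 holding
   BASE + %hi(DISP), roughly "addil L'disp,%base", and the store then
   uses the low part as the displacement, roughly
   "stw %src,R'disp(%r1)".  */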
3568 static void
3569 store_reg (int reg, HOST_WIDE_INT disp, int base)
3571 rtx dest, src, basereg;
3572 rtx_insn *insn;
3574 src = gen_rtx_REG (word_mode, reg);
3575 basereg = gen_rtx_REG (Pmode, base);
3576 if (VAL_14_BITS_P (disp))
3578 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3579 insn = emit_move_insn (dest, src);
3581 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3583 rtx delta = GEN_INT (disp);
3584 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3586 emit_move_insn (tmpreg, delta);
3587 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3588 if (DO_FRAME_NOTES)
3590 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3591 gen_rtx_SET (tmpreg,
3592 gen_rtx_PLUS (Pmode, basereg, delta)));
3593 RTX_FRAME_RELATED_P (insn) = 1;
3595 dest = gen_rtx_MEM (word_mode, tmpreg);
3596 insn = emit_move_insn (dest, src);
3598 else
3600 rtx delta = GEN_INT (disp);
3601 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3602 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3604 emit_move_insn (tmpreg, high);
3605 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3606 insn = emit_move_insn (dest, src);
3607 if (DO_FRAME_NOTES)
3608 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3609 gen_rtx_SET (gen_rtx_MEM (word_mode,
3610 gen_rtx_PLUS (word_mode,
3611 basereg,
3612 delta)),
3613 src));
3616 if (DO_FRAME_NOTES)
3617 RTX_FRAME_RELATED_P (insn) = 1;
3620 /* Emit RTL to store REG at the memory location specified by BASE and then
3621 add MOD to BASE. MOD must be <= 8k. */
3623 static void
3624 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3626 rtx basereg, srcreg, delta;
3627 rtx_insn *insn;
3629 gcc_assert (VAL_14_BITS_P (mod));
3631 basereg = gen_rtx_REG (Pmode, base);
3632 srcreg = gen_rtx_REG (word_mode, reg);
3633 delta = GEN_INT (mod);
3635 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3636 if (DO_FRAME_NOTES)
3638 RTX_FRAME_RELATED_P (insn) = 1;
3640 /* RTX_FRAME_RELATED_P must be set on each frame related set
3641 in a parallel with more than one element. */
3642 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3643 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3647 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3648 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3649 whether to add a frame note or not.
3651 In the DISP > 8k case, we leave the high part of the address in %r1.
3652 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3654 static void
3655 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3657 rtx_insn *insn;
3659 if (VAL_14_BITS_P (disp))
3661 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3662 plus_constant (Pmode,
3663 gen_rtx_REG (Pmode, base), disp));
3665 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3667 rtx basereg = gen_rtx_REG (Pmode, base);
3668 rtx delta = GEN_INT (disp);
3669 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3671 emit_move_insn (tmpreg, delta);
3672 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3673 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3674 if (DO_FRAME_NOTES)
3675 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3676 gen_rtx_SET (tmpreg,
3677 gen_rtx_PLUS (Pmode, basereg, delta)));
3679 else
3681 rtx basereg = gen_rtx_REG (Pmode, base);
3682 rtx delta = GEN_INT (disp);
3683 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3685 emit_move_insn (tmpreg,
3686 gen_rtx_PLUS (Pmode, basereg,
3687 gen_rtx_HIGH (Pmode, delta)));
3688 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3689 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3692 if (DO_FRAME_NOTES && note)
3693 RTX_FRAME_RELATED_P (insn) = 1;
3696 HOST_WIDE_INT
3697 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3699 int freg_saved = 0;
3700 int i, j;
3702 /* The code in pa_expand_prologue and pa_expand_epilogue must
3703 be consistent with the rounding and size calculation done here.
3704 Change them at the same time. */
3706 /* We do our own stack alignment. First, round the size of the
3707 stack locals up to a word boundary. */
3708 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
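  /* E.g., with 4-byte words a 13-byte locals area rounds up to 16:
     (13 + 3) & ~3 == 16.  */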
3710 /* Space for previous frame pointer + filler. If any frame is
3711 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3712 waste some space here for the sake of HP compatibility. The
3713 first slot is only used when the frame pointer is needed. */
3714 if (size || frame_pointer_needed)
3715 size += STARTING_FRAME_OFFSET;
3717 /* If the current function calls __builtin_eh_return, then we need
3718 to allocate stack space for registers that will hold data for
3719 the exception handler. */
3720 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3722 unsigned int i;
3724 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3725 continue;
3726 size += i * UNITS_PER_WORD;
3729 /* Account for space used by the callee general register saves. */
3730 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3731 if (df_regs_ever_live_p (i))
3732 size += UNITS_PER_WORD;
3734 /* Account for space used by the callee floating point register saves. */
3735 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3736 if (df_regs_ever_live_p (i)
3737 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3739 freg_saved = 1;
3741 /* We always save both halves of the FP register, so always
3742 increment the frame size by 8 bytes. */
3743 size += 8;
3746 /* If any of the floating registers are saved, account for the
3747 alignment needed for the floating point register save block. */
3748 if (freg_saved)
3750 size = (size + 7) & ~7;
3751 if (fregs_live)
3752 *fregs_live = 1;
3755 /* The various ABIs include space for the outgoing parameters in the
3756 size of the current function's stack frame. We don't need to align
3757 for the outgoing arguments as their alignment is set by the final
3758 rounding for the frame as a whole. */
3759 size += crtl->outgoing_args_size;
3761 /* Allocate space for the fixed frame marker. This space must be
3762 allocated for any function that makes calls or allocates
3763 stack space. */
3764 if (!crtl->is_leaf || size)
3765 size += TARGET_64BIT ? 48 : 32;
3767 /* Finally, round to the preferred stack boundary. */
3768 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3769 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
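/* A worked example, assuming PREFERRED_STACK_BOUNDARY is 512 bits
   (64 bytes) as on the 32-bit runtime:

     size == 100:  (100 + 63) & ~63 == 128
     size == 128:  (128 + 63) & ~63 == 128  */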
3772 /* Generate the assembly code for function entry. FILE is a stdio
3773 stream to output the code to. SIZE is an int: how many units of
3774 temporary storage to allocate.
3776 Refer to the array `regs_ever_live' to determine which registers to
3777 save; `regs_ever_live[I]' is nonzero if register number I is ever
3778 used in the function. This function is responsible for knowing
3779 which registers should not be saved even if used. */
3781 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3782 of memory. If any fpu reg is used in the function, we allocate
3783 such a block here, at the bottom of the frame, just in case it's needed.
3785 If this function is a leaf procedure, then we may choose not
3786 to do a "save" insn. The decision about whether or not
3787 to do this is made in regclass.c. */
3789 static void
3790 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3792 /* The function's label and associated .PROC must never be
3793 separated and must be output *after* any profiling declarations
3794 to avoid changing spaces/subspaces within a procedure. */
3795 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3796 fputs ("\t.PROC\n", file);
3798 /* pa_expand_prologue does the dirty work now. We just need
3799 to output the assembler directives which denote the start
3800 of a function. */
3801 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3802 if (crtl->is_leaf)
3803 fputs (",NO_CALLS", file);
3804 else
3805 fputs (",CALLS", file);
3806 if (rp_saved)
3807 fputs (",SAVE_RP", file);
3809 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3810 at the beginning of the frame and that it is used as the frame
3811 pointer for the frame. We do this because our current frame
3812 layout doesn't conform to that specified in the HP runtime
3813 documentation and we need a way to indicate to programs such as
3814 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3815 isn't used by HP compilers but is supported by the assembler.
3816 However, SAVE_SP is supposed to indicate that the previous stack
3817 pointer has been saved in the frame marker. */
3818 if (frame_pointer_needed)
3819 fputs (",SAVE_SP", file);
3821 /* Pass on information about the number of callee register saves
3822 performed in the prologue.
3824 The compiler is supposed to pass the highest register number
3825 saved; the assembler then has to adjust that number before
3826 entering it into the unwind descriptor (to account for any
3827 caller saved registers with lower register numbers than the
3828 first callee saved register). */
3829 if (gr_saved)
3830 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3832 if (fr_saved)
3833 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
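/* For illustration: the callee GR saves start at %r3 and the FP
   saves at %fr12 on the 32-bit port, so saving %r3 and %r4
   (gr_saved == 2) produces ENTRY_GR=4, and a single FP save
   (fr_saved == 1) produces ENTRY_FR=12.  */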
3835 fputs ("\n\t.ENTRY\n", file);
3837 remove_useless_addtr_insns (0);
3840 void
3841 pa_expand_prologue (void)
3843 int merge_sp_adjust_with_store = 0;
3844 HOST_WIDE_INT size = get_frame_size ();
3845 HOST_WIDE_INT offset;
3846 int i;
3847 rtx tmpreg;
3848 rtx_insn *insn;
3850 gr_saved = 0;
3851 fr_saved = 0;
3852 save_fregs = 0;
3854 /* Compute total size for frame pointer, filler, locals and rounding to
3855 the next word boundary. Similar code appears in pa_compute_frame_size
3856 and must be changed in tandem with this code. */
3857 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3858 if (local_fsize || frame_pointer_needed)
3859 local_fsize += STARTING_FRAME_OFFSET;
3861 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3862 if (flag_stack_usage_info)
3863 current_function_static_stack_size = actual_fsize;
3865 /* Compute a few things we will use often. */
3866 tmpreg = gen_rtx_REG (word_mode, 1);
3868 /* Save RP first. The calling conventions manual states RP will
3869 always be stored into the caller's frame at sp - 20 or sp - 16
3870 depending on which ABI is in use. */
3871 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3873 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3874 rp_saved = true;
3876 else
3877 rp_saved = false;
3879 /* Allocate the local frame and set up the frame pointer if needed. */
3880 if (actual_fsize != 0)
3882 if (frame_pointer_needed)
3884 /* Copy the old frame pointer temporarily into %r1. Set up the
3885 new stack pointer, then store away the saved old frame pointer
3886 into the stack at sp and at the same time update the stack
3887 pointer by actual_fsize bytes. Two versions: the first
3888 handles small (<8k) frames, the second large (>=8k)
3889 frames. */
3890 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3891 if (DO_FRAME_NOTES)
3892 RTX_FRAME_RELATED_P (insn) = 1;
3894 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3895 if (DO_FRAME_NOTES)
3896 RTX_FRAME_RELATED_P (insn) = 1;
3898 if (VAL_14_BITS_P (actual_fsize))
3899 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3900 else
3902 /* It is incorrect to store the saved frame pointer at *sp,
3903 then increment sp (writes beyond the current stack boundary).
3905 So instead use stwm to store at *sp and post-increment the
3906 stack pointer as an atomic operation. Then increment sp to
3907 finish allocating the new frame. */
3908 HOST_WIDE_INT adjust1 = 8192 - 64;
3909 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3911 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3912 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3913 adjust2, 1);
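/* A worked example of the split: for actual_fsize == 20000,
   adjust1 == 8192 - 64 == 8128 (still a valid 14-bit immediate
   for the stwm) and adjust2 == 11872.  The stwm stores the saved
   frame pointer and bumps %sp by 8128 atomically; the second
   adjustment finishes the allocation.  */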
3916 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3917 we need to store the previous stack pointer (frame pointer)
3918 into the frame marker on targets that use the HP unwind
3919 library. This allows the HP unwind library to be used to
3920 unwind GCC frames. However, we are not fully compatible
3921 with the HP library because our frame layout differs from
3922 that specified in the HP runtime specification.
3924 We don't want a frame note on this instruction as the frame
3925 marker moves during dynamic stack allocation.
3927 This instruction also serves as a blockage to prevent
3928 register spills from being scheduled before the stack
3929 pointer is raised. This is necessary as we store
3930 registers using the frame pointer as a base register,
3931 and the frame pointer is set before sp is raised. */
3932 if (TARGET_HPUX_UNWIND_LIBRARY)
3934 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3935 GEN_INT (TARGET_64BIT ? -8 : -4));
3937 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3938 hard_frame_pointer_rtx);
3940 else
3941 emit_insn (gen_blockage ());
3943 /* No frame pointer needed. */
3944 else
3946 /* In some cases we can perform the first callee register save
3947 and allocating the stack frame at the same time. If so, just
3948 make a note of it and defer allocating the frame until saving
3949 the callee registers. */
3950 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3951 merge_sp_adjust_with_store = 1;
3952 /* Cannot optimize. Adjust the stack frame by actual_fsize
3953 bytes. */
3954 else
3955 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3956 actual_fsize, 1);
3960 /* Normal register save.
3962 Do not save the frame pointer in the frame_pointer_needed case. It
3963 was done earlier. */
3964 if (frame_pointer_needed)
3966 offset = local_fsize;
3968 /* Saving the EH return data registers in the frame is the simplest
3969 way to get the frame unwind information emitted. We put them
3970 just before the general registers. */
3971 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3973 unsigned int i, regno;
3975 for (i = 0; ; ++i)
3977 regno = EH_RETURN_DATA_REGNO (i);
3978 if (regno == INVALID_REGNUM)
3979 break;
3981 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
3982 offset += UNITS_PER_WORD;
3986 for (i = 18; i >= 4; i--)
3987 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3989 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
3990 offset += UNITS_PER_WORD;
3991 gr_saved++;
3993 /* Account for %r3 which is saved in a special place. */
3994 gr_saved++;
3996 /* No frame pointer needed. */
3997 else
3999 offset = local_fsize - actual_fsize;
4001 /* Saving the EH return data registers in the frame is the simplest
4002 way to get the frame unwind information emitted. */
4003 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4005 unsigned int i, regno;
4007 for (i = 0; ; ++i)
4009 regno = EH_RETURN_DATA_REGNO (i);
4010 if (regno == INVALID_REGNUM)
4011 break;
4013 /* If merge_sp_adjust_with_store is nonzero, then we can
4014 optimize the first save. */
4015 if (merge_sp_adjust_with_store)
4017 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4018 merge_sp_adjust_with_store = 0;
4020 else
4021 store_reg (regno, offset, STACK_POINTER_REGNUM);
4022 offset += UNITS_PER_WORD;
4026 for (i = 18; i >= 3; i--)
4027 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4029 /* If merge_sp_adjust_with_store is nonzero, then we can
4030 optimize the first GR save. */
4031 if (merge_sp_adjust_with_store)
4033 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4034 merge_sp_adjust_with_store = 0;
4036 else
4037 store_reg (i, offset, STACK_POINTER_REGNUM);
4038 offset += UNITS_PER_WORD;
4039 gr_saved++;
4042 /* If we wanted to merge the SP adjustment with a GR save, but we never
4043 did any GR saves, then just emit the adjustment here. */
4044 if (merge_sp_adjust_with_store)
4045 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4046 actual_fsize, 1);
4049 /* The hppa calling conventions say that %r19, the pic offset
4050 register, is saved at sp - 32 (in this function's frame)
4051 when generating PIC code. FIXME: What is the correct thing
4052 to do for functions which make no calls and allocate no
4053 frame? Do we need to allocate a frame, or can we just omit
4054 the save? For now we'll just omit the save.
4056 We don't want a note on this insn as the frame marker can
4057 move if there is a dynamic stack allocation. */
4058 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4060 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4062 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4066 /* Align pointer properly (doubleword boundary). */
4067 offset = (offset + 7) & ~7;
4069 /* Floating point register store. */
4070 if (save_fregs)
4072 rtx base;
4074 /* First get the frame or stack pointer to the start of the FP register
4075 save area. */
4076 if (frame_pointer_needed)
4078 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4079 base = hard_frame_pointer_rtx;
4081 else
4083 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4084 base = stack_pointer_rtx;
4087 /* Now actually save the FP registers. */
4088 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4090 if (df_regs_ever_live_p (i)
4091 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4093 rtx addr, reg;
4094 rtx_insn *insn;
4095 addr = gen_rtx_MEM (DFmode,
4096 gen_rtx_POST_INC (word_mode, tmpreg));
4097 reg = gen_rtx_REG (DFmode, i);
4098 insn = emit_move_insn (addr, reg);
4099 if (DO_FRAME_NOTES)
4101 RTX_FRAME_RELATED_P (insn) = 1;
4102 if (TARGET_64BIT)
4104 rtx mem = gen_rtx_MEM (DFmode,
4105 plus_constant (Pmode, base,
4106 offset));
4107 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4108 gen_rtx_SET (mem, reg));
4110 else
4112 rtx meml = gen_rtx_MEM (SFmode,
4113 plus_constant (Pmode, base,
4114 offset));
4115 rtx memr = gen_rtx_MEM (SFmode,
4116 plus_constant (Pmode, base,
4117 offset + 4));
4118 rtx regl = gen_rtx_REG (SFmode, i);
4119 rtx regr = gen_rtx_REG (SFmode, i + 1);
4120 rtx setl = gen_rtx_SET (meml, regl);
4121 rtx setr = gen_rtx_SET (memr, regr);
4122 rtvec vec;
4124 RTX_FRAME_RELATED_P (setl) = 1;
4125 RTX_FRAME_RELATED_P (setr) = 1;
4126 vec = gen_rtvec (2, setl, setr);
4127 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4128 gen_rtx_SEQUENCE (VOIDmode, vec));
4131 offset += GET_MODE_SIZE (DFmode);
4132 fr_saved++;
4138 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4139 Handle case where DISP > 8k by using the add_high_const patterns. */
4141 static void
4142 load_reg (int reg, HOST_WIDE_INT disp, int base)
4144 rtx dest = gen_rtx_REG (word_mode, reg);
4145 rtx basereg = gen_rtx_REG (Pmode, base);
4146 rtx src;
4148 if (VAL_14_BITS_P (disp))
4149 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4150 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4152 rtx delta = GEN_INT (disp);
4153 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4155 emit_move_insn (tmpreg, delta);
4156 if (TARGET_DISABLE_INDEXING)
4158 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4159 src = gen_rtx_MEM (word_mode, tmpreg);
4161 else
4162 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4164 else
4166 rtx delta = GEN_INT (disp);
4167 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4168 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4170 emit_move_insn (tmpreg, high);
4171 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
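/* As with set_reg_plus_d, this branch turns into the usual
   two-instruction sequence, roughly

     addil  L'disp,%base      ; %r1 = %base + left part of disp
     ldw    R'disp(%r1),%reg  ; load word using the low part  */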
4174 emit_move_insn (dest, src);
4177 /* Update the total code bytes output to the text section. */
4179 static void
4180 update_total_code_bytes (unsigned int nbytes)
4182 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4183 && !IN_NAMED_SECTION_P (cfun->decl))
4185 unsigned int old_total = total_code_bytes;
4187 total_code_bytes += nbytes;
4189 /* Be prepared to handle overflows. */
4190 if (old_total > total_code_bytes)
4191 total_code_bytes = UINT_MAX;
4195 /* This function generates the assembly code for function exit.
4196 Args are as for output_function_prologue ().
4198 The function epilogue should not depend on the current stack
4199 pointer! It should use the frame pointer only. This is mandatory
4200 because of alloca; we also take advantage of it to omit stack
4201 adjustments before returning. */
4203 static void
4204 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4206 rtx_insn *insn = get_last_insn ();
4207 bool extra_nop;
4209 /* pa_expand_epilogue does the dirty work now. We just need
4210 to output the assembler directives which denote the end
4211 of a function.
4213 To make debuggers happy, emit a nop if the epilogue was completely
4214 eliminated due to a volatile call as the last insn in the
4215 current function. That way the return address (in %r2) will
4216 always point to a valid instruction in the current function. */
4218 /* Get the last real insn. */
4219 if (NOTE_P (insn))
4220 insn = prev_real_insn (insn);
4222 /* If it is a sequence, then look inside. */
4223 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4224 insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0);
4226 /* If insn is a CALL_INSN, then it must be a call to a volatile
4227 function (otherwise there would be epilogue insns). */
4228 if (insn && CALL_P (insn))
4230 fputs ("\tnop\n", file);
4231 extra_nop = true;
4233 else
4234 extra_nop = false;
4236 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4238 if (TARGET_SOM && TARGET_GAS)
4240 /* We are done with this subspace except possibly for some additional
4241 debug information. Forget that we are in this subspace to ensure
4242 that the next function is output in its own subspace. */
4243 in_section = NULL;
4244 cfun->machine->in_nsubspa = 2;
4247 /* Thunks do their own insn accounting. */
4248 if (cfun->is_thunk)
4249 return;
4251 if (INSN_ADDRESSES_SET_P ())
4253 last_address = extra_nop ? 4 : 0;
4254 insn = get_last_nonnote_insn ();
4255 if (insn)
4257 last_address += INSN_ADDRESSES (INSN_UID (insn));
4258 if (INSN_P (insn))
4259 last_address += insn_default_length (insn);
4261 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4262 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4264 else
4265 last_address = UINT_MAX;
4267 /* Finally, update the total number of code bytes output so far. */
4268 update_total_code_bytes (last_address);
4271 void
4272 pa_expand_epilogue (void)
4274 rtx tmpreg;
4275 HOST_WIDE_INT offset;
4276 HOST_WIDE_INT ret_off = 0;
4277 int i;
4278 int merge_sp_adjust_with_load = 0;
4280 /* We will use this often. */
4281 tmpreg = gen_rtx_REG (word_mode, 1);
4283 /* Try to restore RP early to avoid load/use interlocks when
4284 RP gets used in the return (bv) instruction. This appears to still
4285 be necessary even when we schedule the prologue and epilogue. */
4286 if (rp_saved)
4288 ret_off = TARGET_64BIT ? -16 : -20;
4289 if (frame_pointer_needed)
4291 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4292 ret_off = 0;
4294 else
4296 /* No frame pointer, and stack is smaller than 8k. */
4297 if (VAL_14_BITS_P (ret_off - actual_fsize))
4299 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4300 ret_off = 0;
4305 /* General register restores. */
4306 if (frame_pointer_needed)
4308 offset = local_fsize;
4310 /* If the current function calls __builtin_eh_return, then we need
4311 to restore the saved EH data registers. */
4312 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4314 unsigned int i, regno;
4316 for (i = 0; ; ++i)
4318 regno = EH_RETURN_DATA_REGNO (i);
4319 if (regno == INVALID_REGNUM)
4320 break;
4322 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4323 offset += UNITS_PER_WORD;
4327 for (i = 18; i >= 4; i--)
4328 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4330 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4331 offset += UNITS_PER_WORD;
4334 else
4336 offset = local_fsize - actual_fsize;
4338 /* If the current function calls __builtin_eh_return, then we need
4339 to restore the saved EH data registers. */
4340 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4342 unsigned int i, regno;
4344 for (i = 0; ; ++i)
4346 regno = EH_RETURN_DATA_REGNO (i);
4347 if (regno == INVALID_REGNUM)
4348 break;
4350 /* Only for the first load.
4351 merge_sp_adjust_with_load holds the register load
4352 with which we will merge the sp adjustment. */
4353 if (merge_sp_adjust_with_load == 0
4354 && local_fsize == 0
4355 && VAL_14_BITS_P (-actual_fsize))
4356 merge_sp_adjust_with_load = regno;
4357 else
4358 load_reg (regno, offset, STACK_POINTER_REGNUM);
4359 offset += UNITS_PER_WORD;
4363 for (i = 18; i >= 3; i--)
4365 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4367 /* Only for the first load.
4368 merge_sp_adjust_with_load holds the register load
4369 with which we will merge the sp adjustment. */
4370 if (merge_sp_adjust_with_load == 0
4371 && local_fsize == 0
4372 && VAL_14_BITS_P (-actual_fsize))
4373 merge_sp_adjust_with_load = i;
4374 else
4375 load_reg (i, offset, STACK_POINTER_REGNUM);
4376 offset += UNITS_PER_WORD;
4381 /* Align pointer properly (doubleword boundary). */
4382 offset = (offset + 7) & ~7;
4384 /* FP register restores. */
4385 if (save_fregs)
4387 /* Set up the register we will index from. */
4388 if (frame_pointer_needed)
4389 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4390 else
4391 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4393 /* Actually do the restores now. */
4394 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4395 if (df_regs_ever_live_p (i)
4396 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4398 rtx src = gen_rtx_MEM (DFmode,
4399 gen_rtx_POST_INC (word_mode, tmpreg));
4400 rtx dest = gen_rtx_REG (DFmode, i);
4401 emit_move_insn (dest, src);
4405 /* Emit a blockage insn here to keep these insns from being moved to
4406 an earlier spot in the epilogue, or into the main instruction stream.
4408 This is necessary as we must not cut the stack back before all the
4409 restores are finished. */
4410 emit_insn (gen_blockage ());
4412 /* Reset stack pointer (and possibly frame pointer). The stack
4413 pointer is initially set to fp + 64 to avoid a race condition. */
4414 if (frame_pointer_needed)
4416 rtx delta = GEN_INT (-64);
4418 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4419 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4420 stack_pointer_rtx, delta));
4422 /* If we were deferring a callee register restore, do it now. */
4423 else if (merge_sp_adjust_with_load)
4425 rtx delta = GEN_INT (-actual_fsize);
4426 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4428 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4430 else if (actual_fsize != 0)
4431 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4432 - actual_fsize, 0);
4434 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4435 frame greater than 8k), do so now. */
4436 if (ret_off != 0)
4437 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4439 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4441 rtx sa = EH_RETURN_STACKADJ_RTX;
4443 emit_insn (gen_blockage ());
4444 emit_insn (TARGET_64BIT
4445 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4446 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4450 bool
4451 pa_can_use_return_insn (void)
4453 if (!reload_completed)
4454 return false;
4456 if (frame_pointer_needed)
4457 return false;
4459 if (df_regs_ever_live_p (2))
4460 return false;
4462 if (crtl->profile)
4463 return false;
4465 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4468 rtx
4469 hppa_pic_save_rtx (void)
4471 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4474 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4475 #define NO_DEFERRED_PROFILE_COUNTERS 0
4476 #endif
4479 /* Vector of funcdef numbers. */
4480 static vec<int> funcdef_nos;
4482 /* Output deferred profile counters. */
4483 static void
4484 output_deferred_profile_counters (void)
4486 unsigned int i;
4487 int align, n;
4489 if (funcdef_nos.is_empty ())
4490 return;
4492 switch_to_section (data_section);
4493 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4494 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4496 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4498 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4499 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4502 funcdef_nos.release ();
4505 void
4506 hppa_profile_hook (int label_no)
4508 /* We use SImode for the address of the function in both 32 and
4509 64-bit code to avoid having to provide DImode versions of the
4510 lcla2 and load_offset_label_address insn patterns. */
4511 rtx reg = gen_reg_rtx (SImode);
4512 rtx_code_label *label_rtx = gen_label_rtx ();
4513 rtx begin_label_rtx;
4514 rtx_insn *call_insn;
4515 char begin_label_name[16];
4517 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4518 label_no);
4519 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4521 if (TARGET_64BIT)
4522 emit_move_insn (arg_pointer_rtx,
4523 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4524 GEN_INT (64)));
4526 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4528 /* The address of the function is loaded into %r25 with an instruction-
4529 relative sequence that avoids the use of relocations. The sequence
4530 is split so that the load_offset_label_address instruction can
4531 occupy the delay slot of the call to _mcount. */
4532 if (TARGET_PA_20)
4533 emit_insn (gen_lcla2 (reg, label_rtx));
4534 else
4535 emit_insn (gen_lcla1 (reg, label_rtx));
4537 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4538 reg, begin_label_rtx, label_rtx));
4540 #if !NO_DEFERRED_PROFILE_COUNTERS
4542 rtx count_label_rtx, addr, r24;
4543 char count_label_name[16];
4545 funcdef_nos.safe_push (label_no);
4546 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4547 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4549 addr = force_reg (Pmode, count_label_rtx);
4550 r24 = gen_rtx_REG (Pmode, 24);
4551 emit_move_insn (r24, addr);
4553 call_insn =
4554 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4555 gen_rtx_SYMBOL_REF (Pmode,
4556 "_mcount")),
4557 GEN_INT (TARGET_64BIT ? 24 : 12)));
4559 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4561 #else
4563 call_insn =
4564 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4565 gen_rtx_SYMBOL_REF (Pmode,
4566 "_mcount")),
4567 GEN_INT (TARGET_64BIT ? 16 : 8)));
4569 #endif
4571 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4572 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4574 /* Indicate the _mcount call cannot throw, nor will it execute a
4575 non-local goto. */
4576 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4579 /* Fetch the return address for the frame COUNT steps up from
4580 the current frame, after the prologue. FRAMEADDR is the
4581 frame pointer of the COUNT frame.
4583 We want to ignore any export stub remnants here. To handle this,
4584 we examine the code at the return address, and if it is an export
4585 stub, we return a memory rtx for the stub return address stored
4586 at frame-24.
4588 The value returned is used in two different ways:
4590 1. To find a function's caller.
4592 2. To change the return address for a function.
4594 This function handles most instances of case 1; however, it will
4595 fail if there are two levels of stubs to execute on the return
4596 path. The only way I believe that can happen is if the return value
4597 needs a parameter relocation, which never happens for C code.
4599 This function handles most instances of case 2; however, it will
4600 fail if we did not originally have stub code on the return path
4601 but will need stub code on the new return path. This can happen if
4602 the caller & callee are both in the main program, but the new
4603 return location is in a shared library. */
4605 rtx
4606 pa_return_addr_rtx (int count, rtx frameaddr)
4608 rtx label;
4609 rtx rp;
4610 rtx saved_rp;
4611 rtx ins;
4613 /* The instruction stream at the return address of a PA1.X export stub is:
4615 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4616 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4617 0x00011820 | stub+16: mtsp r1,sr0
4618 0xe0400002 | stub+20: be,n 0(sr0,rp)
4620 0xe0400002 must be specified as -532676606 so that it won't be
4621 rejected as an invalid immediate operand on 64-bit hosts.
4623 The instruction stream at the return address of a PA2.0 export stub is:
4625 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4626 0xe840d002 | stub+12: bve,n (rp)
4629 HOST_WIDE_INT insns[4];
4630 int i, len;
4632 if (count != 0)
4633 return NULL_RTX;
4635 rp = get_hard_reg_initial_val (Pmode, 2);
4637 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4638 return rp;
4640 /* If there is no export stub then just use the value saved from
4641 the return pointer register. */
4643 saved_rp = gen_reg_rtx (Pmode);
4644 emit_move_insn (saved_rp, rp);
4646 /* Get pointer to the instruction stream. We have to mask out the
4647 privilege level from the two low order bits of the return address
4648 pointer here so that ins will point to the start of the first
4649 instruction that would have been executed if we returned. */
4650 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4651 label = gen_label_rtx ();
4653 if (TARGET_PA_20)
4655 insns[0] = 0x4bc23fd1;
4656 insns[1] = -398405630;
4657 len = 2;
4659 else
4661 insns[0] = 0x4bc23fd1;
4662 insns[1] = 0x004010a1;
4663 insns[2] = 0x00011820;
4664 insns[3] = -532676606;
4665 len = 4;
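/* The negative constants are the same machine words written so
   they are valid signed immediates on 64-bit hosts: for example,
   0xe840d002 == 3896561666, and 3896561666 - 2^32 == -398405630;
   likewise 0xe0400002 - 2^32 == -532676606.  */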
4668 /* Check the instruction stream at the normal return address for the
4669 export stub. If it is an export stub, then our return address is
4670 really in -24[frameaddr]. */
4672 for (i = 0; i < len; i++)
4674 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4675 rtx op1 = GEN_INT (insns[i]);
4676 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4679 /* Here we know that our return address points to an export
4680 stub. We don't want to return the address of the export stub,
4681 but rather the return address of the export stub. That return
4682 address is stored at -24[frameaddr]. */
4684 emit_move_insn (saved_rp,
4685 gen_rtx_MEM (Pmode,
4686 memory_address (Pmode,
4687 plus_constant (Pmode, frameaddr,
4688 -24))));
4690 emit_label (label);
4692 return saved_rp;
4695 void
4696 pa_emit_bcond_fp (rtx operands[])
4698 enum rtx_code code = GET_CODE (operands[0]);
4699 rtx operand0 = operands[1];
4700 rtx operand1 = operands[2];
4701 rtx label = operands[3];
4703 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4704 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4706 emit_jump_insn (gen_rtx_SET (pc_rtx,
4707 gen_rtx_IF_THEN_ELSE (VOIDmode,
4708 gen_rtx_fmt_ee (NE,
4709 VOIDmode,
4710 gen_rtx_REG (CCFPmode, 0),
4711 const0_rtx),
4712 gen_rtx_LABEL_REF (VOIDmode, label),
4713 pc_rtx)));
4717 /* Adjust the cost of a scheduling dependency. Return the new cost of
4718 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4720 static int
4721 pa_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4723 enum attr_type attr_type;
4725 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4726 true dependencies as they are described with bypasses now. */
4727 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4728 return cost;
4730 if (! recog_memoized (insn))
4731 return 0;
4733 attr_type = get_attr_type (insn);
4735 switch (REG_NOTE_KIND (link))
4737 case REG_DEP_ANTI:
4738 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4739 cycles later. */
4741 if (attr_type == TYPE_FPLOAD)
4743 rtx pat = PATTERN (insn);
4744 rtx dep_pat = PATTERN (dep_insn);
4745 if (GET_CODE (pat) == PARALLEL)
4747 /* This happens for the fldXs,mb patterns. */
4748 pat = XVECEXP (pat, 0, 0);
4750 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4751 /* If this happens, we have to extend this to schedule
4752 optimally. Return 0 for now. */
4753 return 0;
4755 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4757 if (! recog_memoized (dep_insn))
4758 return 0;
4759 switch (get_attr_type (dep_insn))
4761 case TYPE_FPALU:
4762 case TYPE_FPMULSGL:
4763 case TYPE_FPMULDBL:
4764 case TYPE_FPDIVSGL:
4765 case TYPE_FPDIVDBL:
4766 case TYPE_FPSQRTSGL:
4767 case TYPE_FPSQRTDBL:
4768 /* An fpload can't be issued until one cycle before a
4769 preceding arithmetic operation has finished if
4770 the target of the fpload is any of the sources
4771 (or destination) of the arithmetic operation. */
4772 return insn_default_latency (dep_insn) - 1;
4774 default:
4775 return 0;
4779 else if (attr_type == TYPE_FPALU)
4781 rtx pat = PATTERN (insn);
4782 rtx dep_pat = PATTERN (dep_insn);
4783 if (GET_CODE (pat) == PARALLEL)
4785 /* This happens for the fldXs,mb patterns. */
4786 pat = XVECEXP (pat, 0, 0);
4788 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4789 /* If this happens, we have to extend this to schedule
4790 optimally. Return 0 for now. */
4791 return 0;
4793 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4795 if (! recog_memoized (dep_insn))
4796 return 0;
4797 switch (get_attr_type (dep_insn))
4799 case TYPE_FPDIVSGL:
4800 case TYPE_FPDIVDBL:
4801 case TYPE_FPSQRTSGL:
4802 case TYPE_FPSQRTDBL:
4803 /* An ALU flop can't be issued until two cycles before a
4804 preceding divide or sqrt operation has finished if
4805 the target of the ALU flop is any of the sources
4806 (or destination) of the divide or sqrt operation. */
4807 return insn_default_latency (dep_insn) - 2;
4809 default:
4810 return 0;
4815 /* For other anti dependencies, the cost is 0. */
4816 return 0;
4818 case REG_DEP_OUTPUT:
4819 /* Output dependency; DEP_INSN writes a register that INSN writes some
4820 cycles later. */
4821 if (attr_type == TYPE_FPLOAD)
4823 rtx pat = PATTERN (insn);
4824 rtx dep_pat = PATTERN (dep_insn);
4825 if (GET_CODE (pat) == PARALLEL)
4827 /* This happens for the fldXs,mb patterns. */
4828 pat = XVECEXP (pat, 0, 0);
4830 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4831 /* If this happens, we have to extend this to schedule
4832 optimally. Return 0 for now. */
4833 return 0;
4835 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4837 if (! recog_memoized (dep_insn))
4838 return 0;
4839 switch (get_attr_type (dep_insn))
4841 case TYPE_FPALU:
4842 case TYPE_FPMULSGL:
4843 case TYPE_FPMULDBL:
4844 case TYPE_FPDIVSGL:
4845 case TYPE_FPDIVDBL:
4846 case TYPE_FPSQRTSGL:
4847 case TYPE_FPSQRTDBL:
4848 /* An fpload can't be issued until one cycle before a
4849 preceding arithmetic operation has finished if
4850 the target of the fpload is the destination of the
4851 arithmetic operation.
4853 Exception: For PA7100LC, PA7200 and PA7300, the cost
4854 is 3 cycles, unless they bundle together. We also
4855 pay the penalty if the second insn is an fpload. */
4856 return insn_default_latency (dep_insn) - 1;
4858 default:
4859 return 0;
4863 else if (attr_type == TYPE_FPALU)
4865 rtx pat = PATTERN (insn);
4866 rtx dep_pat = PATTERN (dep_insn);
4867 if (GET_CODE (pat) == PARALLEL)
4869 /* This happens for the fldXs,mb patterns. */
4870 pat = XVECEXP (pat, 0, 0);
4872 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4873 /* If this happens, we have to extend this to schedule
4874 optimally. Return 0 for now. */
4875 return 0;
4877 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4879 if (! recog_memoized (dep_insn))
4880 return 0;
4881 switch (get_attr_type (dep_insn))
4883 case TYPE_FPDIVSGL:
4884 case TYPE_FPDIVDBL:
4885 case TYPE_FPSQRTSGL:
4886 case TYPE_FPSQRTDBL:
4887 /* An ALU flop can't be issued until two cycles before a
4888 preceding divide or sqrt operation has finished if
4889 the target of the ALU flop is also the target of
4890 the divide or sqrt operation. */
4891 return insn_default_latency (dep_insn) - 2;
4893 default:
4894 return 0;
4899 /* For other output dependencies, the cost is 0. */
4900 return 0;
4902 default:
4903 gcc_unreachable ();
4907 /* Adjust scheduling priorities. We use this to try to keep addil
4908 and the next use of %r1 close together. */
4909 static int
4910 pa_adjust_priority (rtx_insn *insn, int priority)
4912 rtx set = single_set (insn);
4913 rtx src, dest;
4914 if (set)
4916 src = SET_SRC (set);
4917 dest = SET_DEST (set);
4918 if (GET_CODE (src) == LO_SUM
4919 && symbolic_operand (XEXP (src, 1), VOIDmode)
4920 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4921 priority >>= 3;
4923 else if (GET_CODE (src) == MEM
4924 && GET_CODE (XEXP (src, 0)) == LO_SUM
4925 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4926 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4927 priority >>= 1;
4929 else if (GET_CODE (dest) == MEM
4930 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4931 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4932 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4933 priority >>= 3;
4935 return priority;
4938 /* The 700 can only issue a single insn at a time.
4939 The 7XXX processors can issue two insns at a time.
4940 The 8000 can issue 4 insns at a time. */
4941 static int
4942 pa_issue_rate (void)
4944 switch (pa_cpu)
4946 case PROCESSOR_700: return 1;
4947 case PROCESSOR_7100: return 2;
4948 case PROCESSOR_7100LC: return 2;
4949 case PROCESSOR_7200: return 2;
4950 case PROCESSOR_7300: return 2;
4951 case PROCESSOR_8000: return 4;
4953 default:
4954 gcc_unreachable ();
4960 /* Return any length plus adjustment needed by INSN which already has
4961 its length computed as LENGTH. Return LENGTH if no adjustment is
4962 necessary.
4964 Also compute the length of an inline block move here as it is too
4965 complicated to express as a length attribute in pa.md. */
4966 int
4967 pa_adjust_insn_length (rtx_insn *insn, int length)
4969 rtx pat = PATTERN (insn);
4971 /* If length is negative or undefined, provide initial length. */
4972 if ((unsigned int) length >= INT_MAX)
4974 if (GET_CODE (pat) == SEQUENCE)
4975 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
4977 switch (get_attr_type (insn))
4979 case TYPE_MILLI:
4980 length = pa_attr_length_millicode_call (insn);
4981 break;
4982 case TYPE_CALL:
4983 length = pa_attr_length_call (insn, 0);
4984 break;
4985 case TYPE_SIBCALL:
4986 length = pa_attr_length_call (insn, 1);
4987 break;
4988 case TYPE_DYNCALL:
4989 length = pa_attr_length_indirect_call (insn);
4990 break;
4991 case TYPE_SH_FUNC_ADRS:
4992 length = pa_attr_length_millicode_call (insn) + 20;
4993 break;
4994 default:
4995 gcc_unreachable ();
4999 /* Block move pattern. */
5000 if (NONJUMP_INSN_P (insn)
5001 && GET_CODE (pat) == PARALLEL
5002 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5003 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5004 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5005 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5006 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5007 length += compute_movmem_length (insn) - 4;
5008 /* Block clear pattern. */
5009 else if (NONJUMP_INSN_P (insn)
5010 && GET_CODE (pat) == PARALLEL
5011 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5012 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5013 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5014 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5015 length += compute_clrmem_length (insn) - 4;
5016 /* Conditional branch with an unfilled delay slot. */
5017 else if (JUMP_P (insn) && ! simplejump_p (insn))
5019 /* Adjust a short backwards conditional with an unfilled delay slot. */
5020 if (GET_CODE (pat) == SET
5021 && length == 4
5022 && JUMP_LABEL (insn) != NULL_RTX
5023 && ! forward_branch_p (insn))
5024 length += 4;
5025 else if (GET_CODE (pat) == PARALLEL
5026 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5027 && length == 4)
5028 length += 4;
5029 /* Adjust dbra insn with short backwards conditional branch with
5030 unfilled delay slot -- only for the case where the counter is in
5031 a general register. */
5032 else if (GET_CODE (pat) == PARALLEL
5033 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5034 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5035 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5036 && length == 4
5037 && ! forward_branch_p (insn))
5038 length += 4;
5040 return length;
5043 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5045 static bool
5046 pa_print_operand_punct_valid_p (unsigned char code)
5048 if (code == '@'
5049 || code == '#'
5050 || code == '*'
5051 || code == '^')
5052 return true;
5054 return false;
5057 /* Print operand X (an rtx) in assembler syntax to file FILE.
5058 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5059 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5061 void
5062 pa_print_operand (FILE *file, rtx x, int code)
5064 switch (code)
5066 case '#':
5067 /* Output a 'nop' if there's nothing for the delay slot. */
5068 if (dbr_sequence_length () == 0)
5069 fputs ("\n\tnop", file);
5070 return;
5071 case '*':
5072 /* Output a nullification completer if there's nothing for the
5073 delay slot or nullification is requested. */
5074 if (dbr_sequence_length () == 0 ||
5075 (final_sequence &&
5076 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5077 fputs (",n", file);
5078 return;
5079 case 'R':
5080 /* Print out the second register name of a register pair.
5081 I.e., R (6) => 7. */
5082 fputs (reg_names[REGNO (x) + 1], file);
5083 return;
5084 case 'r':
5085 /* A register or zero. */
5086 if (x == const0_rtx
5087 || (x == CONST0_RTX (DFmode))
5088 || (x == CONST0_RTX (SFmode)))
5090 fputs ("%r0", file);
5091 return;
5093 else
5094 break;
5095 case 'f':
5096 /* A register or zero (floating point). */
5097 if (x == const0_rtx
5098 || (x == CONST0_RTX (DFmode))
5099 || (x == CONST0_RTX (SFmode)))
5101 fputs ("%fr0", file);
5102 return;
5104 else
5105 break;
5106 case 'A':
5108 rtx xoperands[2];
5110 xoperands[0] = XEXP (XEXP (x, 0), 0);
5111 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5112 pa_output_global_address (file, xoperands[1], 0);
5113 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5114 return;
5117 case 'C': /* Plain (C)ondition */
5118 case 'X':
5119 switch (GET_CODE (x))
5121 case EQ:
5122 fputs ("=", file); break;
5123 case NE:
5124 fputs ("<>", file); break;
5125 case GT:
5126 fputs (">", file); break;
5127 case GE:
5128 fputs (">=", file); break;
5129 case GEU:
5130 fputs (">>=", file); break;
5131 case GTU:
5132 fputs (">>", file); break;
5133 case LT:
5134 fputs ("<", file); break;
5135 case LE:
5136 fputs ("<=", file); break;
5137 case LEU:
5138 fputs ("<<=", file); break;
5139 case LTU:
5140 fputs ("<<", file); break;
5141 default:
5142 gcc_unreachable ();
5144 return;
5145 case 'N': /* Condition, (N)egated */
5146 switch (GET_CODE (x))
5148 case EQ:
5149 fputs ("<>", file); break;
5150 case NE:
5151 fputs ("=", file); break;
5152 case GT:
5153 fputs ("<=", file); break;
5154 case GE:
5155 fputs ("<", file); break;
5156 case GEU:
5157 fputs ("<<", file); break;
5158 case GTU:
5159 fputs ("<<=", file); break;
5160 case LT:
5161 fputs (">=", file); break;
5162 case LE:
5163 fputs (">", file); break;
5164 case LEU:
5165 fputs (">>", file); break;
5166 case LTU:
5167 fputs (">>=", file); break;
5168 default:
5169 gcc_unreachable ();
5171 return;
5172 /* For floating point comparisons. Note that the output
5173 predicates are the complement of the desired mode. The
5174 conditions for GT, GE, LT, LE and LTGT cause an invalid
5175 operation exception if the result is unordered and this
5176 exception is enabled in the floating-point status register. */
5177 case 'Y':
5178 switch (GET_CODE (x))
5180 case EQ:
5181 fputs ("!=", file); break;
5182 case NE:
5183 fputs ("=", file); break;
5184 case GT:
5185 fputs ("!>", file); break;
5186 case GE:
5187 fputs ("!>=", file); break;
5188 case LT:
5189 fputs ("!<", file); break;
5190 case LE:
5191 fputs ("!<=", file); break;
5192 case LTGT:
5193 fputs ("!<>", file); break;
5194 case UNLE:
5195 fputs ("!?<=", file); break;
5196 case UNLT:
5197 fputs ("!?<", file); break;
5198 case UNGE:
5199 fputs ("!?>=", file); break;
5200 case UNGT:
5201 fputs ("!?>", file); break;
5202 case UNEQ:
5203 fputs ("!?=", file); break;
5204 case UNORDERED:
5205 fputs ("!?", file); break;
5206 case ORDERED:
5207 fputs ("?", file); break;
5208 default:
5209 gcc_unreachable ();
5211 return;
5212 case 'S': /* Condition, operands are (S)wapped. */
5213 switch (GET_CODE (x))
5215 case EQ:
5216 fputs ("=", file); break;
5217 case NE:
5218 fputs ("<>", file); break;
5219 case GT:
5220 fputs ("<", file); break;
5221 case GE:
5222 fputs ("<=", file); break;
5223 case GEU:
5224 fputs ("<<=", file); break;
5225 case GTU:
5226 fputs ("<<", file); break;
5227 case LT:
5228 fputs (">", file); break;
5229 case LE:
5230 fputs (">=", file); break;
5231 case LEU:
5232 fputs (">>=", file); break;
5233 case LTU:
5234 fputs (">>", file); break;
5235 default:
5236 gcc_unreachable ();
5238 return;
5239 case 'B': /* Condition, (B)oth swapped and negate. */
5240 switch (GET_CODE (x))
5242 case EQ:
5243 fputs ("<>", file); break;
5244 case NE:
5245 fputs ("=", file); break;
5246 case GT:
5247 fputs (">=", file); break;
5248 case GE:
5249 fputs (">", file); break;
5250 case GEU:
5251 fputs (">>", file); break;
5252 case GTU:
5253 fputs (">>=", file); break;
5254 case LT:
5255 fputs ("<=", file); break;
5256 case LE:
5257 fputs ("<", file); break;
5258 case LEU:
5259 fputs ("<<", file); break;
5260 case LTU:
5261 fputs ("<<=", file); break;
5262 default:
5263 gcc_unreachable ();
5265 return;
5266 case 'k':
5267 gcc_assert (GET_CODE (x) == CONST_INT);
5268 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5269 return;
5270 case 'Q':
5271 gcc_assert (GET_CODE (x) == CONST_INT);
5272 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5273 return;
5274 case 'L':
5275 gcc_assert (GET_CODE (x) == CONST_INT);
5276 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5277 return;
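/* 'Q' and 'L' (and 'p'/'P' below) convert a shift count into the
   corresponding deposit/extract operand.  For example, with
   INTVAL (x) == 12:  'Q' prints 52, 'L' prints 20, 'p' prints 51
   and 'P' prints 19.  */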
5278 case 'o':
5279 gcc_assert (GET_CODE (x) == CONST_INT
5280 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5281 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5282 return;
5283 case 'O':
5284 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5285 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5286 return;
5287 case 'p':
5288 gcc_assert (GET_CODE (x) == CONST_INT);
5289 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5290 return;
5291 case 'P':
5292 gcc_assert (GET_CODE (x) == CONST_INT);
5293 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5294 return;
5295 case 'I':
5296 if (GET_CODE (x) == CONST_INT)
5297 fputs ("i", file);
5298 return;
5299 case 'M':
5300 case 'F':
5301 switch (GET_CODE (XEXP (x, 0)))
5303 case PRE_DEC:
5304 case PRE_INC:
5305 if (ASSEMBLER_DIALECT == 0)
5306 fputs ("s,mb", file);
5307 else
5308 fputs (",mb", file);
5309 break;
5310 case POST_DEC:
5311 case POST_INC:
5312 if (ASSEMBLER_DIALECT == 0)
5313 fputs ("s,ma", file);
5314 else
5315 fputs (",ma", file);
5316 break;
5317 case PLUS:
5318 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5319 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5321 if (ASSEMBLER_DIALECT == 0)
5322 fputs ("x", file);
5324 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5325 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5327 if (ASSEMBLER_DIALECT == 0)
5328 fputs ("x,s", file);
5329 else
5330 fputs (",s", file);
5332 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5333 fputs ("s", file);
5334 break;
5335 default:
5336 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5337 fputs ("s", file);
5338 break;
5340 return;
5341 case 'G':
5342 pa_output_global_address (file, x, 0);
5343 return;
5344 case 'H':
5345 pa_output_global_address (file, x, 1);
5346 return;
5347 case 0: /* Don't do anything special */
5348 break;
5349 case 'Z':
5351 unsigned op[3];
5352 compute_zdepwi_operands (INTVAL (x), op);
5353 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5354 return;
5356 case 'z':
5358 unsigned op[3];
5359 compute_zdepdi_operands (INTVAL (x), op);
5360 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5361 return;
5363 case 'c':
5364 /* We can get here from a .vtable_inherit due to our
5365 CONSTANT_ADDRESS_P rejecting perfectly good constant
5366 addresses. */
5367 break;
5368 default:
5369 gcc_unreachable ();
5371 if (GET_CODE (x) == REG)
5373 fputs (reg_names [REGNO (x)], file);
5374 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5376 fputs ("R", file);
5377 return;
5379 if (FP_REG_P (x)
5380 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5381 && (REGNO (x) & 1) == 0)
5382 fputs ("L", file);
5384 else if (GET_CODE (x) == MEM)
5386 int size = GET_MODE_SIZE (GET_MODE (x));
5387 rtx base = NULL_RTX;
5388 switch (GET_CODE (XEXP (x, 0)))
5390 case PRE_DEC:
5391 case POST_DEC:
5392 base = XEXP (XEXP (x, 0), 0);
5393 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5394 break;
5395 case PRE_INC:
5396 case POST_INC:
5397 base = XEXP (XEXP (x, 0), 0);
5398 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5399 break;
5400 case PLUS:
5401 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5402 fprintf (file, "%s(%s)",
5403 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5404 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5405 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5406 fprintf (file, "%s(%s)",
5407 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5408 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5409 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5410 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5412 /* Because the REG_POINTER flag can get lost during reload,
5413 pa_legitimate_address_p canonicalizes the order of the
5414 index and base registers in the combined move patterns. */
5415 rtx base = XEXP (XEXP (x, 0), 1);
5416 rtx index = XEXP (XEXP (x, 0), 0);
5418 fprintf (file, "%s(%s)",
5419 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5421 else
5422 output_address (XEXP (x, 0));
5423 break;
5424 default:
5425 output_address (XEXP (x, 0));
5426 break;
5429 else
5430 output_addr_const (file, x);
5433 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5435 void
5436 pa_output_global_address (FILE *file, rtx x, int round_constant)
5439 /* Imagine (high (const (plus ...))). */
5440 if (GET_CODE (x) == HIGH)
5441 x = XEXP (x, 0);
5443 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5444 output_addr_const (file, x);
5445 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5447 output_addr_const (file, x);
5448 fputs ("-$global$", file);
5450 else if (GET_CODE (x) == CONST)
5452 const char *sep = "";
5453 int offset = 0; /* assembler wants -$global$ at end */
5454 rtx base = NULL_RTX;
5456 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5458 case LABEL_REF:
5459 case SYMBOL_REF:
5460 base = XEXP (XEXP (x, 0), 0);
5461 output_addr_const (file, base);
5462 break;
5463 case CONST_INT:
5464 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5465 break;
5466 default:
5467 gcc_unreachable ();
5470 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5472 case LABEL_REF:
5473 case SYMBOL_REF:
5474 base = XEXP (XEXP (x, 0), 1);
5475 output_addr_const (file, base);
5476 break;
5477 case CONST_INT:
5478 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5479 break;
5480 default:
5481 gcc_unreachable ();
5484 /* How bogus. The compiler is apparently responsible for
5485 rounding the constant if it uses an LR field selector.
5487 The linker and/or assembler seem a better place since
5488 they have to do this kind of thing already.
5490 If we fail to do this, HP's optimizing linker may eliminate
5491 an addil, but not update the ldw/stw/ldo instruction that
5492 uses the result of the addil. */
5493 if (round_constant)
5494 offset = ((offset + 0x1000) & ~0x1fff);
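/* This rounds the offset to the nearest multiple of 8K, matching
   what an LR field selector would do.  For example, offset == 9029
   gives (9029 + 0x1000) & ~0x1fff == 8192; offset == 4095 rounds
   down to 0 and offset == 4096 rounds up to 8192.  */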
5496 switch (GET_CODE (XEXP (x, 0)))
5498 case PLUS:
5499 if (offset < 0)
5501 offset = -offset;
5502 sep = "-";
5504 else
5505 sep = "+";
5506 break;
5508 case MINUS:
5509 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5510 sep = "-";
5511 break;
5513 default:
5514 gcc_unreachable ();
5517 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5518 fputs ("-$global$", file);
5519 if (offset)
5520 fprintf (file, "%s%d", sep, offset);
5522 else
5523 output_addr_const (file, x);
5526 /* Output boilerplate text to appear at the beginning of the file.
5527 There are several possible versions. */
5528 #define aputs(x) fputs(x, asm_out_file)
5529 static inline void
5530 pa_file_start_level (void)
5532 if (TARGET_64BIT)
5533 aputs ("\t.LEVEL 2.0w\n");
5534 else if (TARGET_PA_20)
5535 aputs ("\t.LEVEL 2.0\n");
5536 else if (TARGET_PA_11)
5537 aputs ("\t.LEVEL 1.1\n");
5538 else
5539 aputs ("\t.LEVEL 1.0\n");
5542 static inline void
5543 pa_file_start_space (int sortspace)
5545 aputs ("\t.SPACE $PRIVATE$");
5546 if (sortspace)
5547 aputs (",SORT=16");
5548 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5549 if (flag_tm)
5550 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5551 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5552 "\n\t.SPACE $TEXT$");
5553 if (sortspace)
5554 aputs (",SORT=8");
5555 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5556 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5559 static inline void
5560 pa_file_start_file (int want_version)
5562 if (write_symbols != NO_DEBUG)
5564 output_file_directive (asm_out_file, main_input_filename);
5565 if (want_version)
5566 aputs ("\t.version\t\"01.01\"\n");
5570 static inline void
5571 pa_file_start_mcount (const char *aswhat)
5573 if (profile_flag)
5574 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5577 static void
5578 pa_elf_file_start (void)
5580 pa_file_start_level ();
5581 pa_file_start_mcount ("ENTRY");
5582 pa_file_start_file (0);
5585 static void
5586 pa_som_file_start (void)
5588 pa_file_start_level ();
5589 pa_file_start_space (0);
5590 aputs ("\t.IMPORT $global$,DATA\n"
5591 "\t.IMPORT $$dyncall,MILLICODE\n");
5592 pa_file_start_mcount ("CODE");
5593 pa_file_start_file (0);
5596 static void
5597 pa_linux_file_start (void)
5599 pa_file_start_file (1);
5600 pa_file_start_level ();
5601 pa_file_start_mcount ("CODE");
5604 static void
5605 pa_hpux64_gas_file_start (void)
5607 pa_file_start_level ();
5608 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5609 if (profile_flag)
5610 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5611 #endif
5612 pa_file_start_file (1);
5615 static void
5616 pa_hpux64_hpas_file_start (void)
5618 pa_file_start_level ();
5619 pa_file_start_space (1);
5620 pa_file_start_mcount ("CODE");
5621 pa_file_start_file (0);
5623 #undef aputs
5625 /* Search the deferred plabel list for SYMBOL and return its internal
5626 label. If an entry for SYMBOL is not found, a new entry is created. */
5628 rtx
5629 pa_get_deferred_plabel (rtx symbol)
5631 const char *fname = XSTR (symbol, 0);
5632 size_t i;
5634 /* See if we have already put this function on the list of deferred
5635 plabels. This list is generally small, so a linear search is not
5636 too ugly. If it proves too slow, replace it with something faster. */
5637 for (i = 0; i < n_deferred_plabels; i++)
5638 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5639 break;
5641 /* If the deferred plabel list is empty, or this entry was not found
5642 on the list, create a new entry on the list. */
5643 if (deferred_plabels == NULL || i == n_deferred_plabels)
5645 tree id;
5647 if (deferred_plabels == 0)
5648 deferred_plabels = ggc_alloc<deferred_plabel> ();
5649 else
5650 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5651 deferred_plabels,
5652 n_deferred_plabels + 1);
5654 i = n_deferred_plabels++;
5655 deferred_plabels[i].internal_label = gen_label_rtx ();
5656 deferred_plabels[i].symbol = symbol;
5658 /* Gross. We have just implicitly taken the address of this
5659 function. Mark it in the same manner as assemble_name. */
5660 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5661 if (id)
5662 mark_referenced (id);
5665 return deferred_plabels[i].internal_label;
5668 static void
5669 output_deferred_plabels (void)
5671 size_t i;
5673 /* If we have some deferred plabels, then we need to switch into the
5674 data or readonly data section, and align it to a 4-byte boundary
5675 before outputting the deferred plabels. */
5676 if (n_deferred_plabels)
5678 switch_to_section (flag_pic ? data_section : readonly_data_section);
5679 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5682 /* Now output the deferred plabels. */
5683 for (i = 0; i < n_deferred_plabels; i++)
5685 targetm.asm_out.internal_label (asm_out_file, "L",
5686 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5687 assemble_integer (deferred_plabels[i].symbol,
5688 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5692 /* Initialize optabs to point to emulation routines. */
5694 static void
5695 pa_init_libfuncs (void)
5697 if (HPUX_LONG_DOUBLE_LIBRARY)
5699 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5700 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5701 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5702 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5703 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5704 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5705 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5706 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5707 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5709 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5710 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5711 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5712 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5713 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5714 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5715 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5717 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5718 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5719 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5720 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5722 set_conv_libfunc (sfix_optab, SImode, TFmode,
5723 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5724 : "_U_Qfcnvfxt_quad_to_sgl");
5725 set_conv_libfunc (sfix_optab, DImode, TFmode,
5726 "_U_Qfcnvfxt_quad_to_dbl");
5727 set_conv_libfunc (ufix_optab, SImode, TFmode,
5728 "_U_Qfcnvfxt_quad_to_usgl");
5729 set_conv_libfunc (ufix_optab, DImode, TFmode,
5730 "_U_Qfcnvfxt_quad_to_udbl");
5732 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5733 "_U_Qfcnvxf_sgl_to_quad");
5734 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5735 "_U_Qfcnvxf_dbl_to_quad");
5736 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5737 "_U_Qfcnvxf_usgl_to_quad");
5738 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5739 "_U_Qfcnvxf_udbl_to_quad");
5742 if (TARGET_SYNC_LIBCALL)
5743 init_sync_libfuncs (UNITS_PER_WORD);
5746 /* HP's millicode routines mean something special to the assembler.
5747 Keep track of which ones we have used. */
5749 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5750 static void import_milli (enum millicodes);
5751 static char imported[(int) end1000];
5752 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5753 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5754 #define MILLI_START 10
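/* How import_milli below patches the template: MILLI_START (10) is the
   offset of the "...." placeholder in import_string, so the strncpy
   overwrites exactly those four characters with the millicode name.
   For example, the first call to import_milli (mulI) emits

       .IMPORT $$mulI,MILLICODE

   and subsequent calls for the same code are no-ops.  */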
5756 static void
5757 import_milli (enum millicodes code)
5759 char str[sizeof (import_string)];
5761 if (!imported[(int) code])
5763 imported[(int) code] = 1;
5764 strcpy (str, import_string);
5765 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5766 output_asm_insn (str, 0);
5770 /* The register constraints have put the operands and return value in
5771 the proper registers. */
5773 const char *
5774 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5776 import_milli (mulI);
5777 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5780 /* Emit the rtl for doing a division by a constant. */
5782 /* Do magic division millicodes exist for this value? */
5783 const int pa_magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
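/* Reading the table above: a nonzero entry at index N means a magic
   millicode routine exists for divisor N, i.e. for N in
   {3, 5, 6, 7, 9, 10, 12, 14, 15}.  */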
5785 /* We'll use an array to keep track of the magic millicodes and
5786 whether or not we've used them already. [n][0] is signed, [n][1] is
5787 unsigned. */
5789 static int div_milli[16][2];
5791 int
5792 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5794 if (GET_CODE (operands[2]) == CONST_INT
5795 && INTVAL (operands[2]) > 0
5796 && INTVAL (operands[2]) < 16
5797 && pa_magic_milli[INTVAL (operands[2])])
5799 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5801 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5802 emit
5803 (gen_rtx_PARALLEL
5804 (VOIDmode,
5805 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5806 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5807 SImode,
5808 gen_rtx_REG (SImode, 26),
5809 operands[2])),
5810 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5811 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5812 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5813 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5814 gen_rtx_CLOBBER (VOIDmode, ret))));
5815 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5816 return 1;
5818 return 0;
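/* Summary of the pattern built above: the dividend is moved into %r26,
   the quotient comes back from the millicode routine in %r29, and the
   PARALLEL records clobbers of the two scratch operands plus %r25, %r26
   and the return-address register (%r2 for TARGET_64BIT, else %r31).  */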
5821 const char *
5822 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5824 int divisor;
5826 /* If the divisor is a constant, try to use one of the special
5827 opcodes. */
5828 if (GET_CODE (operands[0]) == CONST_INT)
5830 static char buf[100];
5831 divisor = INTVAL (operands[0]);
5832 if (!div_milli[divisor][unsignedp])
5834 div_milli[divisor][unsignedp] = 1;
5835 if (unsignedp)
5836 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5837 else
5838 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5840 if (unsignedp)
5842 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5843 INTVAL (operands[0]));
5844 return pa_output_millicode_call (insn,
5845 gen_rtx_SYMBOL_REF (SImode, buf));
5847 else
5849 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5850 INTVAL (operands[0]));
5851 return pa_output_millicode_call (insn,
5852 gen_rtx_SYMBOL_REF (SImode, buf));
5855 /* Divisor isn't a special constant. */
5856 else
5858 if (unsignedp)
5860 import_milli (divU);
5861 return pa_output_millicode_call (insn,
5862 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5864 else
5866 import_milli (divI);
5867 return pa_output_millicode_call (insn,
5868 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5873 /* Output a $$rem millicode to do mod. */
5875 const char *
5876 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5878 if (unsignedp)
5880 import_milli (remU);
5881 return pa_output_millicode_call (insn,
5882 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5884 else
5886 import_milli (remI);
5887 return pa_output_millicode_call (insn,
5888 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5892 void
5893 pa_output_arg_descriptor (rtx_insn *call_insn)
5895 const char *arg_regs[4];
5896 machine_mode arg_mode;
5897 rtx link;
5898 int i, output_flag = 0;
5899 int regno;
5901 /* We neither need nor want argument location descriptors for the
5902 64-bit runtime environment or the ELF32 environment. */
5903 if (TARGET_64BIT || TARGET_ELF32)
5904 return;
5906 for (i = 0; i < 4; i++)
5907 arg_regs[i] = 0;
5909 /* Specify explicitly that no argument relocations should take place
5910 if using the portable runtime calling conventions. */
5911 if (TARGET_PORTABLE_RUNTIME)
5913 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5914 asm_out_file);
5915 return;
5918 gcc_assert (CALL_P (call_insn));
5919 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5920 link; link = XEXP (link, 1))
5922 rtx use = XEXP (link, 0);
5924 if (! (GET_CODE (use) == USE
5925 && GET_CODE (XEXP (use, 0)) == REG
5926 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5927 continue;
5929 arg_mode = GET_MODE (XEXP (use, 0));
5930 regno = REGNO (XEXP (use, 0));
5931 if (regno >= 23 && regno <= 26)
5933 arg_regs[26 - regno] = "GR";
5934 if (arg_mode == DImode)
5935 arg_regs[25 - regno] = "GR";
5937 else if (regno >= 32 && regno <= 39)
5939 if (arg_mode == SFmode)
5940 arg_regs[(regno - 32) / 2] = "FR";
5941 else
5943 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5944 arg_regs[(regno - 34) / 2] = "FR";
5945 arg_regs[(regno - 34) / 2 + 1] = "FU";
5946 #else
5947 arg_regs[(regno - 34) / 2] = "FU";
5948 arg_regs[(regno - 34) / 2 + 1] = "FR";
5949 #endif
5953 fputs ("\t.CALL ", asm_out_file);
5954 for (i = 0; i < 4; i++)
5956 if (arg_regs[i])
5958 if (output_flag++)
5959 fputc (',', asm_out_file);
5960 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5963 fputc ('\n', asm_out_file);
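/* Worked example (hypothetical operands, not from the source): a call
   passing a single int in %r26 sets arg_regs[0] to "GR", so the loop
   above prints

       .CALL ARGW0=GR

   while an SFmode argument in an FP register marks its word "FR".  */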
5966 /* Inform reload about cases where moving X with a mode MODE to or from
5967 a register in RCLASS requires an extra scratch or immediate register.
5968 Return the class needed for the immediate register. */
5970 static reg_class_t
5971 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5972 machine_mode mode, secondary_reload_info *sri)
5974 int regno;
5975 enum reg_class rclass = (enum reg_class) rclass_i;
5977 /* Handle the easy stuff first. */
5978 if (rclass == R1_REGS)
5979 return NO_REGS;
5981 if (REG_P (x))
5983 regno = REGNO (x);
5984 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5985 return NO_REGS;
5987 else
5988 regno = -1;
5990 /* If we have something like (mem (mem (...))), we can safely assume the
5991 inner MEM will end up in a general register after reloading, so there's
5992 no need for a secondary reload. */
5993 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5994 return NO_REGS;
5996 /* Trying to load a constant into a FP register during PIC code
5997 generation requires %r1 as a scratch register. For float modes,
5998 the only legitimate constant is CONST0_RTX. However, there are
5999 a few patterns that accept constant double operands. */
6000 if (flag_pic
6001 && FP_REG_CLASS_P (rclass)
6002 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6004 switch (mode)
6006 case SImode:
6007 sri->icode = CODE_FOR_reload_insi_r1;
6008 break;
6010 case DImode:
6011 sri->icode = CODE_FOR_reload_indi_r1;
6012 break;
6014 case SFmode:
6015 sri->icode = CODE_FOR_reload_insf_r1;
6016 break;
6018 case DFmode:
6019 sri->icode = CODE_FOR_reload_indf_r1;
6020 break;
6022 default:
6023 gcc_unreachable ();
6025 return NO_REGS;
6028 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6029 register when we're generating PIC code or when the operand isn't
6030 readonly. */
6031 if (pa_symbolic_expression_p (x))
6033 if (GET_CODE (x) == HIGH)
6034 x = XEXP (x, 0);
6036 if (flag_pic || !read_only_operand (x, VOIDmode))
6038 switch (mode)
6040 case SImode:
6041 sri->icode = CODE_FOR_reload_insi_r1;
6042 break;
6044 case DImode:
6045 sri->icode = CODE_FOR_reload_indi_r1;
6046 break;
6048 default:
6049 gcc_unreachable ();
6051 return NO_REGS;
6055 /* Profiling showed the PA port spends about 1.3% of its compilation
6056 time in true_regnum from calls inside pa_secondary_reload_class. */
6057 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6058 regno = true_regnum (x);
6060 /* Handle reloads for floating point loads and stores. */
6061 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6062 && FP_REG_CLASS_P (rclass))
6064 if (MEM_P (x))
6066 x = XEXP (x, 0);
6068 /* We don't need a secondary reload for indexed memory addresses.
6070 When INT14_OK_STRICT is true, it might appear that we could
6071 directly allow register indirect memory addresses. However,
6072 this doesn't work because we don't support SUBREGs in
6073 floating-point register copies and reload doesn't tell us
6074 when it's going to use a SUBREG. */
6075 if (IS_INDEX_ADDR_P (x))
6076 return NO_REGS;
6079 /* Request a secondary reload with a general scratch register
6080 for everything else. ??? Could symbolic operands be handled
6081 directly when generating non-pic PA 2.0 code? */
6082 sri->icode = (in_p
6083 ? direct_optab_handler (reload_in_optab, mode)
6084 : direct_optab_handler (reload_out_optab, mode));
6085 return NO_REGS;
6088 /* A SAR<->FP register copy requires an intermediate general register
6089 and secondary memory. We need a secondary reload with a general
6090 scratch register for spills. */
6091 if (rclass == SHIFT_REGS)
6093 /* Handle spill. */
6094 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6096 sri->icode = (in_p
6097 ? direct_optab_handler (reload_in_optab, mode)
6098 : direct_optab_handler (reload_out_optab, mode));
6099 return NO_REGS;
6102 /* Handle FP copy. */
6103 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6104 return GENERAL_REGS;
6107 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6108 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6109 && FP_REG_CLASS_P (rclass))
6110 return GENERAL_REGS;
6112 return NO_REGS;
6115 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6116 is only marked as live on entry by df-scan when it is a fixed
6117 register. It isn't a fixed register in the 64-bit runtime,
6118 so we need to mark it here. */
6120 static void
6121 pa_extra_live_on_entry (bitmap regs)
6123 if (TARGET_64BIT)
6124 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6127 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6128 to prevent it from being deleted. */
6130 rtx
6131 pa_eh_return_handler_rtx (void)
6133 rtx tmp;
6135 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6136 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6137 tmp = gen_rtx_MEM (word_mode, tmp);
6138 MEM_VOLATILE_P (tmp) = 1;
6139 return tmp;
6142 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6143 by invisible reference. As a GCC extension, we also pass anything
6144 with a zero or variable size by reference.
6146 The 64-bit runtime does not describe passing any types by invisible
6147 reference. The internals of GCC can't currently handle passing
6148 empty structures, and zero or variable length arrays when they are
6149 not passed entirely on the stack or by reference. Thus, as a GCC
6150 extension, we pass these types by reference. The HP compiler doesn't
6151 support these types, so hopefully there shouldn't be any compatibility
6152 issues. This may have to be revisited when HP releases a C99 compiler
6153 or updates the ABI. */
6155 static bool
6156 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6157 machine_mode mode, const_tree type,
6158 bool named ATTRIBUTE_UNUSED)
6160 HOST_WIDE_INT size;
6162 if (type)
6163 size = int_size_in_bytes (type);
6164 else
6165 size = GET_MODE_SIZE (mode);
6167 if (TARGET_64BIT)
6168 return size <= 0;
6169 else
6170 return size <= 0 || size > 8;
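/* For example, under the rules above a 12-byte struct is passed by
   reference in the 32-bit runtime (size > 8) but by value in the
   64-bit runtime, and a zero-sized struct is passed by reference in
   both runtimes.  */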
6173 enum direction
6174 pa_function_arg_padding (machine_mode mode, const_tree type)
6176 if (mode == BLKmode
6177 || (TARGET_64BIT
6178 && type
6179 && (AGGREGATE_TYPE_P (type)
6180 || TREE_CODE (type) == COMPLEX_TYPE
6181 || TREE_CODE (type) == VECTOR_TYPE)))
6183 /* Return none if justification is not required. */
6184 if (type
6185 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6186 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6187 return none;
6189 /* The directions set here are ignored when a BLKmode argument larger
6190 than a word is placed in a register. Different code is used for
6191 the stack and registers. This makes it difficult to have a
6192 consistent data representation for both the stack and registers.
6193 For both runtimes, the justification and padding for arguments on
6194 the stack and in registers should be identical. */
6195 if (TARGET_64BIT)
6196 /* The 64-bit runtime specifies left justification for aggregates. */
6197 return upward;
6198 else
6199 /* The 32-bit runtime architecture specifies right justification.
6200 When the argument is passed on the stack, the argument is padded
6201 with garbage on the left. The HP compiler pads with zeros. */
6202 return downward;
6205 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6206 return downward;
6207 else
6208 return none;
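/* For instance, a char argument (8 bits, less than PARM_BOUNDARY) is
   padded downward, i.e. right justified in its word, while in the
   32-bit runtime a full SImode word needs no padding and returns
   none.  */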
6212 /* Do what is necessary for `va_start'. We look at the current function
6213 to determine if stdargs or varargs is used and fill in an initial
6214 va_list. A pointer to this constructor is returned. */
6216 static rtx
6217 hppa_builtin_saveregs (void)
6219 rtx offset, dest;
6220 tree fntype = TREE_TYPE (current_function_decl);
6221 int argadj = ((!stdarg_p (fntype))
6222 ? UNITS_PER_WORD : 0);
6224 if (argadj)
6225 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6226 else
6227 offset = crtl->args.arg_offset_rtx;
6229 if (TARGET_64BIT)
6231 int i, off;
6233 /* Adjust for varargs/stdarg differences. */
6234 if (argadj)
6235 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6236 else
6237 offset = crtl->args.arg_offset_rtx;
6239 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6240 from the incoming arg pointer and growing to larger addresses. */
6241 for (i = 26, off = -64; i >= 19; i--, off += 8)
6242 emit_move_insn (gen_rtx_MEM (word_mode,
6243 plus_constant (Pmode,
6244 arg_pointer_rtx, off)),
6245 gen_rtx_REG (word_mode, i));
6247 /* The incoming args pointer points just beyond the flushback area;
6248 normally this is not a serious concern. However, when we are doing
6249 varargs/stdargs we want to make the arg pointer point to the start
6250 of the incoming argument area. */
6251 emit_move_insn (virtual_incoming_args_rtx,
6252 plus_constant (Pmode, arg_pointer_rtx, -64));
6254 /* Now return a pointer to the first anonymous argument. */
6255 return copy_to_reg (expand_binop (Pmode, add_optab,
6256 virtual_incoming_args_rtx,
6257 offset, 0, 0, OPTAB_LIB_WIDEN));
6260 /* Store general registers on the stack. */
6261 dest = gen_rtx_MEM (BLKmode,
6262 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6263 -16));
6264 set_mem_alias_set (dest, get_varargs_alias_set ());
6265 set_mem_align (dest, BITS_PER_WORD);
6266 move_block_from_reg (23, dest, 4);
6268 /* move_block_from_reg will emit code to store the argument registers
6269 individually as scalar stores.
6271 However, other insns may later load from the same addresses for
6272 a structure load (passing a struct to a varargs routine).
6274 The alias code assumes that such aliasing can never happen, so we
6275 have to keep memory referencing insns from moving up beyond the
6276 last argument register store. So we emit a blockage insn here. */
6277 emit_insn (gen_blockage ());
6279 return copy_to_reg (expand_binop (Pmode, add_optab,
6280 crtl->args.internal_arg_pointer,
6281 offset, 0, 0, OPTAB_LIB_WIDEN));
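/* Illustrative layout derived from the code above (32-bit case):
   move_block_from_reg stores %r23 through %r26 in successive words
   starting at internal_arg_pointer - 16, so %r23 lands at offset -16
   and %r26, the first argument register, at offset -4.  */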
6284 static void
6285 hppa_va_start (tree valist, rtx nextarg)
6287 nextarg = expand_builtin_saveregs ();
6288 std_expand_builtin_va_start (valist, nextarg);
6291 static tree
6292 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6293 gimple_seq *post_p)
6295 if (TARGET_64BIT)
6297 /* Args grow upward. We can use the generic routines. */
6298 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6300 else /* !TARGET_64BIT */
6302 tree ptr = build_pointer_type (type);
6303 tree valist_type;
6304 tree t, u;
6305 unsigned int size, ofs;
6306 bool indirect;
6308 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6309 if (indirect)
6311 type = ptr;
6312 ptr = build_pointer_type (type);
6314 size = int_size_in_bytes (type);
6315 valist_type = TREE_TYPE (valist);
6317 /* Args grow down. Not handled by generic routines. */
6319 u = fold_convert (sizetype, size_in_bytes (type));
6320 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6321 t = fold_build_pointer_plus (valist, u);
6323 /* Align to 4 or 8 byte boundary depending on argument size. */
6325 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6326 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6327 t = fold_convert (valist_type, t);
6329 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6331 ofs = (8 - size) % 4;
6332 if (ofs != 0)
6333 t = fold_build_pointer_plus_hwi (t, ofs);
6335 t = fold_convert (ptr, t);
6336 t = build_va_arg_indirect_ref (t);
6338 if (indirect)
6339 t = build_va_arg_indirect_ref (t);
6341 return t;
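/* A worked example of the arithmetic above for a 1-byte argument:
   u = -1, so t = valist - 1; size <= 4 selects an alignment mask of -4;
   and ofs = (8 - 1) % 4 = 3, so the final pointer is the aligned slot
   plus 3, addressing the last byte of the 4-byte slot.  This matches
   the right justification used by the 32-bit runtime.  */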
6345 /* True if MODE is valid for the target. By "valid", we mean able to
6346 be manipulated in non-trivial ways. In particular, this means all
6347 the arithmetic is supported.
6349 Currently, TImode is not valid as the HP 64-bit runtime documentation
6350 doesn't document the alignment and calling conventions for this type.
6351 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6352 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6354 static bool
6355 pa_scalar_mode_supported_p (machine_mode mode)
6357 int precision = GET_MODE_PRECISION (mode);
6359 switch (GET_MODE_CLASS (mode))
6361 case MODE_PARTIAL_INT:
6362 case MODE_INT:
6363 if (precision == CHAR_TYPE_SIZE)
6364 return true;
6365 if (precision == SHORT_TYPE_SIZE)
6366 return true;
6367 if (precision == INT_TYPE_SIZE)
6368 return true;
6369 if (precision == LONG_TYPE_SIZE)
6370 return true;
6371 if (precision == LONG_LONG_TYPE_SIZE)
6372 return true;
6373 return false;
6375 case MODE_FLOAT:
6376 if (precision == FLOAT_TYPE_SIZE)
6377 return true;
6378 if (precision == DOUBLE_TYPE_SIZE)
6379 return true;
6380 if (precision == LONG_DOUBLE_TYPE_SIZE)
6381 return true;
6382 return false;
6384 case MODE_DECIMAL_FLOAT:
6385 return false;
6387 default:
6388 gcc_unreachable ();
6392 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6393 it branches into the delay slot. Otherwise, return FALSE. */
6395 static bool
6396 branch_to_delay_slot_p (rtx_insn *insn)
6398 rtx_insn *jump_insn;
6400 if (dbr_sequence_length ())
6401 return FALSE;
6403 jump_insn = next_active_insn (JUMP_LABEL (insn));
6404 while (insn)
6406 insn = next_active_insn (insn);
6407 if (jump_insn == insn)
6408 return TRUE;
6410 /* We can't rely on the length of asms. So, we return FALSE when
6411 the branch is followed by an asm. */
6412 if (!insn
6413 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6414 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6415 || get_attr_length (insn) > 0)
6416 break;
6419 return FALSE;
6422 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6424 This occurs when INSN has an unfilled delay slot and is followed
6425 by an asm. Disaster can occur if the asm is empty and the jump
6426 branches into the delay slot. So, we add a nop in the delay slot
6427 when this occurs. */
6429 static bool
6430 branch_needs_nop_p (rtx_insn *insn)
6432 rtx_insn *jump_insn;
6434 if (dbr_sequence_length ())
6435 return FALSE;
6437 jump_insn = next_active_insn (JUMP_LABEL (insn));
6438 while (insn)
6440 insn = next_active_insn (insn);
6441 if (!insn || jump_insn == insn)
6442 return TRUE;
6444 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6445 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6446 && get_attr_length (insn) > 0)
6447 break;
6450 return FALSE;
6453 /* Return TRUE if INSN, a forward jump insn, can use nullification
6454 to skip the following instruction. This avoids an extra cycle due
6455 to a mis-predicted branch when we fall through. */
6457 static bool
6458 use_skip_p (rtx_insn *insn)
6460 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL (insn));
6462 while (insn)
6464 insn = next_active_insn (insn);
6466 /* We can't rely on the length of asms, so we can't skip asms. */
6467 if (!insn
6468 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6469 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6470 break;
6471 if (get_attr_length (insn) == 4
6472 && jump_insn == next_active_insn (insn))
6473 return TRUE;
6474 if (get_attr_length (insn) > 0)
6475 break;
6478 return FALSE;
6481 /* This routine handles all the normal conditional branch sequences we
6482 might need to generate. It handles compare immediate vs compare
6483 register, nullification of delay slots, varying length branches,
6484 negated branches, and all combinations of the above. It returns the
6485 output appropriate to emit the branch corresponding to all given
6486 parameters. */
6488 const char *
6489 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6491 static char buf[100];
6492 bool useskip;
6493 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6494 int length = get_attr_length (insn);
6495 int xdelay;
6497 /* A conditional branch to the following instruction (e.g. the delay slot)
6498 is asking for a disaster. This can happen when not optimizing and
6499 when jump optimization fails.
6501 While it is usually safe to emit nothing, this can fail if the
6502 preceding instruction is a nullified branch with an empty delay
6503 slot and the same branch target as this branch. We could check
6504 for this but jump optimization should eliminate nop jumps. It
6505 is always safe to emit a nop. */
6506 if (branch_to_delay_slot_p (insn))
6507 return "nop";
6509 /* The doubleword form of the cmpib instruction doesn't have the LEU
6510 and GTU conditions while the cmpb instruction does. Since we accept
6511 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6512 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6513 operands[2] = gen_rtx_REG (DImode, 0);
6514 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6515 operands[1] = gen_rtx_REG (DImode, 0);
6517 /* If this is a long branch with its delay slot unfilled, set `nullify'
6518 as it can nullify the delay slot and save a nop. */
6519 if (length == 8 && dbr_sequence_length () == 0)
6520 nullify = 1;
6522 /* If this is a short forward conditional branch which did not get
6523 its delay slot filled, the delay slot can still be nullified. */
6524 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6525 nullify = forward_branch_p (insn);
6527 /* A forward branch over a single nullified insn can be done with a
6528 comclr instruction. This avoids a single cycle penalty due to a
6529 mis-predicted branch if we fall through (branch not taken). */
6530 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6532 switch (length)
6534 /* All short conditional branches except backwards with an unfilled
6535 delay slot. */
6536 case 4:
6537 if (useskip)
6538 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6539 else
6540 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6541 if (GET_MODE (operands[1]) == DImode)
6542 strcat (buf, "*");
6543 if (negated)
6544 strcat (buf, "%B3");
6545 else
6546 strcat (buf, "%S3");
6547 if (useskip)
6548 strcat (buf, " %2,%r1,%%r0");
6549 else if (nullify)
6551 if (branch_needs_nop_p (insn))
6552 strcat (buf, ",n %2,%r1,%0%#");
6553 else
6554 strcat (buf, ",n %2,%r1,%0");
6556 else
6557 strcat (buf, " %2,%r1,%0");
6558 break;
6560 /* All long conditionals. Note a short backward branch with an
6561 unfilled delay slot is treated just like a long backward branch
6562 with an unfilled delay slot. */
6563 case 8:
6564 /* Handle weird backwards branch with a filled delay slot
6565 which is nullified. */
6566 if (dbr_sequence_length () != 0
6567 && ! forward_branch_p (insn)
6568 && nullify)
6570 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6571 if (GET_MODE (operands[1]) == DImode)
6572 strcat (buf, "*");
6573 if (negated)
6574 strcat (buf, "%S3");
6575 else
6576 strcat (buf, "%B3");
6577 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6579 /* Handle short backwards branch with an unfilled delay slot.
6580 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6581 taken and untaken branches. */
6582 else if (dbr_sequence_length () == 0
6583 && ! forward_branch_p (insn)
6584 && INSN_ADDRESSES_SET_P ()
6585 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6586 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6588 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6589 if (GET_MODE (operands[1]) == DImode)
6590 strcat (buf, "*");
6591 if (negated)
6592 strcat (buf, "%B3 %2,%r1,%0%#");
6593 else
6594 strcat (buf, "%S3 %2,%r1,%0%#");
6596 else
6598 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6599 if (GET_MODE (operands[1]) == DImode)
6600 strcat (buf, "*");
6601 if (negated)
6602 strcat (buf, "%S3");
6603 else
6604 strcat (buf, "%B3");
6605 if (nullify)
6606 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6607 else
6608 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6610 break;
6612 default:
6613 /* The reversed conditional branch must branch over one additional
6614 instruction if the delay slot is filled and needs to be extracted
6615 by pa_output_lbranch. If the delay slot is empty or this is a
6616 nullified forward branch, the instruction after the reversed
6617 condition branch must be nullified. */
6618 if (dbr_sequence_length () == 0
6619 || (nullify && forward_branch_p (insn)))
6621 nullify = 1;
6622 xdelay = 0;
6623 operands[4] = GEN_INT (length);
6625 else
6627 xdelay = 1;
6628 operands[4] = GEN_INT (length + 4);
6631 /* Create a reversed conditional branch which branches around
6632 the following insns. */
6633 if (GET_MODE (operands[1]) != DImode)
6635 if (nullify)
6637 if (negated)
6638 strcpy (buf,
6639 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6640 else
6641 strcpy (buf,
6642 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6644 else
6646 if (negated)
6647 strcpy (buf,
6648 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6649 else
6650 strcpy (buf,
6651 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6654 else
6656 if (nullify)
6658 if (negated)
6659 strcpy (buf,
6660 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6661 else
6662 strcpy (buf,
6663 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6665 else
6667 if (negated)
6668 strcpy (buf,
6669 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6670 else
6671 strcpy (buf,
6672 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6676 output_asm_insn (buf, operands);
6677 return pa_output_lbranch (operands[0], insn, xdelay);
6679 return buf;
6682 /* This routine handles output of long unconditional branches that
6683 exceed the maximum range of a simple branch instruction. Since
6684 we don't have a register available for the branch, we save register
6685 %r1 in the frame marker, load the branch destination DEST into %r1,
6686 execute the branch, and restore %r1 in the delay slot of the branch.
6688 Since long branches may have an insn in the delay slot and the
6689 delay slot is used to restore %r1, we in general need to extract
6690 this insn and execute it before the branch. However, to facilitate
6691 use of this function by conditional branches, we also provide an
6692 option to not extract the delay insn so that it will be emitted
6693 after the long branch. So, if there is an insn in the delay slot,
6694 it is extracted if XDELAY is nonzero.
6696 The lengths of the various long-branch sequences are 20, 16 and 24
6697 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6699 const char *
6700 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6702 rtx xoperands[2];
6704 xoperands[0] = dest;
6706 /* First, free up the delay slot. */
6707 if (xdelay && dbr_sequence_length () != 0)
6709 /* We can't handle a jump in the delay slot. */
6710 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6712 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6713 optimize, 0, NULL);
6715 /* Now delete the delay insn. */
6716 SET_INSN_DELETED (NEXT_INSN (insn));
6719 /* Output an insn to save %r1. The runtime documentation doesn't
6720 specify whether the "Clean Up" slot in the caller's frame can
6721 be clobbered by the callee. It isn't copied by HP's builtin
6722 alloca, so this suggests that it can be clobbered if necessary.
6723 The "Static Link" location is copied by HP builtin alloca, so
6724 we avoid using it. Using the cleanup slot might be a problem
6725 if we have to interoperate with languages that pass cleanup
6726 information. However, it should be possible to handle these
6727 situations with GCC's asm feature.
6729 The "Current RP" slot is reserved for the called procedure, so
6730 we try to use it when we don't have a frame of our own. It's
6731 rather unlikely that we won't have a frame when we need to emit
6732 a very long branch.
6734 Really the way to go long term is a register scavenger; goto
6735 the target of the jump and find a register which we can use
6736 as a scratch to hold the value in %r1. Then, we wouldn't have
6737 to free up the delay slot or clobber a slot that may be needed
6738 for other purposes. */
6739 if (TARGET_64BIT)
6741 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6742 /* Use the return pointer slot in the frame marker. */
6743 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6744 else
6745 /* Use the slot at -40 in the frame marker since HP builtin
6746 alloca doesn't copy it. */
6747 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6749 else
6751 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6752 /* Use the return pointer slot in the frame marker. */
6753 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6754 else
6755 /* Use the "Clean Up" slot in the frame marker. In GCC,
6756 the only other use of this location is for copying a
6757 floating point double argument from a floating-point
6758 register to two general registers. The copy is done
6759 as an "atomic" operation when outputting a call, so it
6760 won't interfere with our using the location here. */
6761 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6764 if (TARGET_PORTABLE_RUNTIME)
6766 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6767 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6768 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6770 else if (flag_pic)
6772 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6773 if (TARGET_SOM || !TARGET_GAS)
6775 xoperands[1] = gen_label_rtx ();
6776 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6777 targetm.asm_out.internal_label (asm_out_file, "L",
6778 CODE_LABEL_NUMBER (xoperands[1]));
6779 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6781 else
6783 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6784 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6786 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6788 else
6789 /* Now output a very long branch to the original target. */
6790 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6792 /* Now restore the value of %r1 in the delay slot. */
6793 if (TARGET_64BIT)
6795 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6796 return "ldd -16(%%r30),%%r1";
6797 else
6798 return "ldd -40(%%r30),%%r1";
6800 else
6802 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6803 return "ldw -20(%%r30),%%r1";
6804 else
6805 return "ldw -12(%%r30),%%r1";
6809 /* This routine handles all the branch-on-bit conditional branch sequences we
6810 might need to generate. It handles nullification of delay slots,
6811 varying length branches, negated branches, and all combinations of the
6812 above. It returns the appropriate output template to emit the branch. */
6814 const char *
6815 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6817 static char buf[100];
6818 bool useskip;
6819 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6820 int length = get_attr_length (insn);
6821 int xdelay;
6823 /* A conditional branch to the following instruction (e.g. the delay slot) is
6824 asking for a disaster. I do not think this can happen as this pattern
6825 is only used when optimizing; jump optimization should eliminate the
6826 jump. But be prepared just in case. */
6828 if (branch_to_delay_slot_p (insn))
6829 return "nop";
6831 /* If this is a long branch with its delay slot unfilled, set `nullify'
6832 as it can nullify the delay slot and save a nop. */
6833 if (length == 8 && dbr_sequence_length () == 0)
6834 nullify = 1;
6836 /* If this is a short forward conditional branch which did not get
6837 its delay slot filled, the delay slot can still be nullified. */
6838 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6839 nullify = forward_branch_p (insn);
6841 /* A forward branch over a single nullified insn can be done with an
6842 extrs instruction. This avoids a single cycle penalty due to a
6843 mis-predicted branch if we fall through (branch not taken). */
6844 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6846 switch (length)
6849 /* All short conditional branches except backwards with an unfilled
6850 delay slot. */
6851 case 4:
6852 if (useskip)
6853 strcpy (buf, "{extrs,|extrw,s,}");
6854 else
6855 strcpy (buf, "bb,");
6856 if (useskip && GET_MODE (operands[0]) == DImode)
6857 strcpy (buf, "extrd,s,*");
6858 else if (GET_MODE (operands[0]) == DImode)
6859 strcpy (buf, "bb,*");
6860 if ((which == 0 && negated)
6861 || (which == 1 && ! negated))
6862 strcat (buf, ">=");
6863 else
6864 strcat (buf, "<");
6865 if (useskip)
6866 strcat (buf, " %0,%1,1,%%r0");
6867 else if (nullify && negated)
6869 if (branch_needs_nop_p (insn))
6870 strcat (buf, ",n %0,%1,%3%#");
6871 else
6872 strcat (buf, ",n %0,%1,%3");
6874 else if (nullify && ! negated)
6876 if (branch_needs_nop_p (insn))
6877 strcat (buf, ",n %0,%1,%2%#");
6878 else
6879 strcat (buf, ",n %0,%1,%2");
6881 else if (! nullify && negated)
6882 strcat (buf, " %0,%1,%3");
6883 else if (! nullify && ! negated)
6884 strcat (buf, " %0,%1,%2");
6885 break;
6887 /* All long conditionals. Note a short backward branch with an
6888 unfilled delay slot is treated just like a long backward branch
6889 with an unfilled delay slot. */
6890 case 8:
6891 /* Handle weird backwards branch with a filled delay slot
6892 which is nullified. */
6893 if (dbr_sequence_length () != 0
6894 && ! forward_branch_p (insn)
6895 && nullify)
6897 strcpy (buf, "bb,");
6898 if (GET_MODE (operands[0]) == DImode)
6899 strcat (buf, "*");
6900 if ((which == 0 && negated)
6901 || (which == 1 && ! negated))
6902 strcat (buf, "<");
6903 else
6904 strcat (buf, ">=");
6905 if (negated)
6906 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6907 else
6908 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6910 /* Handle short backwards branch with an unfilled delay slot.
6911 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6912 taken and untaken branches. */
6913 else if (dbr_sequence_length () == 0
6914 && ! forward_branch_p (insn)
6915 && INSN_ADDRESSES_SET_P ()
6916 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6917 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6919 strcpy (buf, "bb,");
6920 if (GET_MODE (operands[0]) == DImode)
6921 strcat (buf, "*");
6922 if ((which == 0 && negated)
6923 || (which == 1 && ! negated))
6924 strcat (buf, ">=");
6925 else
6926 strcat (buf, "<");
6927 if (negated)
6928 strcat (buf, " %0,%1,%3%#");
6929 else
6930 strcat (buf, " %0,%1,%2%#");
6932 else
6934 if (GET_MODE (operands[0]) == DImode)
6935 strcpy (buf, "extrd,s,*");
6936 else
6937 strcpy (buf, "{extrs,|extrw,s,}");
6938 if ((which == 0 && negated)
6939 || (which == 1 && ! negated))
6940 strcat (buf, "<");
6941 else
6942 strcat (buf, ">=");
6943 if (nullify && negated)
6944 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6945 else if (nullify && ! negated)
6946 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6947 else if (negated)
6948 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6949 else
6950 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6952 break;
6954 default:
6955 /* The reversed conditional branch must branch over one additional
6956 instruction if the delay slot is filled and needs to be extracted
6957 by pa_output_lbranch. If the delay slot is empty or this is a
6958 nullified forward branch, the instruction after the reversed
6959 condition branch must be nullified. */
6960 if (dbr_sequence_length () == 0
6961 || (nullify && forward_branch_p (insn)))
6963 nullify = 1;
6964 xdelay = 0;
6965 operands[4] = GEN_INT (length);
6967 else
6969 xdelay = 1;
6970 operands[4] = GEN_INT (length + 4);
6973 if (GET_MODE (operands[0]) == DImode)
6974 strcpy (buf, "bb,*");
6975 else
6976 strcpy (buf, "bb,");
6977 if ((which == 0 && negated)
6978 || (which == 1 && !negated))
6979 strcat (buf, "<");
6980 else
6981 strcat (buf, ">=");
6982 if (nullify)
6983 strcat (buf, ",n %0,%1,.+%4");
6984 else
6985 strcat (buf, " %0,%1,.+%4");
6986 output_asm_insn (buf, operands);
6987 return pa_output_lbranch (negated ? operands[3] : operands[2],
6988 insn, xdelay);
6990 return buf;
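/* An assumed rendering of the short forward form built above (word
   mode, no skip, no nullification): the template is "bb,< %0,%1,%2",
   or "bb,>=" when the sense is inverted, e.g.

       bb,< %r19,5,L$0012

   with hypothetical register, bit-position and label operands.  */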
6993 /* This routine handles all the branch-on-variable-bit conditional branch
6994 sequences we might need to generate. It handles nullification of delay
6995 slots, varying length branches, negated branches, and all combinations
6996 of the above. It returns the appropriate output template to emit the
6997 branch. */
6999 const char *
7000 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7001 int which)
7003 static char buf[100];
7004 bool useskip;
7005 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7006 int length = get_attr_length (insn);
7007 int xdelay;
7009 /* A conditional branch to the following instruction (e.g. the delay slot) is
7010 asking for a disaster. I do not think this can happen as this pattern
7011 is only used when optimizing; jump optimization should eliminate the
7012 jump. But be prepared just in case. */
7014 if (branch_to_delay_slot_p (insn))
7015 return "nop";
7017 /* If this is a long branch with its delay slot unfilled, set `nullify'
7018 as it can nullify the delay slot and save a nop. */
7019 if (length == 8 && dbr_sequence_length () == 0)
7020 nullify = 1;
7022 /* If this is a short forward conditional branch which did not get
7023 its delay slot filled, the delay slot can still be nullified. */
7024 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7025 nullify = forward_branch_p (insn);
7027 /* A forward branch over a single nullified insn can be done with an
7028 extrs instruction. This avoids a single cycle penalty due to a
7029 mis-predicted branch if we fall through (branch not taken). */
7030 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7032 switch (length)
7035 /* All short conditional branches except backwards with an unfilled
7036 delay slot. */
7037 case 4:
7038 if (useskip)
7039 strcpy (buf, "{vextrs,|extrw,s,}");
7040 else
7041 strcpy (buf, "{bvb,|bb,}");
7042 if (useskip && GET_MODE (operands[0]) == DImode)
7043 strcpy (buf, "extrd,s,*");
7044 else if (GET_MODE (operands[0]) == DImode)
7045 strcpy (buf, "bb,*");
7046 if ((which == 0 && negated)
7047 || (which == 1 && ! negated))
7048 strcat (buf, ">=");
7049 else
7050 strcat (buf, "<");
7051 if (useskip)
7052 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7053 else if (nullify && negated)
7055 if (branch_needs_nop_p (insn))
7056 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7057 else
7058 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7060 else if (nullify && ! negated)
7062 if (branch_needs_nop_p (insn))
7063 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7064 else
7065 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7067 else if (! nullify && negated)
7068 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7069 else if (! nullify && ! negated)
7070 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7071 break;
7073 /* All long conditionals. Note a short backward branch with an
7074 unfilled delay slot is treated just like a long backward branch
7075 with an unfilled delay slot. */
7076 case 8:
7077 /* Handle weird backwards branch with a filled delay slot
7078 which is nullified. */
7079 if (dbr_sequence_length () != 0
7080 && ! forward_branch_p (insn)
7081 && nullify)
7083 strcpy (buf, "{bvb,|bb,}");
7084 if (GET_MODE (operands[0]) == DImode)
7085 strcat (buf, "*");
7086 if ((which == 0 && negated)
7087 || (which == 1 && ! negated))
7088 strcat (buf, "<");
7089 else
7090 strcat (buf, ">=");
7091 if (negated)
7092 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7093 else
7094 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7096 /* Handle short backwards branch with an unfilled delay slot.
7097 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7098 taken and untaken branches. */
7099 else if (dbr_sequence_length () == 0
7100 && ! forward_branch_p (insn)
7101 && INSN_ADDRESSES_SET_P ()
7102 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7103 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7105 strcpy (buf, "{bvb,|bb,}");
7106 if (GET_MODE (operands[0]) == DImode)
7107 strcat (buf, "*");
7108 if ((which == 0 && negated)
7109 || (which == 1 && ! negated))
7110 strcat (buf, ">=");
7111 else
7112 strcat (buf, "<");
7113 if (negated)
7114 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7115 else
7116 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7118 else
7120 strcpy (buf, "{vextrs,|extrw,s,}");
7121 if (GET_MODE (operands[0]) == DImode)
7122 strcpy (buf, "extrd,s,*");
7123 if ((which == 0 && negated)
7124 || (which == 1 && ! negated))
7125 strcat (buf, "<");
7126 else
7127 strcat (buf, ">=");
7128 if (nullify && negated)
7129 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7130 else if (nullify && ! negated)
7131 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7132 else if (negated)
7133 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7134 else
7135 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7137 break;
7139 default:
7140 /* The reversed conditional branch must branch over one additional
7141 instruction if the delay slot is filled and needs to be extracted
7142 by pa_output_lbranch. If the delay slot is empty or this is a
7143 nullified forward branch, the instruction after the reversed
7144 condition branch must be nullified. */
7145 if (dbr_sequence_length () == 0
7146 || (nullify && forward_branch_p (insn)))
7148 nullify = 1;
7149 xdelay = 0;
7150 operands[4] = GEN_INT (length);
7152 else
7154 xdelay = 1;
7155 operands[4] = GEN_INT (length + 4);
7158 if (GET_MODE (operands[0]) == DImode)
7159 strcpy (buf, "bb,*");
7160 else
7161 strcpy (buf, "{bvb,|bb,}");
7162 if ((which == 0 && negated)
7163 || (which == 1 && !negated))
7164 strcat (buf, "<");
7165 else
7166 strcat (buf, ">=");
7167 if (nullify)
7168 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7169 else
7170 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7171 output_asm_insn (buf, operands);
7172 return pa_output_lbranch (negated ? operands[3] : operands[2],
7173 insn, xdelay);
7175 return buf;
7178 /* Return the output template for emitting a dbra type insn.
7180 Note it may perform some output operations on its own before
7181 returning the final output string. */
7182 const char *
7183 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7185 int length = get_attr_length (insn);
7187 /* A conditional branch to the following instruction (e.g. the delay slot) is
7188 asking for a disaster. Be prepared! */
7190 if (branch_to_delay_slot_p (insn))
7192 if (which_alternative == 0)
7193 return "ldo %1(%0),%0";
7194 else if (which_alternative == 1)
7196 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7197 output_asm_insn ("ldw -16(%%r30),%4", operands);
7198 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7199 return "{fldws|fldw} -16(%%r30),%0";
7201 else
7203 output_asm_insn ("ldw %0,%4", operands);
7204 return "ldo %1(%4),%4\n\tstw %4,%0";
7208 if (which_alternative == 0)
7210 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7211 int xdelay;
7213 /* If this is a long branch with its delay slot unfilled, set `nullify'
7214 as it can nullify the delay slot and save a nop. */
7215 if (length == 8 && dbr_sequence_length () == 0)
7216 nullify = 1;
7218 /* If this is a short forward conditional branch which did not get
7219 its delay slot filled, the delay slot can still be nullified. */
7220 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7221 nullify = forward_branch_p (insn);
7223 switch (length)
7225 case 4:
7226 if (nullify)
7228 if (branch_needs_nop_p (insn))
7229 return "addib,%C2,n %1,%0,%3%#";
7230 else
7231 return "addib,%C2,n %1,%0,%3";
7233 else
7234 return "addib,%C2 %1,%0,%3";
7236 case 8:
7237 /* Handle weird backwards branch with a filled delay slot
7238 which is nullified. */
7239 if (dbr_sequence_length () != 0
7240 && ! forward_branch_p (insn)
7241 && nullify)
7242 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7243 /* Handle short backwards branch with an unfilled delay slot.
7244 Using an addb;nop rather than addi;bl saves 1 cycle for both
7245 taken and untaken branches. */
7246 else if (dbr_sequence_length () == 0
7247 && ! forward_branch_p (insn)
7248 && INSN_ADDRESSES_SET_P ()
7249 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7250 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7251 return "addib,%C2 %1,%0,%3%#";
7253 /* Handle normal cases. */
7254 if (nullify)
7255 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7256 else
7257 return "addi,%N2 %1,%0,%0\n\tb %3";
7259 default:
7260 /* The reversed conditional branch must branch over one additional
7261 instruction if the delay slot is filled and needs to be extracted
7262 by pa_output_lbranch. If the delay slot is empty or this is a
7263 nullified forward branch, the instruction after the reversed
7264 condition branch must be nullified. */
7265 if (dbr_sequence_length () == 0
7266 || (nullify && forward_branch_p (insn)))
7268 nullify = 1;
7269 xdelay = 0;
7270 operands[4] = GEN_INT (length);
7272 else
7274 xdelay = 1;
7275 operands[4] = GEN_INT (length + 4);
7278 if (nullify)
7279 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7280 else
7281 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7283 return pa_output_lbranch (operands[3], insn, xdelay);
7287 /* Deal with gross reload from FP register case. */
7288 else if (which_alternative == 1)
7290 /* Move loop counter from FP register to MEM then into a GR,
7291 increment the GR, store the GR into MEM, and finally reload
7292 the FP register from MEM from within the branch's delay slot. */
7293 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7294 operands);
7295 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7296 if (length == 24)
7297 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7298 else if (length == 28)
7299 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7300 else
7302 operands[5] = GEN_INT (length - 16);
7303 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7304 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7305 return pa_output_lbranch (operands[3], insn, 0);
7308 /* Deal with gross reload from memory case. */
7309 else
7311 /* Reload loop counter from memory, the store back to memory
7312 happens in the branch's delay slot. */
7313 output_asm_insn ("ldw %0,%4", operands);
7314 if (length == 12)
7315 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7316 else if (length == 16)
7317 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7318 else
7320 operands[5] = GEN_INT (length - 4);
7321 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7322 return pa_output_lbranch (operands[3], insn, 0);
7327 /* Return the output template for emitting a movb type insn.
7329 Note it may perform some output operations on its own before
7330 returning the final output string. */
7331 const char *
7332 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7333 int reverse_comparison)
7335 int length = get_attr_length (insn);
7337 /* A conditional branch to the following instruction (e.g. the delay slot) is
7338 asking for a disaster. Be prepared! */
7340 if (branch_to_delay_slot_p (insn))
7342 if (which_alternative == 0)
7343 return "copy %1,%0";
7344 else if (which_alternative == 1)
7346 output_asm_insn ("stw %1,-16(%%r30)", operands);
7347 return "{fldws|fldw} -16(%%r30),%0";
7349 else if (which_alternative == 2)
7350 return "stw %1,%0";
7351 else
7352 return "mtsar %r1";
7355 /* Support the second variant. */
7356 if (reverse_comparison)
7357 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7359 if (which_alternative == 0)
7361 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7362 int xdelay;
7364 /* If this is a long branch with its delay slot unfilled, set `nullify'
7365 as it can nullify the delay slot and save a nop. */
7366 if (length == 8 && dbr_sequence_length () == 0)
7367 nullify = 1;
7369 /* If this is a short forward conditional branch which did not get
7370 its delay slot filled, the delay slot can still be nullified. */
7371 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7372 nullify = forward_branch_p (insn);
7374 switch (length)
7376 case 4:
7377 if (nullify)
7379 if (branch_needs_nop_p (insn))
7380 return "movb,%C2,n %1,%0,%3%#";
7381 else
7382 return "movb,%C2,n %1,%0,%3";
7384 else
7385 return "movb,%C2 %1,%0,%3";
7387 case 8:
7388 /* Handle weird backwards branch with a filled delay slot
7389 which is nullified. */
7390 if (dbr_sequence_length () != 0
7391 && ! forward_branch_p (insn)
7392 && nullify)
7393 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7395 /* Handle short backwards branch with an unfilled delay slot.
7396 Using a movb;nop rather than or;bl saves 1 cycle for both
7397 taken and untaken branches. */
7398 else if (dbr_sequence_length () == 0
7399 && ! forward_branch_p (insn)
7400 && INSN_ADDRESSES_SET_P ()
7401 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7402 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7403 return "movb,%C2 %1,%0,%3%#";
7404 /* Handle normal cases. */
7405 if (nullify)
7406 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7407 else
7408 return "or,%N2 %1,%%r0,%0\n\tb %3";
7410 default:
7411 /* The reversed conditional branch must branch over one additional
7412 instruction if the delay slot is filled and needs to be extracted
7413 by pa_output_lbranch. If the delay slot is empty or this is a
7414 nullified forward branch, the instruction after the reversed
7415 condition branch must be nullified. */
7416 if (dbr_sequence_length () == 0
7417 || (nullify && forward_branch_p (insn)))
7419 nullify = 1;
7420 xdelay = 0;
7421 operands[4] = GEN_INT (length);
7423 else
7425 xdelay = 1;
7426 operands[4] = GEN_INT (length + 4);
7429 if (nullify)
7430 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7431 else
7432 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7434 return pa_output_lbranch (operands[3], insn, xdelay);
7437 /* Deal with gross reload for FP destination register case. */
7438 else if (which_alternative == 1)
7440 /* Move source register to MEM, perform the branch test, then
7441 finally load the FP register from MEM from within the branch's
7442 delay slot. */
7443 output_asm_insn ("stw %1,-16(%%r30)", operands);
7444 if (length == 12)
7445 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7446 else if (length == 16)
7447 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7448 else
7450 operands[4] = GEN_INT (length - 4);
7451 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7452 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7453 return pa_output_lbranch (operands[3], insn, 0);
7456 /* Deal with gross reload from memory case. */
7457 else if (which_alternative == 2)
7459 /* Reload loop counter from memory, the store back to memory
7460 happens in the branch's delay slot. */
7461 if (length == 8)
7462 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7463 else if (length == 12)
7464 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7465 else
7467 operands[4] = GEN_INT (length);
7468 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7469 operands);
7470 return pa_output_lbranch (operands[3], insn, 0);
7473 /* Handle SAR as a destination. */
7474 else
7476 if (length == 8)
7477 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7478 else if (length == 12)
7479 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7480 else
7482 operands[4] = GEN_INT (length);
7483 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7484 operands);
7485 return pa_output_lbranch (operands[3], insn, 0);
7490 /* Copy any FP arguments in INSN into integer registers. */
7491 static void
7492 copy_fp_args (rtx_insn *insn)
7494 rtx link;
7495 rtx xoperands[2];
7497 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7499 int arg_mode, regno;
7500 rtx use = XEXP (link, 0);
7502 if (! (GET_CODE (use) == USE
7503 && GET_CODE (XEXP (use, 0)) == REG
7504 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7505 continue;
7507 arg_mode = GET_MODE (XEXP (use, 0));
7508 regno = REGNO (XEXP (use, 0));
7510 /* Is it a floating point register? */
7511 if (regno >= 32 && regno <= 39)
7513 /* Copy the FP register into an integer register via memory. */
7514 if (arg_mode == SFmode)
7516 xoperands[0] = XEXP (use, 0);
7517 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7518 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7519 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7521 else
7523 xoperands[0] = XEXP (use, 0);
7524 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7525 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7526 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7527 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7533 /* Compute length of the FP argument copy sequence for INSN. */
7534 static int
7535 length_fp_args (rtx_insn *insn)
7537 int length = 0;
7538 rtx link;
7540 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7542 int arg_mode, regno;
7543 rtx use = XEXP (link, 0);
7545 if (! (GET_CODE (use) == USE
7546 && GET_CODE (XEXP (use, 0)) == REG
7547 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7548 continue;
7550 arg_mode = GET_MODE (XEXP (use, 0));
7551 regno = REGNO (XEXP (use, 0));
7553 /* Is it a floating point register? */
7554 if (regno >= 32 && regno <= 39)
7556 if (arg_mode == SFmode)
7557 length += 8;
7558 else
7559 length += 12;
7563 return length;
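/* The 8 and 12 byte increments above match copy_fp_args: an SFmode
   copy is two 4-byte insns (fstw and ldw), while a DFmode copy is
   three (fstd followed by two ldws).  */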
7566 /* Return the attribute length for the millicode call instruction INSN.
7567 The length must match the code generated by pa_output_millicode_call.
7568 We include the delay slot in the returned length as it is better to
7569 overestimate the length than to underestimate it. */
7571 int
7572 pa_attr_length_millicode_call (rtx_insn *insn)
7574 unsigned long distance = -1;
7575 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7577 if (INSN_ADDRESSES_SET_P ())
7579 distance = (total + insn_current_reference_address (insn));
7580 if (distance < total)
7581 distance = -1;
7584 if (TARGET_64BIT)
7586 if (!TARGET_LONG_CALLS && distance < 7600000)
7587 return 8;
7589 return 20;
7591 else if (TARGET_PORTABLE_RUNTIME)
7592 return 24;
7593 else
7595 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7596 return 8;
7598 if (!flag_pic)
7599 return 12;
7601 return 24;
7605 /* INSN is a function call.
7607 CALL_DEST is the routine we are calling. */
7609 const char *
7610 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7612 int attr_length = get_attr_length (insn);
7613 int seq_length = dbr_sequence_length ();
7614 rtx xoperands[3];
7616 xoperands[0] = call_dest;
7617 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7619 /* Handle the common case where we are sure that the branch will
7620 reach the beginning of the $CODE$ subspace. The within reach
7621 form of the $$sh_func_adrs call has a length of 28. Because it
7622 has an attribute type of sh_func_adrs, it never has a nonzero
7623 sequence length (i.e., the delay slot is never filled). */
7624 if (!TARGET_LONG_CALLS
7625 && (attr_length == 8
7626 || (attr_length == 28
7627 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7629 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7631 else
7633 if (TARGET_64BIT)
7635 /* It might seem that one insn could be saved by accessing
7636 the millicode function using the linkage table. However,
7637 this doesn't work in shared libraries and other dynamically
7638 loaded objects. Using a pc-relative sequence also avoids
7639 problems related to the implicit use of the gp register. */
7640 output_asm_insn ("b,l .+8,%%r1", xoperands);
7642 if (TARGET_GAS)
7644 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7645 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7647 else
7649 xoperands[1] = gen_label_rtx ();
7650 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7651 targetm.asm_out.internal_label (asm_out_file, "L",
7652 CODE_LABEL_NUMBER (xoperands[1]));
7653 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7656 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7658 else if (TARGET_PORTABLE_RUNTIME)
7660 /* Pure portable runtime doesn't allow be/ble; we also don't
7661 have PIC support in the assembler/linker, so this sequence
7662 is needed. */
7664 /* Get the address of our target into %r1. */
7665 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7666 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7668 /* Get our return address into %r31. */
7669 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7670 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7672 /* Jump to our target address in %r1. */
7673 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7675 else if (!flag_pic)
7677 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7678 if (TARGET_PA_20)
7679 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7680 else
7681 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7683 else
7685 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7686 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7688 if (TARGET_SOM || !TARGET_GAS)
7690 /* The HP assembler can generate relocations for the
7691 difference of two symbols. GAS can do this for a
7692 millicode symbol but not an arbitrary external
7693 symbol when generating SOM output. */
7694 xoperands[1] = gen_label_rtx ();
7695 targetm.asm_out.internal_label (asm_out_file, "L",
7696 CODE_LABEL_NUMBER (xoperands[1]));
7697 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7698 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7700 else
7702 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7703 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7704 xoperands);
7707 /* Jump to our target address in %r1. */
7708 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7712 if (seq_length == 0)
7713 output_asm_insn ("nop", xoperands);
7715 return "";
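/* For concreteness, the non-PIC long form above turns a call to a
   millicode routine (say, $$mulI) into roughly

	ldil L'$$mulI,%r1
	ble R'$$mulI(%sr4,%r1)
	nop

   This is an illustrative sketch with an unfilled delay slot; on
   TARGET_PA_20 the ble is replaced by be,l as shown in the code. */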
7718 /* Return the attribute length of the call instruction INSN. The SIBCALL
7719 flag indicates whether INSN is a regular call or a sibling call. The
7720 length returned must be longer than the code actually generated by
7721 pa_output_call. Since branch shortening is done before delay branch
7722 sequencing, there is no way to determine whether or not the delay
7723 slot will be filled during branch shortening. Even when the delay
7724 slot is filled, we may have to add a nop if the delay slot contains
7725 a branch that can't reach its target. Thus, we always have to include
7726 the delay slot in the length estimate. This used to be done in
7727 pa_adjust_insn_length but we do it here now as some sequences always
7728 fill the delay slot and we can save four bytes in the estimate for
7729 these sequences. */
7732 pa_attr_length_call (rtx_insn *insn, int sibcall)
7734 int local_call;
7735 rtx call, call_dest;
7736 tree call_decl;
7737 int length = 0;
7738 rtx pat = PATTERN (insn);
7739 unsigned long distance = -1;
7741 gcc_assert (CALL_P (insn));
7743 if (INSN_ADDRESSES_SET_P ())
7745 unsigned long total;
7747 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7748 distance = (total + insn_current_reference_address (insn));
7749 if (distance < total)
7750 distance = -1;
7753 gcc_assert (GET_CODE (pat) == PARALLEL);
7755 /* Get the call rtx. */
7756 call = XVECEXP (pat, 0, 0);
7757 if (GET_CODE (call) == SET)
7758 call = SET_SRC (call);
7760 gcc_assert (GET_CODE (call) == CALL);
7762 /* Determine if this is a local call. */
7763 call_dest = XEXP (XEXP (call, 0), 0);
7764 call_decl = SYMBOL_REF_DECL (call_dest);
7765 local_call = call_decl && targetm.binds_local_p (call_decl);
7767 /* pc-relative branch. */
7768 if (!TARGET_LONG_CALLS
7769 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7770 || distance < MAX_PCREL17F_OFFSET))
7771 length += 8;
7773 /* 64-bit plabel sequence. */
7774 else if (TARGET_64BIT && !local_call)
7775 length += sibcall ? 28 : 24;
7777 /* non-pic long absolute branch sequence. */
7778 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7779 length += 12;
7781 /* long pc-relative branch sequence. */
7782 else if (TARGET_LONG_PIC_SDIFF_CALL
7783 || (TARGET_GAS && !TARGET_SOM
7784 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7786 length += 20;
7788 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7789 length += 8;
7792 /* 32-bit plabel sequence. */
7793 else
7795 length += 32;
7797 if (TARGET_SOM)
7798 length += length_fp_args (insn);
7800 if (flag_pic)
7801 length += 4;
7803 if (!TARGET_PA_20)
7805 if (!sibcall)
7806 length += 8;
7808 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7809 length += 8;
7813 return length;
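/* A worked example of the 32-bit plabel accounting above: on a
   !TARGET_PA_20 target with space registers enabled and flag_pic set,
   a non-sibling external call costs 32 + 4 (PIC) + 8 (return pointer
   setup) + 8 (ldsid/mtsp) = 52 bytes, plus the FP argument copies on
   SOM. */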
7816 /* INSN is a function call.
7818 CALL_DEST is the routine we are calling. */
7820 const char *
7821 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7823 int seq_length = dbr_sequence_length ();
7824 tree call_decl = SYMBOL_REF_DECL (call_dest);
7825 int local_call = call_decl && targetm.binds_local_p (call_decl);
7826 rtx xoperands[2];
7828 xoperands[0] = call_dest;
7830 /* Handle the common case where we're sure that the branch will reach
7831 the beginning of the "$CODE$" subspace. This is the beginning of
7832 the current function if we are in a named section. */
7833 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7835 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7836 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7838 else
7840 if (TARGET_64BIT && !local_call)
7842 /* ??? As far as I can tell, the HP linker doesn't support the
7843 long pc-relative sequence described in the 64-bit runtime
7844 architecture. So, we use a slightly longer indirect call. */
7845 xoperands[0] = pa_get_deferred_plabel (call_dest);
7846 xoperands[1] = gen_label_rtx ();
7848 /* If this isn't a sibcall, we put the load of %r27 into the
7849 delay slot. We can't do this in a sibcall as we don't
7850 have a second call-clobbered scratch register available.
7851 We don't need to do anything when generating fast indirect
7852 calls. */
7853 if (seq_length != 0 && !sibcall)
7855 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7856 optimize, 0, NULL);
7858 /* Now delete the delay insn. */
7859 SET_INSN_DELETED (NEXT_INSN (insn));
7860 seq_length = 0;
7863 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7864 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7865 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7867 if (sibcall)
7869 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7870 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7871 output_asm_insn ("bve (%%r1)", xoperands);
7873 else
7875 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7876 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7877 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7878 seq_length = 1;
7881 else
7883 int indirect_call = 0;
7885 /* Emit a long call. There are several different sequences
7886 of increasing length and complexity. In most cases,
7887 they don't allow an instruction in the delay slot. */
7888 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7889 && !TARGET_LONG_PIC_SDIFF_CALL
7890 && !(TARGET_GAS && !TARGET_SOM
7891 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7892 && !TARGET_64BIT)
7893 indirect_call = 1;
7895 if (seq_length != 0
7896 && !sibcall
7897 && (!TARGET_PA_20
7898 || indirect_call
7899 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7901 /* A non-jump insn in the delay slot. By definition we can
7902 emit this insn before the call (and in fact before argument
7903 relocation). */
7904 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7905 NULL);
7907 /* Now delete the delay insn. */
7908 SET_INSN_DELETED (NEXT_INSN (insn));
7909 seq_length = 0;
7912 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7914 /* This is the best sequence for making long calls in
7915 non-pic code. Unfortunately, GNU ld doesn't provide
7916 the stub needed for external calls, and GAS's support
7917 for this with the SOM linker is buggy. It is safe
7918 to use this for local calls. */
7919 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7920 if (sibcall)
7921 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7922 else
7924 if (TARGET_PA_20)
7925 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7926 xoperands);
7927 else
7928 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7930 output_asm_insn ("copy %%r31,%%r2", xoperands);
7931 seq_length = 1;
7934 else
7936 if (TARGET_LONG_PIC_SDIFF_CALL)
7938 /* The HP assembler and linker can handle relocations
7939 for the difference of two symbols. The HP assembler
7940 recognizes the sequence as a pc-relative call and
7941 the linker provides stubs when needed. */
7942 xoperands[1] = gen_label_rtx ();
7943 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7944 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7945 targetm.asm_out.internal_label (asm_out_file, "L",
7946 CODE_LABEL_NUMBER (xoperands[1]));
7947 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7949 else if (TARGET_GAS && !TARGET_SOM
7950 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7952 /* GAS currently can't generate the relocations that
7953 are needed for the SOM linker under HP-UX using this
7954 sequence. The GNU linker doesn't generate the stubs
7955 that are needed for external calls on TARGET_ELF32
7956 with this sequence. For now, we have to use a
7957 longer plabel sequence when using GAS. */
7958 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7959 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7960 xoperands);
7961 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7962 xoperands);
7964 else
7966 /* Emit a long plabel-based call sequence. This is
7967 essentially an inline implementation of $$dyncall.
7968 We don't actually try to call $$dyncall as this is
7969 as difficult as calling the function itself. */
7970 xoperands[0] = pa_get_deferred_plabel (call_dest);
7971 xoperands[1] = gen_label_rtx ();
7973 /* Since the call is indirect, FP arguments in registers
7974 need to be copied to the general registers. Then, the
7975 argument relocation stub will copy them back. */
7976 if (TARGET_SOM)
7977 copy_fp_args (insn);
7979 if (flag_pic)
7981 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7982 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7983 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7985 else
7987 output_asm_insn ("addil LR'%0-$global$,%%r27",
7988 xoperands);
7989 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7990 xoperands);
7993 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7994 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7995 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7996 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7998 if (!sibcall && !TARGET_PA_20)
8000 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8001 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8002 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8003 else
8004 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8008 if (TARGET_PA_20)
8010 if (sibcall)
8011 output_asm_insn ("bve (%%r1)", xoperands);
8012 else
8014 if (indirect_call)
8016 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8017 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8018 seq_length = 1;
8020 else
8021 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8024 else
8026 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8027 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8028 xoperands);
8030 if (sibcall)
8032 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8033 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8034 else
8035 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8037 else
8039 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8040 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8041 else
8042 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8044 if (indirect_call)
8045 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8046 else
8047 output_asm_insn ("copy %%r31,%%r2", xoperands);
8048 seq_length = 1;
8055 if (seq_length == 0)
8056 output_asm_insn ("nop", xoperands);
8058 return "";
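/* As an illustrative sketch (the callee name "foo" is hypothetical),
   the GAS pc-relative long call selected above on a PA 2.0 ELF target
   comes out as

	b,l .+8,%r1
	addil L'foo-$PIC_pcrel$0+4,%r1
	ldo R'foo-$PIC_pcrel$0+8(%r1),%r1
	bve,l (%r1),%r2
	nop

   i.e., 20 bytes with an unfilled delay slot, agreeing with
   pa_attr_length_call. */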
8061 /* Return the attribute length of the indirect call instruction INSN.
8062 The length must match the code generated by pa_output_indirect_call.
8063 The returned length includes the delay slot. Currently, the delay
8064 slot of an indirect call sequence is not exposed and it is used by
8065 the sequence itself. */
8068 pa_attr_length_indirect_call (rtx_insn *insn)
8070 unsigned long distance = -1;
8071 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8073 if (INSN_ADDRESSES_SET_P ())
8075 distance = (total + insn_current_reference_address (insn));
8076 if (distance < total)
8077 distance = -1;
8080 if (TARGET_64BIT)
8081 return 12;
8083 if (TARGET_FAST_INDIRECT_CALLS
8084 || (!TARGET_LONG_CALLS
8085 && !TARGET_PORTABLE_RUNTIME
8086 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8087 || distance < MAX_PCREL17F_OFFSET)))
8088 return 8;
8090 if (flag_pic)
8091 return 20;
8093 if (TARGET_PORTABLE_RUNTIME)
8094 return 16;
8096 /* Out of reach, can use ble. */
8097 return 12;
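/* A quick map from these lengths to the sequences emitted below:
   8 selects one of the short $$dyncall branches (or the fast ble/copy
   form), 12 selects the 64-bit ldd/bve,l form or the 32-bit ldil/ble
   absolute form, 16 the portable runtime form, and 20 the long PIC
   form. */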
8100 const char *
8101 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8103 rtx xoperands[1];
8105 if (TARGET_64BIT)
8107 xoperands[0] = call_dest;
8108 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8109 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8110 return "";
8113 /* First the special case for kernels, level 0 systems, etc. */
8114 if (TARGET_FAST_INDIRECT_CALLS)
8115 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8117 /* Now the normal case -- we can reach $$dyncall directly or
8118 we're sure that we can get there via a long-branch stub.
8120 No need to check target flags as the length uniquely identifies
8121 the remaining cases. */
8122 if (pa_attr_length_indirect_call (insn) == 8)
8124 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8125 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8126 variant of the B,L instruction can't be used on the SOM target. */
8127 if (TARGET_PA_20 && !TARGET_SOM)
8128 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8129 else
8130 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8133 /* Long millicode call, but we are not generating PIC or portable runtime
8134 code. */
8135 if (pa_attr_length_indirect_call (insn) == 12)
8136 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8138 /* Long millicode call for portable runtime. */
8139 if (pa_attr_length_indirect_call (insn) == 16)
8140 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8142 /* We need a long PIC call to $$dyncall. */
8143 xoperands[0] = NULL_RTX;
8144 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8145 if (TARGET_SOM || !TARGET_GAS)
8147 xoperands[0] = gen_label_rtx ();
8148 output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands);
8149 targetm.asm_out.internal_label (asm_out_file, "L",
8150 CODE_LABEL_NUMBER (xoperands[0]));
8151 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8153 else
8155 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands);
8156 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8157 xoperands);
8159 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8160 output_asm_insn ("ldo 12(%%r2),%%r2", xoperands);
8161 return "";
8164 /* In HPUX 8.0's shared library scheme, special relocations are needed
8165 for function labels if they might be passed to a function
8166 in a shared library (because shared libraries don't live in code
8167 space), and special magic is needed to construct their address. */
8169 void
8170 pa_encode_label (rtx sym)
8172 const char *str = XSTR (sym, 0);
8173 int len = strlen (str) + 1;
8174 char *newstr, *p;
8176 p = newstr = XALLOCAVEC (char, len + 1);
8177 *p++ = '@';
8178 strcpy (p, str);
8180 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
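/* For example, encoding the function label "foo" rewrites the
   SYMBOL_REF string to "@foo"; pa_strip_name_encoding below undoes
   this, and also skips a leading '*' from names that bypass user-label
   prefixing. */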
8183 static void
8184 pa_encode_section_info (tree decl, rtx rtl, int first)
8186 int old_referenced = 0;
8188 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8189 old_referenced
8190 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8192 default_encode_section_info (decl, rtl, first);
8194 if (first && TEXT_SPACE_P (decl))
8196 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8197 if (TREE_CODE (decl) == FUNCTION_DECL)
8198 pa_encode_label (XEXP (rtl, 0));
8200 else if (old_referenced)
8201 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8204 /* This is sort of the inverse of pa_encode_section_info. */
8206 static const char *
8207 pa_strip_name_encoding (const char *str)
8209 str += (*str == '@');
8210 str += (*str == '*');
8211 return str;
8214 /* Returns 1 if OP is a function label involved in a simple addition
8215 with a constant. Used to keep certain patterns from matching
8216 during instruction combination. */
8218 pa_is_function_label_plus_const (rtx op)
8220 /* Strip off any CONST. */
8221 if (GET_CODE (op) == CONST)
8222 op = XEXP (op, 0);
8224 return (GET_CODE (op) == PLUS
8225 && function_label_operand (XEXP (op, 0), VOIDmode)
8226 && GET_CODE (XEXP (op, 1)) == CONST_INT);
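/* E.g., (const (plus (symbol_ref "@foo") (const_int 4))) is accepted
   once the CONST wrapper is stripped, while a data symbol plus a
   constant is not, since only encoded function labels satisfy
   function_label_operand. */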
8229 /* Output assembly code for a thunk to FUNCTION. */
8231 static void
8232 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8233 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8234 tree function)
8236 static unsigned int current_thunk_number;
8237 int val_14 = VAL_14_BITS_P (delta);
8238 unsigned int old_last_address = last_address, nbytes = 0;
8239 char label[16];
8240 rtx xoperands[4];
8242 xoperands[0] = XEXP (DECL_RTL (function), 0);
8243 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8244 xoperands[2] = GEN_INT (delta);
8246 final_start_function (emit_barrier (), file, 1);
8248 /* Output the thunk. We know that the function is in the same
8249 translation unit (i.e., the same space) as the thunk, and that
8250 thunks are output after their method. Thus, we don't need an
8251 external branch to reach the function. With SOM and GAS,
8252 functions and thunks are effectively in different sections.
8253 Thus, we can always use an IA-relative branch and the linker
8254 will add a long branch stub if necessary.
8256 However, we have to be careful when generating PIC code on the
8257 SOM port to ensure that the sequence does not transfer to an
8258 import stub for the target function as this could clobber the
8259 return value saved at SP-24. This would also apply to the
8260 32-bit linux port if the multi-space model is implemented. */
8261 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8262 && !(flag_pic && TREE_PUBLIC (function))
8263 && (TARGET_GAS || last_address < 262132))
8264 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8265 && ((targetm_common.have_named_sections
8266 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8267 /* The GNU 64-bit linker has rather poor stub management.
8268 So, we use a long branch from thunks that aren't in
8269 the same section as the target function. */
8270 && ((!TARGET_64BIT
8271 && (DECL_SECTION_NAME (thunk_fndecl)
8272 != DECL_SECTION_NAME (function)))
8273 || ((DECL_SECTION_NAME (thunk_fndecl)
8274 == DECL_SECTION_NAME (function))
8275 && last_address < 262132)))
8276 /* In this case, we need to be able to reach the start of
8277 the stub table even though the function is likely closer
8278 and can be jumped to directly. */
8279 || (targetm_common.have_named_sections
8280 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8281 && DECL_SECTION_NAME (function) == NULL
8282 && total_code_bytes < MAX_PCREL17F_OFFSET)
8283 /* Likewise. */
8284 || (!targetm_common.have_named_sections
8285 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8287 if (!val_14)
8288 output_asm_insn ("addil L'%2,%%r26", xoperands);
8290 output_asm_insn ("b %0", xoperands);
8292 if (val_14)
8294 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8295 nbytes += 8;
8297 else
8299 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8300 nbytes += 12;
8303 else if (TARGET_64BIT)
8305 /* We only have one call-clobbered scratch register, so we can't
8306 make use of the delay slot if delta doesn't fit in 14 bits. */
8307 if (!val_14)
8309 output_asm_insn ("addil L'%2,%%r26", xoperands);
8310 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8313 output_asm_insn ("b,l .+8,%%r1", xoperands);
8315 if (TARGET_GAS)
8317 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8318 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8320 else
8322 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8323 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8326 if (val_14)
8328 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8329 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8330 nbytes += 20;
8332 else
8334 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8335 nbytes += 24;
8338 else if (TARGET_PORTABLE_RUNTIME)
8340 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8341 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8343 if (!val_14)
8344 output_asm_insn ("addil L'%2,%%r26", xoperands);
8346 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8348 if (val_14)
8350 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8351 nbytes += 16;
8353 else
8355 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8356 nbytes += 20;
8359 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8361 /* The function is accessible from outside this module. The only
8362 way to avoid an import stub between the thunk and function is to
8363 call the function directly with an indirect sequence similar to
8364 that used by $$dyncall. This is possible because $$dyncall acts
8365 as the import stub in an indirect call. */
8366 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8367 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8368 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8369 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8370 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8371 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8372 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8373 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8374 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8376 if (!val_14)
8378 output_asm_insn ("addil L'%2,%%r26", xoperands);
8379 nbytes += 4;
8382 if (TARGET_PA_20)
8384 output_asm_insn ("bve (%%r22)", xoperands);
8385 nbytes += 36;
8387 else if (TARGET_NO_SPACE_REGS)
8389 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8390 nbytes += 36;
8392 else
8394 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8395 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8396 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8397 nbytes += 44;
8400 if (val_14)
8401 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8402 else
8403 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8405 else if (flag_pic)
8407 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8409 if (TARGET_SOM || !TARGET_GAS)
8411 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8412 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8414 else
8416 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8417 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8420 if (!val_14)
8421 output_asm_insn ("addil L'%2,%%r26", xoperands);
8423 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8425 if (val_14)
8427 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8428 nbytes += 20;
8430 else
8432 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8433 nbytes += 24;
8436 else
8438 if (!val_14)
8439 output_asm_insn ("addil L'%2,%%r26", xoperands);
8441 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8442 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8444 if (val_14)
8446 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8447 nbytes += 12;
8449 else
8451 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8452 nbytes += 16;
8456 final_end_function ();
8458 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8460 switch_to_section (data_section);
8461 output_asm_insn (".align 4", xoperands);
8462 ASM_OUTPUT_LABEL (file, label);
8463 output_asm_insn (".word P'%0", xoperands);
8466 current_thunk_number++;
8467 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8468 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8469 last_address += nbytes;
8470 if (old_last_address > last_address)
8471 last_address = UINT_MAX;
8472 update_total_code_bytes (nbytes);
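/* In the simplest case above (branch in reach, DELTA fitting in 14
   bits) the emitted thunk is just

	b function
	ldo delta(%r26),%r26

   where "function" and "delta" stand for operands %0 and %2, and the
   adjustment of the incoming object pointer in %r26 rides in the
   delay slot of the branch. */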
8475 /* Only direct calls to static functions are allowed to be sibling (tail)
8476 call optimized.
8478 This restriction is necessary because some linker generated stubs will
8479 store return pointers into rp' in some cases which might clobber a
8480 live value already in rp'.
8482 In a sibcall the current function and the target function share stack
8483 space. Thus if the path to the current function and the path to the
8484 target function save a value in rp', they save the value into the
8485 same stack slot, which has undesirable consequences.
8487 Because of the deferred binding nature of shared libraries any function
8488 with external scope could be in a different load module and thus require
8489 rp' to be saved when calling that function. So sibcall optimizations
8490 can only be safe for static functions.
8492 Note that GCC never needs return value relocations, so we don't have to
8493 worry about static calls with return value relocations (which require
8494 saving rp').
8496 It is safe to perform a sibcall optimization when the target function
8497 will never return. */
8498 static bool
8499 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8501 if (TARGET_PORTABLE_RUNTIME)
8502 return false;
8504 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8505 single subspace mode and the call is not indirect. As far as I know,
8506 there is no operating system support for the multiple subspace mode.
8507 It might be possible to support indirect calls if we didn't use
8508 $$dyncall (see the indirect sequence generated in pa_output_call). */
8509 if (TARGET_ELF32)
8510 return (decl != NULL_TREE);
8512 /* Sibcalls are not ok because the arg pointer register is not a fixed
8513 register. This prevents the sibcall optimization from occurring. In
8514 addition, there are problems with stub placement using GNU ld. This
8515 is because a normal sibcall branch uses a 17-bit relocation while
8516 a regular call branch uses a 22-bit relocation. As a result, more
8517 care needs to be taken in the placement of long-branch stubs. */
8518 if (TARGET_64BIT)
8519 return false;
8521 /* Sibcalls are only ok within a translation unit. */
8522 return (decl && !TREE_PUBLIC (decl));
8525 /* ??? Addition is not commutative on the PA due to the weird implicit
8526 space register selection rules for memory addresses. Therefore, we
8527 don't consider a + b == b + a, as this might be inside a MEM. */
8528 static bool
8529 pa_commutative_p (const_rtx x, int outer_code)
8531 return (COMMUTATIVE_P (x)
8532 && (TARGET_NO_SPACE_REGS
8533 || (outer_code != UNKNOWN && outer_code != MEM)
8534 || GET_CODE (x) != PLUS));
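/* For example, (mem:SI (plus (reg A) (reg B))) must not be rewritten
   as (mem:SI (plus (reg B) (reg A))): when space registers are in
   use, the base operand participates in selecting the implicit space
   register, so swapping the operands can change which space the
   address refers to. */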
8537 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8538 use in fmpyadd instructions. */
8540 pa_fmpyaddoperands (rtx *operands)
8542 machine_mode mode = GET_MODE (operands[0]);
8544 /* Must be a floating point mode. */
8545 if (mode != SFmode && mode != DFmode)
8546 return 0;
8548 /* All modes must be the same. */
8549 if (! (mode == GET_MODE (operands[1])
8550 && mode == GET_MODE (operands[2])
8551 && mode == GET_MODE (operands[3])
8552 && mode == GET_MODE (operands[4])
8553 && mode == GET_MODE (operands[5])))
8554 return 0;
8556 /* All operands must be registers. */
8557 if (! (GET_CODE (operands[1]) == REG
8558 && GET_CODE (operands[2]) == REG
8559 && GET_CODE (operands[3]) == REG
8560 && GET_CODE (operands[4]) == REG
8561 && GET_CODE (operands[5]) == REG))
8562 return 0;
8564 /* Only 2 real operands to the addition. One of the input operands must
8565 be the same as the output operand. */
8566 if (! rtx_equal_p (operands[3], operands[4])
8567 && ! rtx_equal_p (operands[3], operands[5]))
8568 return 0;
8570 /* Inout operand of add cannot conflict with any operands from multiply. */
8571 if (rtx_equal_p (operands[3], operands[0])
8572 || rtx_equal_p (operands[3], operands[1])
8573 || rtx_equal_p (operands[3], operands[2]))
8574 return 0;
8576 /* multiply cannot feed into addition operands. */
8577 if (rtx_equal_p (operands[4], operands[0])
8578 || rtx_equal_p (operands[5], operands[0]))
8579 return 0;
8581 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8582 if (mode == SFmode
8583 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8584 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8585 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8586 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8587 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8588 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8589 return 0;
8591 /* Passed. Operands are suitable for fmpyadd. */
8592 return 1;
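/* In other words, the shape fused here is op0 = op1 * op2 in parallel
   with op3 = op4 + op5, where the add must be two-address (op3 equal
   to op4 or op5), op3 must not overlap any multiply operand, and the
   multiply result op0 must not feed the add. */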
8595 #if !defined(USE_COLLECT2)
8596 static void
8597 pa_asm_out_constructor (rtx symbol, int priority)
8599 if (!function_label_operand (symbol, VOIDmode))
8600 pa_encode_label (symbol);
8602 #ifdef CTORS_SECTION_ASM_OP
8603 default_ctor_section_asm_out_constructor (symbol, priority);
8604 #else
8605 # ifdef TARGET_ASM_NAMED_SECTION
8606 default_named_section_asm_out_constructor (symbol, priority);
8607 # else
8608 default_stabs_asm_out_constructor (symbol, priority);
8609 # endif
8610 #endif
8613 static void
8614 pa_asm_out_destructor (rtx symbol, int priority)
8616 if (!function_label_operand (symbol, VOIDmode))
8617 pa_encode_label (symbol);
8619 #ifdef DTORS_SECTION_ASM_OP
8620 default_dtor_section_asm_out_destructor (symbol, priority);
8621 #else
8622 # ifdef TARGET_ASM_NAMED_SECTION
8623 default_named_section_asm_out_destructor (symbol, priority);
8624 # else
8625 default_stabs_asm_out_destructor (symbol, priority);
8626 # endif
8627 #endif
8629 #endif
8631 /* This function places uninitialized global data in the bss section.
8632 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8633 function on the SOM port to prevent uninitialized global data from
8634 being placed in the data section. */
8636 void
8637 pa_asm_output_aligned_bss (FILE *stream,
8638 const char *name,
8639 unsigned HOST_WIDE_INT size,
8640 unsigned int align)
8642 switch_to_section (bss_section);
8643 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8645 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8646 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8647 #endif
8649 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8650 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8651 #endif
8653 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8654 ASM_OUTPUT_LABEL (stream, name);
8655 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8658 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8659 that doesn't allow the alignment of global common storage to be directly
8660 specified. The SOM linker aligns common storage based on the rounded
8661 value of the NUM_BYTES parameter in the .comm directive. It's not
8662 possible to use the .align directive as it doesn't affect the alignment
8663 of the label associated with a .comm directive. */
8665 void
8666 pa_asm_output_aligned_common (FILE *stream,
8667 const char *name,
8668 unsigned HOST_WIDE_INT size,
8669 unsigned int align)
8671 unsigned int max_common_align;
8673 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8674 if (align > max_common_align)
8676 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8677 "for global common data. Using %u",
8678 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8679 align = max_common_align;
8682 switch_to_section (bss_section);
8684 assemble_name (stream, name);
8685 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8686 MAX (size, align / BITS_PER_UNIT));
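/* A worked example (the name "buf" is hypothetical): a 4-byte common
   object requesting 16-byte alignment is emitted as "buf .comm 16",
   because inflating NUM_BYTES to MAX (size, align in bytes) is the
   only way to make the SOM linker align the storage. */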
8689 /* We can't use .comm for local common storage as the SOM linker effectively
8690 treats the symbol as universal and uses the same storage for local symbols
8691 with the same name in different object files. The .block directive
8692 reserves an uninitialized block of storage. However, it's not common
8693 storage. Fortunately, GCC never requests common storage with the same
8694 name in any given translation unit. */
8696 void
8697 pa_asm_output_aligned_local (FILE *stream,
8698 const char *name,
8699 unsigned HOST_WIDE_INT size,
8700 unsigned int align)
8702 switch_to_section (bss_section);
8703 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8705 #ifdef LOCAL_ASM_OP
8706 fprintf (stream, "%s", LOCAL_ASM_OP);
8707 assemble_name (stream, name);
8708 fprintf (stream, "\n");
8709 #endif
8711 ASM_OUTPUT_LABEL (stream, name);
8712 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8715 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8716 use in fmpysub instructions. */
8718 pa_fmpysuboperands (rtx *operands)
8720 machine_mode mode = GET_MODE (operands[0]);
8722 /* Must be a floating point mode. */
8723 if (mode != SFmode && mode != DFmode)
8724 return 0;
8726 /* All modes must be the same. */
8727 if (! (mode == GET_MODE (operands[1])
8728 && mode == GET_MODE (operands[2])
8729 && mode == GET_MODE (operands[3])
8730 && mode == GET_MODE (operands[4])
8731 && mode == GET_MODE (operands[5])))
8732 return 0;
8734 /* All operands must be registers. */
8735 if (! (GET_CODE (operands[1]) == REG
8736 && GET_CODE (operands[2]) == REG
8737 && GET_CODE (operands[3]) == REG
8738 && GET_CODE (operands[4]) == REG
8739 && GET_CODE (operands[5]) == REG))
8740 return 0;
8742 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8743 operation, so operands[4] must be the same as operands[3].
8744 if (! rtx_equal_p (operands[3], operands[4]))
8745 return 0;
8747 /* multiply cannot feed into subtraction. */
8748 if (rtx_equal_p (operands[5], operands[0]))
8749 return 0;
8751 /* Inout operand of sub cannot conflict with any operands from multiply. */
8752 if (rtx_equal_p (operands[3], operands[0])
8753 || rtx_equal_p (operands[3], operands[1])
8754 || rtx_equal_p (operands[3], operands[2]))
8755 return 0;
8757 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8758 if (mode == SFmode
8759 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8760 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8761 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8762 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8763 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8764 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8765 return 0;
8767 /* Passed. Operands are suitable for fmpysub. */
8768 return 1;
8771 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8772 constants for a MULT embedded inside a memory address. */
8774 pa_mem_shadd_constant_p (int val)
8776 if (val == 2 || val == 4 || val == 8)
8777 return 1;
8778 else
8779 return 0;
8782 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8783 constants for shadd instructions. */
8785 pa_shadd_constant_p (int val)
8787 if (val == 1 || val == 2 || val == 3)
8788 return 1;
8789 else
8790 return 0;
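/* These two predicates describe the same shift-and-add facility from
   two angles: shadd constants 1, 2 and 3 are the shift counts used by
   sh1add, sh2add and sh3add, while mem_shadd constants 2, 4 and 8 are
   the corresponding scale factors appearing as a MULT inside an
   address. */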
8793 /* Return TRUE if INSN branches forward. */
8795 static bool
8796 forward_branch_p (rtx_insn *insn)
8798 rtx lab = JUMP_LABEL (insn);
8800 /* The INSN must have a jump label. */
8801 gcc_assert (lab != NULL_RTX);
8803 if (INSN_ADDRESSES_SET_P ())
8804 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8806 while (insn)
8808 if (insn == lab)
8809 return true;
8810 else
8811 insn = NEXT_INSN (insn);
8814 return false;
8817 /* Output an unconditional move and branch insn. */
8819 const char *
8820 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8822 int length = get_attr_length (insn);
8824 /* These are the cases in which we win. */
8825 if (length == 4)
8826 return "mov%I1b,tr %1,%0,%2";
8828 /* None of the following cases win, but they don't lose either. */
8829 if (length == 8)
8831 if (dbr_sequence_length () == 0)
8833 /* Nothing in the delay slot, fake it by putting the combined
8834 insn (the copy or add) in the delay slot of a bl. */
8835 if (GET_CODE (operands[1]) == CONST_INT)
8836 return "b %2\n\tldi %1,%0";
8837 else
8838 return "b %2\n\tcopy %1,%0";
8840 else
8842 /* Something in the delay slot, but we've got a long branch. */
8843 if (GET_CODE (operands[1]) == CONST_INT)
8844 return "ldi %1,%0\n\tb %2";
8845 else
8846 return "copy %1,%0\n\tb %2";
8850 if (GET_CODE (operands[1]) == CONST_INT)
8851 output_asm_insn ("ldi %1,%0", operands);
8852 else
8853 output_asm_insn ("copy %1,%0", operands);
8854 return pa_output_lbranch (operands[2], insn, 1);
8857 /* Output an unconditional add and branch insn. */
8859 const char *
8860 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8862 int length = get_attr_length (insn);
8864 /* To make life easy we want operand0 to be the shared input/output
8865 operand and operand1 to be the readonly operand. */
8866 if (operands[0] == operands[1])
8867 operands[1] = operands[2];
8869 /* These are the cases in which we win. */
8870 if (length == 4)
8871 return "add%I1b,tr %1,%0,%3";
8873 /* None of the following cases win, but they don't lose either. */
8874 if (length == 8)
8876 if (dbr_sequence_length () == 0)
8877 /* Nothing in the delay slot, fake it by putting the combined
8878 insn (the copy or add) in the delay slot of a bl. */
8879 return "b %3\n\tadd%I1 %1,%0,%0";
8880 else
8881 /* Something in the delay slot, but we've got a long branch. */
8882 return "add%I1 %1,%0,%0\n\tb %3";
8885 output_asm_insn ("add%I1 %1,%0,%0", operands);
8886 return pa_output_lbranch (operands[3], insn, 1);
8889 /* We use this hook to perform a PA specific optimization which is difficult
8890 to do in earlier passes. */
8892 static void
8893 pa_reorg (void)
8895 remove_useless_addtr_insns (1);
8897 if (pa_cpu < PROCESSOR_8000)
8898 pa_combine_instructions ();
8901 /* The PA has a number of odd instructions which can perform multiple
8902 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8903 it may be profitable to combine two instructions into one instruction
8904 with two outputs. It's not profitable on PA2.0 machines because the
8905 two outputs would take two slots in the reorder buffers.
8907 This routine finds instructions which can be combined and combines
8908 them. We only support some of the potential combinations, and we
8909 only try common ways to find suitable instructions.
8911 * addb can add two registers or a register and a small integer
8912 and jump to a nearby (+-8k) location. Normally the jump to the
8913 nearby location is conditional on the result of the add, but by
8914 using the "true" condition we can make the jump unconditional.
8915 Thus addb can perform two independent operations in one insn.
8917 * movb is similar to addb in that it can perform a reg->reg
8918 or small immediate->reg copy and jump to a nearby (+-8k) location.
8920 * fmpyadd and fmpysub can perform a FP multiply and either an
8921 FP add or FP sub if the operands of the multiply and add/sub are
8922 independent (there are other minor restrictions). Note both
8923 the fmpy and fadd/fsub can in theory move to better spots according
8924 to data dependencies, but for now we require the fmpy stay at a
8925 fixed location.
8927 * Many of the memory operations can perform pre & post updates
8928 of index registers. GCC's pre/post increment/decrement addressing
8929 is far too simple to take advantage of all the possibilities. This
8930 pass may not be suitable since those insns may not be independent.
8932 * comclr can compare two ints or an int and a register, nullify
8933 the following instruction and zero some other register. This
8934 is more difficult to use as it's harder to find an insn which
8935 will generate a comclr than finding something like an unconditional
8936 branch. (conditional moves & long branches create comclr insns).
8938 * Most arithmetic operations can conditionally skip the next
8939 instruction. They can be viewed as "perform this operation
8940 and conditionally jump to this nearby location" (where nearby
8941 is an insn away). These are difficult to use due to the
8942 branch length restrictions. */
8944 static void
8945 pa_combine_instructions (void)
8947 rtx_insn *anchor;
8949 /* This can get expensive since the basic algorithm is on the
8950 order of O(n^2) (or worse). Only do it for -O2 or higher
8951 levels of optimization. */
8952 if (optimize < 2)
8953 return;
8955 /* Walk down the list of insns looking for "anchor" insns which
8956 may be combined with "floating" insns. As the name implies,
8957 "anchor" instructions don't move, while "floating" insns may
8958 move around. */
8959 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8960 rtx_insn *new_rtx = make_insn_raw (par);
8962 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8964 enum attr_pa_combine_type anchor_attr;
8965 enum attr_pa_combine_type floater_attr;
8967 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8968 Also ignore any special USE insns. */
8969 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
8970 || GET_CODE (PATTERN (anchor)) == USE
8971 || GET_CODE (PATTERN (anchor)) == CLOBBER)
8972 continue;
8974 anchor_attr = get_attr_pa_combine_type (anchor);
8975 /* See if anchor is an insn suitable for combination. */
8976 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8977 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8978 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8979 && ! forward_branch_p (anchor)))
8981 rtx_insn *floater;
8983 for (floater = PREV_INSN (anchor);
8984 floater;
8985 floater = PREV_INSN (floater))
8987 if (NOTE_P (floater)
8988 || (NONJUMP_INSN_P (floater)
8989 && (GET_CODE (PATTERN (floater)) == USE
8990 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8991 continue;
8993 /* Anything except a regular INSN will stop our search. */
8994 if (! NONJUMP_INSN_P (floater))
8996 floater = NULL;
8997 break;
9000 /* See if FLOATER is suitable for combination with the
9001 anchor. */
9002 floater_attr = get_attr_pa_combine_type (floater);
9003 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9004 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9005 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9006 && floater_attr == PA_COMBINE_TYPE_FMPY))
9008 /* If ANCHOR and FLOATER can be combined, then we're
9009 done with this pass. */
9010 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9011 SET_DEST (PATTERN (floater)),
9012 XEXP (SET_SRC (PATTERN (floater)), 0),
9013 XEXP (SET_SRC (PATTERN (floater)), 1)))
9014 break;
9017 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9018 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9020 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9022 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9023 SET_DEST (PATTERN (floater)),
9024 XEXP (SET_SRC (PATTERN (floater)), 0),
9025 XEXP (SET_SRC (PATTERN (floater)), 1)))
9026 break;
9028 else
9030 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9031 SET_DEST (PATTERN (floater)),
9032 SET_SRC (PATTERN (floater)),
9033 SET_SRC (PATTERN (floater))))
9034 break;
9039 /* If we didn't find anything on the backwards scan try forwards. */
9040 if (!floater
9041 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9042 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9044 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9046 if (NOTE_P (floater)
9047 || (NONJUMP_INSN_P (floater)
9048 && (GET_CODE (PATTERN (floater)) == USE
9049 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9051 continue;
9053 /* Anything except a regular INSN will stop our search. */
9054 if (! NONJUMP_INSN_P (floater))
9056 floater = NULL;
9057 break;
9060 /* See if FLOATER is suitable for combination with the
9061 anchor. */
9062 floater_attr = get_attr_pa_combine_type (floater);
9063 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9064 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9065 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9066 && floater_attr == PA_COMBINE_TYPE_FMPY))
9068 /* If ANCHOR and FLOATER can be combined, then we're
9069 done with this pass. */
9070 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9071 SET_DEST (PATTERN (floater)),
9072 XEXP (SET_SRC (PATTERN (floater)),
9074 XEXP (SET_SRC (PATTERN (floater)),
9075 1)))
9076 break;
9081 /* FLOATER will be nonzero if we found a suitable floating
9082 insn for combination with ANCHOR. */
9083 if (floater
9084 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9085 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9087 /* Emit the new instruction and delete the old anchor. */
9088 emit_insn_before (gen_rtx_PARALLEL
9089 (VOIDmode,
9090 gen_rtvec (2, PATTERN (anchor),
9091 PATTERN (floater))),
9092 anchor);
9094 SET_INSN_DELETED (anchor);
9096 /* Emit a special USE insn for FLOATER, then delete
9097 the floating insn. */
9098 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9099 delete_insn (floater);
9101 continue;
9103 else if (floater
9104 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9106 rtx temp;
9107 /* Emit the new_jump instruction and delete the old anchor. */
9108 temp
9109 = emit_jump_insn_before (gen_rtx_PARALLEL
9110 (VOIDmode,
9111 gen_rtvec (2, PATTERN (anchor),
9112 PATTERN (floater))),
9113 anchor);
9115 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9116 SET_INSN_DELETED (anchor);
9118 /* Emit a special USE insn for FLOATER, then delete
9119 the floating insn. */
9120 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9121 delete_insn (floater);
9122 continue;
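/* Sketch of a successful combination: an anchor such as

	fmpy,dbl %fr6,%fr7,%fr8

   and an independent floater such as

	fadd,dbl %fr9,%fr10,%fr9

   are rewritten as a single two-output PARALLEL that can later match
   an fmpyadd pattern; pa_can_combine_p below enforces the actual
   independence constraints. */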
9128 static int
9129 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9130 int reversed, rtx dest,
9131 rtx src1, rtx src2)
9133 int insn_code_number;
9134 rtx_insn *start, *end;
9136 /* Create a PARALLEL with the patterns of ANCHOR and
9137 FLOATER, try to recognize it, then test constraints
9138 for the resulting pattern.
9140 If the pattern doesn't match or the constraints
9141 aren't met keep searching for a suitable floater
9142 insn. */
9143 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9144 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9145 INSN_CODE (new_rtx) = -1;
9146 insn_code_number = recog_memoized (new_rtx);
9147 basic_block bb = BLOCK_FOR_INSN (anchor);
9148 if (insn_code_number < 0
9149 || (extract_insn (new_rtx),
9150 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9151 return 0;
9153 if (reversed)
9155 start = anchor;
9156 end = floater;
9158 else
9160 start = floater;
9161 end = anchor;
9164 /* There are up to three operands to consider. One
9165 output and two inputs.
9167 The output must not be used between FLOATER & ANCHOR
9168 exclusive. The inputs must not be set between
9169 FLOATER and ANCHOR exclusive. */
9171 if (reg_used_between_p (dest, start, end))
9172 return 0;
9174 if (reg_set_between_p (src1, start, end))
9175 return 0;
9177 if (reg_set_between_p (src2, start, end))
9178 return 0;
9180 /* If we get here, then everything is good. */
9181 return 1;
9184 /* Return nonzero if references for INSN are delayed.
9186 Millicode insns are actually function calls with some special
9187 constraints on arguments and register usage.
9189 Millicode calls always expect their arguments in the integer argument
9190 registers, and always return their result in %r29 (ret1). They
9191 are expected to clobber their arguments, %r1, %r29, and the return
9192 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9194 This function tells reorg that the references to arguments and
9195 millicode calls do not appear to happen until after the millicode call.
9196 This allows reorg to put insns which set the argument registers into the
9197 delay slot of the millicode call -- thus they act more like traditional
9198 CALL_INSNs.
9200 Note we cannot consider side effects of the insn to be delayed because
9201 the branch and link insn will clobber the return pointer. If we happened
9202 to use the return pointer in the delay slot of the call, then we lose.
9204 get_attr_type will try to recognize the given insn, so make sure to
9205 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9206 in particular. */
9208 pa_insn_refs_are_delayed (rtx_insn *insn)
9210 return ((NONJUMP_INSN_P (insn)
9211 && GET_CODE (PATTERN (insn)) != SEQUENCE
9212 && GET_CODE (PATTERN (insn)) != USE
9213 && GET_CODE (PATTERN (insn)) != CLOBBER
9214 && get_attr_type (insn) == TYPE_MILLI));
9217 /* Promote the return value, but not the arguments. */
9219 static machine_mode
9220 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9221 machine_mode mode,
9222 int *punsignedp ATTRIBUTE_UNUSED,
9223 const_tree fntype ATTRIBUTE_UNUSED,
9224 int for_return)
9226 if (for_return == 0)
9227 return mode;
9228 return promote_mode (type, mode, punsignedp);
9231 /* On the HP-PA the value is found in register(s) 28(-29), unless
9232 the mode is SF or DF. Then the value is returned in fr4 (32).
9234 This must perform the same promotions as PROMOTE_MODE, else promoting
9235 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9237 Small structures must be returned in a PARALLEL on PA64 in order
9238 to match the HP Compiler ABI. */
9240 static rtx
9241 pa_function_value (const_tree valtype,
9242 const_tree func ATTRIBUTE_UNUSED,
9243 bool outgoing ATTRIBUTE_UNUSED)
9245 machine_mode valmode;
9247 if (AGGREGATE_TYPE_P (valtype)
9248 || TREE_CODE (valtype) == COMPLEX_TYPE
9249 || TREE_CODE (valtype) == VECTOR_TYPE)
9251 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9253 /* Handle aggregates that fit exactly in a word or double word. */
9254 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9255 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9257 if (TARGET_64BIT)
9259 /* Aggregates with a size less than or equal to 128 bits are
9260 returned in GR 28(-29). They are left justified. The pad
9261 bits are undefined. Larger aggregates are returned in
9262 memory. */
9263 rtx loc[2];
9264 int i, offset = 0;
9265 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9267 for (i = 0; i < ub; i++)
9269 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9270 gen_rtx_REG (DImode, 28 + i),
9271 GEN_INT (offset));
9272 offset += 8;
9275 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9277 else if (valsize > UNITS_PER_WORD)
9279 /* Aggregates 5 to 8 bytes in size are returned in general
9280 registers r28-r29 in the same manner as other
9281 non-floating-point objects. The data is right-justified and
9282 zero-extended to 64 bits. This is opposite to the normal
9283 justification used on big endian targets and requires
9284 special treatment. */
9285 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9286 gen_rtx_REG (DImode, 28), const0_rtx);
9287 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9291 if ((INTEGRAL_TYPE_P (valtype)
9292 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9293 || POINTER_TYPE_P (valtype))
9294 valmode = word_mode;
9295 else
9296 valmode = TYPE_MODE (valtype);
9298 if (TREE_CODE (valtype) == REAL_TYPE
9299 && !AGGREGATE_TYPE_P (valtype)
9300 && TYPE_MODE (valtype) != TFmode
9301 && !TARGET_SOFT_FLOAT)
9302 return gen_rtx_REG (valmode, 32);
9304 return gen_rtx_REG (valmode, 28);
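/* Some concrete cases of the above: a 'double' is returned in fr4
   (register 32) unless soft float is in effect, an 'int' is promoted
   to word_mode in register 28, and on the 64-bit target a 12-byte
   aggregate comes back left justified in the two-register (r28, r29)
   PARALLEL built above. */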
9307 /* Implement the TARGET_LIBCALL_VALUE hook. */
9309 static rtx
9310 pa_libcall_value (machine_mode mode,
9311 const_rtx fun ATTRIBUTE_UNUSED)
9313 if (! TARGET_SOFT_FLOAT
9314 && (mode == SFmode || mode == DFmode))
9315 return gen_rtx_REG (mode, 32);
9316 else
9317 return gen_rtx_REG (mode, 28);
9320 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9322 static bool
9323 pa_function_value_regno_p (const unsigned int regno)
9325 if (regno == 28
9326 || (! TARGET_SOFT_FLOAT && regno == 32))
9327 return true;
9329 return false;
9332 /* Update the data in CUM to advance over an argument
9333 of mode MODE and data type TYPE.
9334 (TYPE is null for libcalls where that information may not be available.) */
9336 static void
9337 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9338 const_tree type, bool named ATTRIBUTE_UNUSED)
9340 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9341 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9343 cum->nargs_prototype--;
9344 cum->words += (arg_size
9345 + ((cum->words & 01)
9346 && type != NULL_TREE
9347 && arg_size > 1));
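/* A worked example of the advance: with cum->words == 1 and a
   two-word typed argument (arg_size == 2), the padding term is 1, so
   cum->words moves to 4 and the argument lands on a double-word
   aligned slot. */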
9350 /* Return the location of a parameter that is passed in a register or NULL
9351 if the parameter has any component that is passed in memory.
9353 This is new code and will be pushed into the net sources after
9354 further testing.
9356 ??? We might want to restructure this so that it looks more like other
9357 ports. */
9358 static rtx
9359 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9360 const_tree type, bool named ATTRIBUTE_UNUSED)
9362 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9363 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9364 int alignment = 0;
9365 int arg_size;
9366 int fpr_reg_base;
9367 int gpr_reg_base;
9368 rtx retval;
9370 if (mode == VOIDmode)
9371 return NULL_RTX;
9373 arg_size = FUNCTION_ARG_SIZE (mode, type);
9375 /* If this arg would be passed partially or totally on the stack, then
9376 this routine should return zero. pa_arg_partial_bytes will
9377 handle arguments which are split between regs and stack slots if
9378 the ABI mandates split arguments. */
9379 if (!TARGET_64BIT)
9381 /* The 32-bit ABI does not split arguments. */
9382 if (cum->words + arg_size > max_arg_words)
9383 return NULL_RTX;
9385 else
9387 if (arg_size > 1)
9388 alignment = cum->words & 1;
9389 if (cum->words + alignment >= max_arg_words)
9390 return NULL_RTX;
9393 /* The 32bit ABIs and the 64bit ABIs are rather different,
9394 particularly in their handling of FP registers. We might
9395 be able to cleverly share code between them, but I'm not
9396 going to bother in the hope that splitting them up results
9397 in code that is more easily understood. */
9399 if (TARGET_64BIT)
9401 /* Advance the base registers to their current locations.
9403 Remember, gprs grow towards smaller register numbers while
9404 fprs grow to higher register numbers. Also remember that
9405 although FP regs are 32-bit addressable, we pretend that
9406 the registers are 64-bits wide. */
9407 gpr_reg_base = 26 - cum->words;
9408 fpr_reg_base = 32 + cum->words;
9410 /* Arguments wider than one word and small aggregates need special
9411 treatment. */
9412 if (arg_size > 1
9413 || mode == BLKmode
9414 || (type && (AGGREGATE_TYPE_P (type)
9415 || TREE_CODE (type) == COMPLEX_TYPE
9416 || TREE_CODE (type) == VECTOR_TYPE)))
9418 /* Double-extended precision (80-bit), quad-precision (128-bit)
9419 and aggregates including complex numbers are aligned on
9420 128-bit boundaries. The first eight 64-bit argument slots
9421 are associated one-to-one, with general registers r26
9422 through r19, and also with floating-point registers fr4
9423 through fr11. Arguments larger than one word are always
9424 passed in general registers.
9426 Using a PARALLEL with a word mode register results in left
9427 justified data on a big-endian target. */
9429 rtx loc[8];
9430 int i, offset = 0, ub = arg_size;
9432 /* Align the base register. */
9433 gpr_reg_base -= alignment;
9435 ub = MIN (ub, max_arg_words - cum->words - alignment);
9436 for (i = 0; i < ub; i++)
9438 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9439 gen_rtx_REG (DImode, gpr_reg_base),
9440 GEN_INT (offset));
9441 gpr_reg_base -= 1;
9442 offset += 8;
9445 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9448 else
9450 /* If the argument is larger than a word, then we know precisely
9451 which registers we must use. */
9452 if (arg_size > 1)
9454 if (cum->words)
9456 gpr_reg_base = 23;
9457 fpr_reg_base = 38;
9459 else
9461 gpr_reg_base = 25;
9462 fpr_reg_base = 34;
9465 /* Structures 5 to 8 bytes in size are passed in the general
9466 registers in the same manner as other non-floating-point
9467 objects. The data is right-justified and zero-extended
9468 to 64 bits. This is opposite to the normal justification
9469 used on big endian targets and requires special treatment.
9470 We now define BLOCK_REG_PADDING to pad these objects.
9471 Aggregates, complex and vector types are passed in the same
9472 manner as structures. */
9473 if (mode == BLKmode
9474 || (type && (AGGREGATE_TYPE_P (type)
9475 || TREE_CODE (type) == COMPLEX_TYPE
9476 || TREE_CODE (type) == VECTOR_TYPE)))
9478 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9479 gen_rtx_REG (DImode, gpr_reg_base),
9480 const0_rtx);
9481 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9484 else
9486 /* We have a single word (32 bits). A simple computation
9487 will get us the register #s we need. */
9488 gpr_reg_base = 26 - cum->words;
9489 fpr_reg_base = 32 + 2 * cum->words;
9493 /* Determine if the argument needs to be passed in both general and
9494 floating point registers. */
9495 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9496 /* If we are doing soft-float with portable runtime, then there
9497 is no need to worry about FP regs. */
9498 && !TARGET_SOFT_FLOAT
9499 /* The parameter must be some kind of scalar float, else we just
9500 pass it in integer registers. */
9501 && GET_MODE_CLASS (mode) == MODE_FLOAT
9502 /* The target function must not have a prototype. */
9503 && cum->nargs_prototype <= 0
9504 /* libcalls do not need to pass items in both FP and general
9505 registers. */
9506 && type != NULL_TREE
9507 /* All this hair applies to "outgoing" args only. This includes
9508 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9509 && !cum->incoming)
9510 /* Also pass outgoing floating arguments in both registers in indirect
9511 calls with the 32 bit ABI and the HP assembler since there is no
9512 way to the specify argument locations in static functions. */
9513 || (!TARGET_64BIT
9514 && !TARGET_GAS
9515 && !cum->incoming
9516 && cum->indirect
9517 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9519 retval
9520 = gen_rtx_PARALLEL
9521 (mode,
9522 gen_rtvec (2,
9523 gen_rtx_EXPR_LIST (VOIDmode,
9524 gen_rtx_REG (mode, fpr_reg_base),
9525 const0_rtx),
9526 gen_rtx_EXPR_LIST (VOIDmode,
9527 gen_rtx_REG (mode, gpr_reg_base),
9528 const0_rtx)));
9530 else
9532 /* See if we should pass this parameter in a general register. */
9533 if (TARGET_SOFT_FLOAT
9534 /* Indirect calls in the normal 32bit ABI require all arguments
9535 to be passed in general registers. */
9536 || (!TARGET_PORTABLE_RUNTIME
9537 && !TARGET_64BIT
9538 && !TARGET_ELF32
9539 && cum->indirect)
9540 /* If the parameter is not a scalar floating-point parameter,
9541 then it belongs in GPRs. */
9542 || GET_MODE_CLASS (mode) != MODE_FLOAT
9543 /* Structure with single SFmode field belongs in GPR. */
9544 || (type && AGGREGATE_TYPE_P (type)))
9545 retval = gen_rtx_REG (mode, gpr_reg_base);
9546 else
9547 retval = gen_rtx_REG (mode, fpr_reg_base);
9549 return retval;
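
/* A worked example of the 64-bit aggregate case above (illustrative,
   not taken from an ABI document): with cum->words == 1, a 16-byte
   struct has arg_size == 2 and alignment == 1, so the partially used
   slot pair is skipped (gpr_reg_base drops from 25 to 24) and we build

     (parallel [(expr_list (reg:DI %r24) (const_int 0))
		(expr_list (reg:DI %r23) (const_int 8))])

   i.e., the struct lands in the 128-bit aligned slot pair r24/r23.  */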

/* Arguments larger than one word are double word aligned.  */

static unsigned int
pa_function_arg_boundary (machine_mode mode, const_tree type)
{
  bool singleword = (type
		     ? (integer_zerop (TYPE_SIZE (type))
			|| !TREE_CONSTANT (TYPE_SIZE (type))
			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);

  return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
}
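
/* For example (illustrative): on the 32-bit target, a 'double'
   (8 bytes > UNITS_PER_WORD) gets MAX_PARM_BOUNDARY alignment, while
   an 'int', an empty type or a type of non-constant size is treated
   as a single word and gets PARM_BOUNDARY.  */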

/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
		      tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
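
/* A worked example (illustrative): on the 64-bit target with
   cum->words == 6, a 32-byte argument (FUNCTION_ARG_SIZE == 4) needs
   no alignment slot (6 is even), does not fit entirely in registers
   (6 + 4 > 8) and does not start beyond them (6 < 8).  It is therefore
   split: (8 - 6) * UNITS_PER_WORD == 16 bytes go in r20/r19 and the
   remaining 16 bytes go on the stack.  */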

/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
	{
	  /* We only want to emit a .nsubspa directive once at the
	     start of the function.  */
	  cfun->machine->in_nsubspa = 1;

	  /* Create a new subspace for the text.  This provides
	     better stub placement and one-only functions.  */
	  if (cfun->decl
	      && DECL_ONE_ONLY (cfun->decl)
	      && !DECL_WEAK (cfun->decl))
	    {
	      output_section_asm_op ("\t.SPACE $TEXT$\n"
				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
				     "ACCESS=44,SORT=24,COMDAT");
	      return;
	    }
	}
      else
	{
	  /* There isn't a current function or the body of the current
	     function has been completed.  So, we are changing to the
	     text section to output debugging information.  Thus, we
	     need to forget that we are in the text section so that
	     varasm.c will call us when text_section is selected again.  */
	  gcc_assert (!cfun || !cfun->machine
		      || cfun->machine->in_nsubspa == 2);
	  in_section = NULL;
	}
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}

/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
			   "\t.SPACE $TEXT$\n"
			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
			   "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
			   som_output_comdat_data_section_asm_op,
			   "\t.SPACE $PRIVATE$\n"
			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
			   "ACCESS=31,SORT=24,COMDAT");

  if (flag_tm)
    som_tm_clone_table_section
      = get_unnamed_section (0, output_section_asm_op,
			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");

  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols, which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}

/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */

static section *
pa_som_tm_clone_table_section (void)
{
  return som_tm_clone_table_section;
}

/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc
   cannot be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
	  || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !reloc)
    {
      if (TARGET_SOM
	  && DECL_ONE_ONLY (exp)
	  && !DECL_WEAK (exp))
	return som_one_only_readonly_data_section;
      else
	return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp) && !reloc)
    return readonly_data_section;
  else if (TARGET_SOM
	   && TREE_CODE (exp) == VAR_DECL
	   && DECL_ONE_ONLY (exp)
	   && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}
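
/* For example (illustrative): a readonly variable with a constant
   initializer and no relocations is placed in readonly_data_section
   (or som_one_only_readonly_data_section when it is one-only under
   SOM), while a readonly variable whose initializer needs relocation
   (reloc != 0) falls through to data_section.  */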

/* Implement pa_reloc_rw_mask.  */

static int
pa_reloc_rw_mask (void)
{
  /* We force (const (plus (symbol) (const_int))) to memory when the
     const_int doesn't fit in a 14-bit integer.  The SOM linker can't
     handle this construct in read-only memory and we want to avoid
     this for ELF.  So, we always force an RTX needing relocation to
     the data section.  */
  return 3;
}

static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here; functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* The SOM ABI says that objects larger than 64 bits are returned in memory.
     The PA64 ABI says that objects larger than 128 bits are returned in
     memory.  Note, int_size_in_bytes can return -1 if the size of the object
     is variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
	  || int_size_in_bytes (type) <= 0);
}
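
/* For example (illustrative): on the 32-bit target a 12-byte struct
   (size > 8) is returned in memory, an 8-byte struct is not, and an
   empty or variably sized type (size <= 0) is always returned in
   memory.  */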

/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */

/* Vector of extern_symbols.  */
static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  gcc_assert (file == asm_out_file);
  extern_symbol p = {decl, name};
  vec_safe_push (extern_symbols, p);
}

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  vec_free (extern_symbols);
}
#endif

/* Return true if a change from mode FROM to mode TO for a register
   in register class RCLASS is invalid.  */

bool
pa_cannot_change_mode_class (machine_mode from, machine_mode to,
			     enum reg_class rclass)
{
  if (from == to)
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return true;

  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return false;

  /* There is no way to load QImode or HImode values directly from
     memory.  SImode loads to the FP registers are not zero extended.
     On the 64-bit target, this conflicts with the definition of
     LOAD_EXTEND_OP.  Thus, we can't allow changing between modes
     with different sizes in the floating-point registers.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return true;

  /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return true;

  return false;
}
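
/* For example (illustrative): changing between SFmode and DFmode is
   rejected for the FP register classes because the sizes differ,
   while SFmode <-> SImode is always allowed since the sizes match.  */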

/* Returns TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tying these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
   CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

bool
pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}

/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))

/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from text segment to stack location
   and then patched with pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      fputs ("\tldw 36(%r22),%r21\n", f);
      fputs ("\tbb,>=,n %r21,30,.+16\n", f);
      if (ASSEMBLER_DIALECT == 0)
	fputs ("\tdepi 0,31,2,%r21\n", f);
      else
	fputs ("\tdepwi 0,31,2,%r21\n", f);
      fputs ("\tldw 4(%r21),%r19\n", f);
      fputs ("\tldw 0(%r21),%r21\n", f);
      if (TARGET_PA_20)
	{
	  fputs ("\tbve (%r21)\n", f);
	  fputs ("\tldw 40(%r22),%r29\n", f);
	  fputs ("\t.word 0\n", f);
	  fputs ("\t.word 0\n", f);
	}
      else
	{
	  fputs ("\tldsid (%r21),%r1\n", f);
	  fputs ("\tmtsp %r1,%sr0\n", f);
	  fputs ("\tbe 0(%sr0,%r21)\n", f);
	  fputs ("\tldw 40(%r22),%r29\n", f);
	}
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r1\n", f);
      fputs ("\tldd 24(%r1),%r27\n", f);
      fputs ("\tldd 16(%r1),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\t.dword 0 ; fptr\n", f);
      fputs ("\t.dword 0 ; static link\n", f);
    }
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 36.
   Move the static chain value to trampoline template at offset 40.
   Move the trampoline address to trampoline template at offset 44.
   Move r19 to trampoline template at offset 48.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 36);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 40);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 44);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush;
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
					     TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
							    r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush;
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
					     TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }

#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
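
/* Illustrative sketch of the finished 32-bit trampoline (byte offsets;
   the instruction template is the one emitted above):

      0..35   instruction template (ldw 36(%r22),%r21 ...)
     36       address of the target function
     40       static chain value
     44       address of this trampoline
     48       caller's %r19 (global pointer)

   The words at offsets 44 and 48 form the plabel through which
   indirect calls enter the trampoline; see
   pa_trampoline_adjust_address below.  */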

/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 44.  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
  return addr;
}
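
/* The constant 46 above is the plabel offset 44 plus 2.  Setting bit 30
   of the address marks it as a pointer to a plabel rather than to code;
   this is the same bit the trampoline template tests with
   "bb,>=,n %r21,30,..." and clears with "depi 0,31,2,%r21" before
   dereferencing a plabel.  */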

static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}

static rtx
pa_internal_arg_pointer (void)
{
  /* The argument pointer and the hard frame pointer are the same in
     the 32-bit runtime, so we don't need a copy.  */
  if (TARGET_64BIT)
    return copy_to_reg (virtual_incoming_args_rtx);
  else
    return virtual_incoming_args_rtx;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.  */

static bool
pa_can_eliminate (const int from, const int to)
{
  /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
  if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
    return false;

  return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  ? ! frame_pointer_needed
	  : true);
}

/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */
HOST_WIDE_INT
pa_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
      && to == STACK_POINTER_REGNUM)
    offset = -pa_compute_frame_size (get_frame_size (), 0);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}

static void
pa_conditional_register_usage (void)
{
  int i;

  if (!TARGET_64BIT && !TARGET_PA_11)
    {
      for (i = 56; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
    {
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}

/* Target hook for c_mode_for_suffix.  */

static machine_mode
pa_c_mode_for_suffix (char suffix)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      if (suffix == 'q')
	return TFmode;
    }

  return VOIDmode;
}
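
/* For example (illustrative): with an HP-UX long-double library, a
   literal such as 1.0q is given TFmode (128-bit); on other
   configurations the 'q' suffix is not handled and VOIDmode is
   returned.  */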

/* Target hook for function_section.  */

static section *
pa_function_section (tree decl, enum node_frequency freq,
		     bool startup, bool exit)
{
  /* Put functions in the text section if the target doesn't have named
     sections.  */
  if (!targetm_common.have_named_sections)
    return text_section;

  /* Force nested functions into the same section as the containing
     function.  */
  if (decl
      && DECL_SECTION_NAME (decl) == NULL
      && DECL_CONTEXT (decl) != NULL_TREE
      && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
      && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
    return function_section (DECL_CONTEXT (decl));

  /* Otherwise, use the default function section.  */
  return default_function_section (decl, freq, startup, exit);
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   In 64-bit mode, we reject CONST_DOUBLEs.  We also reject CONST_INTs
   that need more than three instructions to load prior to reload.  This
   limit is somewhat arbitrary.  It takes three instructions to load a
   CONST_INT from memory but two are memory accesses.  It may be better
   to increase the allowed range for CONST_INTs.  We may also be able
   to handle CONST_DOUBLEs.  */

static bool
pa_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (tls_referenced_p (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (INTVAL (x)))
    return false;

  if (function_label_operand (x, mode))
    return false;

  return true;
}

/* Implement TARGET_SECTION_TYPE_FLAGS.  */

static unsigned int
pa_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags;

  flags = default_section_type_flags (decl, name, reloc);

  /* Function labels are placed in the constant pool.  This can
     cause a section conflict if decls are put in ".data.rel.ro"
     or ".data.rel.ro.local" using the __attribute__ construct.  */
  if (strcmp (name, ".data.rel.ro") == 0
      || strcmp (name, ".data.rel.ro.local") == 0)
    flags |= SECTION_WRITE | SECTION_RELRO;

  return flags;
}

/* pa_legitimate_address_p recognizes an RTL expression that is a
   valid memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
   REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
   available with floating point loads and stores, and integer loads.
   We get better code by allowing indexed addresses in the initial
   RTL generation.

   The acceptance of indexed addresses as legitimate implies that we
   must provide patterns for doing indexed integer stores, or the move
   expanders must force the address of an indexed store to a register.
   We have adopted the latter approach.

   Another function of pa_legitimate_address_p is to ensure that
   the base register is a valid pointer for indexed instructions.
   On targets that have non-equivalent space registers, we have to
   know at the time of assembler output which register in a REG+REG
   pair is the base register.  The REG_POINTER flag is sometimes lost
   in reload and the following passes, so it can't be relied on during
   code generation.  Thus, we either have to canonicalize the order
   of the registers in REG+REG indexed addresses, or treat REG+REG
   addresses separately and provide patterns for both permutations.

   The latter approach requires several hundred additional lines of
   code in pa.md.  The downside to canonicalizing is that a PLUS
   in the wrong order can't combine to form a scaled indexed memory
   operand.  As we won't need to canonicalize the operands if the
   REG_POINTER lossage can be fixed, it seems better to canonicalize.

   We initially break out scaled indexed addresses in canonical order
   in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
   scaled indexed addresses during RTL generation.  However, fold_rtx
   has its own opinion on how the operands of a PLUS should be ordered.
   If one of the operands is equivalent to a constant, it will make
   that operand the second operand.  As the base register is likely to
   be equivalent to a SYMBOL_REF, we have made it the second operand.

   pa_legitimate_address_p accepts REG+REG as legitimate when the
   operands are in the order INDEX+BASE on targets with non-equivalent
   space registers, and in any order on targets with equivalent space
   registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.

   We treat a SYMBOL_REF as legitimate if it is part of the current
   function's constant-pool, because such addresses can actually be
   output as REG+SMALLINT.  */

static bool
pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
		  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
	  && REG_P (XEXP (x, 0))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  if (GET_CODE (x) == PLUS)
    {
      rtx base, index;

      /* For REG+REG, the base register should be in XEXP (x, 1),
	 so check it first.  */
      if (REG_P (XEXP (x, 1))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
	base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
	base = XEXP (x, 0), index = XEXP (x, 1);
      else
	return false;

      if (GET_CODE (index) == CONST_INT)
	{
	  if (INT_5_BITS (index))
	    return true;

	  /* When INT14_OK_STRICT is false, a secondary reload is needed
	     to adjust the displacement of SImode and DImode floating point
	     instructions but this may fail when the register also needs
	     reloading.  So, we return false when STRICT is true.  We
	     also reject long displacements for float mode addresses since
	     the majority of accesses will use floating point instructions
	     that don't support 14-bit offsets.  */
	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  return base14_operand (index, mode);
	}

      if (!TARGET_DISABLE_INDEXING
	  /* Only accept the "canonical" INDEX+BASE operand order
	     on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      ? REG_P (index)
	      : (base == XEXP (x, 1) && REG_P (index)
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (base))
		     || REG_POINTER (base))
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (index))
		     || !REG_POINTER (index))))
	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
		     : REG_OK_FOR_INDEX_P (index))
	  && borx_reg_operand (base, Pmode)
	  && borx_reg_operand (index, Pmode))
	return true;

      if (!TARGET_DISABLE_INDEXING
	  && GET_CODE (index) == MULT
	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
	  && REG_P (XEXP (index, 0))
	  && GET_MODE (XEXP (index, 0)) == Pmode
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
	  && GET_CODE (XEXP (index, 1)) == CONST_INT
	  && INTVAL (XEXP (index, 1))
	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	  && borx_reg_operand (base, Pmode))
	return true;

      return false;
    }

  if (GET_CODE (x) == LO_SUM)
    {
      rtx y = XEXP (x, 0);

      if (GET_CODE (y) == SUBREG)
	y = SUBREG_REG (y);

      if (REG_P (y)
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
		     : REG_OK_FOR_BASE_P (y)))
	{
	  /* Needed for -fPIC */
	  if (mode == Pmode
	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
	    return true;

	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  if (CONSTANT_P (XEXP (x, 1)))
	    return true;
	}
      return false;
    }

  if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
    return true;

  return false;
}
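
/* Examples of addresses accepted above (illustrative):

     (reg %r3)				    plain base register
     (plus (reg %r3) (const_int 12))	    REG+SMALLINT
     (plus (reg idx) (reg base))	    REG+REG, INDEX+BASE order
     (plus (mult (reg idx) (const_int 4))
	   (reg base))			    scaled index for SImode
     (lo_sum (reg %r1) (symbol_ref ...))    low part paired with a HIGH

   The REG+REG and scaled forms are additionally subject to the
   REG_POINTER and borx_reg_operand checks above.  */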

/* Look for machine dependent ways to make the invalid address AD a
   valid address.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This makes reload inheritance and reload_cse work better since Z
   can be reused.

   There may be more opportunities to improve code with this hook.  */

rtx
pa_legitimize_reload_address (rtx ad, machine_mode mode,
			      int opnum, int type,
			      int ind_levels ATTRIBUTE_UNUSED)
{
  long offset, newoffset, mask;
  rtx new_rtx, temp = NULL_RTX;

  mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);

  if (optimize && GET_CODE (ad) == PLUS)
    temp = simplify_binary_operation (PLUS, Pmode,
				      XEXP (ad, 0), XEXP (ad, 1));

  new_rtx = temp ? temp : ad;

  if (optimize
      && GET_CODE (new_rtx) == PLUS
      && GET_CODE (XEXP (new_rtx, 0)) == REG
      && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
    {
      offset = INTVAL (XEXP ((new_rtx), 1));

      /* Choose rounding direction.  Round up if we are >= halfway.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~mask) + mask + 1;
      else
	newoffset = offset & ~mask;

      /* Ensure that long displacements are aligned.  */
      if (mask == 0x3fff
	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
	      || (TARGET_64BIT && (mode) == DImode)))
	newoffset &= ~(GET_MODE_SIZE (mode) - 1);

      if (newoffset != 0 && VAL_14_BITS_P (newoffset))
	{
	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
			       GEN_INT (newoffset));
	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  return ad;
	}
    }

  return NULL_RTX;
}
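
/* A worked example (illustrative): for SFmode with !INT14_OK_STRICT,
   mask is 0x1f.  Given memory (X + 0x234), offset & mask is 0x14,
   which is at least halfway (0x10), so we round up to
   newoffset == 0x240.  The address becomes

     (plus (plus X (const_int 0x240)) (const_int -12))

   where X + 0x240 is reloaded into a reusable base register and -12
   fits the 5-bit displacement of the FP load and store instructions.  */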

/* Output address vector.  */

void
pa_output_addr_vec (rtx lab, rtx body)
{
  int idx, vlen = XVECLEN (body, 0);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* Output address difference vector.  */

void
pa_output_addr_diff_vec (rtx lab, rtx body)
{
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
	(asm_out_file,
	 body,
	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
	 CODE_LABEL_NUMBER (base));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

#include "gt-pa.h"