/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "df.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_address_cost (rtx, bool);
static bool hppa_rtx_costs (rtx, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                 tree, bool);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        enum machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
                                                   enum machine_mode, int *,
                                                   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;

/* The UNIX standard to use for predefines and linking.  */
int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION pa_handle_option

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
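
/* Illustrative example (not from the original source): an invocation
   such as -mfixed-range=%fr4-%fr31 marks all of fr4..fr31 as fixed.
   If that range happens to cover every register from FP_REG_FIRST to
   FP_REG_LAST, the loop above falls off the end and
   MASK_DISABLE_FPREGS is set as well, disabling FP register use
   entirely.  */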
/* Implement TARGET_HANDLE_OPTION.  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mnosnake:
    case OPT_mpa_risc_1_0:
    case OPT_march_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    case OPT_msnake:
    case OPT_mpa_risc_1_1:
    case OPT_march_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    case OPT_mpa_risc_2_0:
    case OPT_march_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
        pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
        pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
        pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
        pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
        pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
        pa_cpu = PROCESSOR_7300;
      else
        return false;
      return true;

    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

#if TARGET_HPUX
    case OPT_munix_93:
      flag_pa_unix = 1993;
      return true;
#endif

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    default:
      return true;
    }
}
void
override_options (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
#if TARGET_HPUX_11
  if (built_in_decls [BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls [BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
#endif
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || ldil_cint_p (ival)
          || zdepi_cint_p (ival));
}
/* Return truth value of whether OP can be used as an operand in an
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT
              && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}
/* True iff the operand OP can be used as the destination operand of
   an integer store.  This also implies the operand could be used as
   the source operand of an integer load.  Symbolic, lo_sum and indexed
   memory operands are not allowed.  We accept reloading pseudos and
   other memory operands.  */
int
integer_store_memory_operand (rtx op, enum machine_mode mode)
{
  return ((reload_in_progress
           && REG_P (op)
           && REGNO (op) >= FIRST_PSEUDO_REGISTER
           && reg_renumber [REGNO (op)] < 0)
          || (GET_CODE (op) == MEM
              && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
              && !symbolic_memory_operand (op, VOIDmode)
              && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
              && !IS_INDEX_ADDR_P (XEXP (op, 0))));
}
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
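
/* Worked example (illustrative, not from the original source):
   0x12345000 has its low 11 bits clear and does not change sign when
   widened, so the masked value X above is 0 and ldil can materialize
   it in one instruction.  0x12345678 leaves 0x678 in the low bits, so
   it fails the test and needs an ldil/ldo pair instead.  */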
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
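
/* Worked example (illustrative, not from the original source): for
   x = 0x01f00000 (five contiguous set bits), lsb_mask = 0x00100000 and
   t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1) = 0x00200000, a power of
   two, so the predicate accepts it.  For x = 0x101 the set bits span
   more than a sign-extended 5-bit field: t = 0x11, not a power of two,
   so the predicate rejects it.  */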
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
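
/* Worked examples (illustrative, not from the original source): for
   ior_mask_p, mask = 0x00000ff0 gives 0xff0 + 0x10 = 0x1000, a power
   of two, so a single depi can set those bits.  For and_mask_p, a
   constant like ~0xff0 (the 1..10..01..1 pattern, with the high bits
   sign-extended) complements back to 0xff0 and passes the same
   power-of-two test.  */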
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, mode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
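
/* The HIGH/LO_SUM pair built above for a DLT-indirect symbol load
   typically assembles to something like (illustrative only; the exact
   relocations and registers depend on the target and assembler):

        addil LT'sym,%r19
        ldw RT'sym(%r1),%reg

   i.e. an add against the PIC offset table register followed by a
   14-bit-displacement load from the DLT slot.  */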
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
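
/* A worked instance of the transformation above (illustrative, not
   from the original source): for a MODE_INT reference memory (X + 0x7000),
   the mask is 0x3fff and 0x7000 & 0x3fff = 0x3000, at least halfway to
   the next boundary, so the code below rounds up: Y = 0x8000.  We then
   compute Z = X + 0x8000 once and rewrite the reference as
   memory (Z + (0x7000 - 0x8000)), i.e. memory (Z - 0x1000), whose
   displacement fits in 14 bits and whose base Z can be shared by CSE
   with neighboring references.  */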
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                             gen_rtx_MULT (Pmode,
                                                           reg2,
                                                           GEN_INT (val)),
                                             reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_MULT (Pmode,
                                            XEXP (XEXP (XEXP (x, 0), 0), 0),
                                            XEXP (XEXP (XEXP (x, 0), 0), 1)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_MULT (Pmode, reg1,
                                                  XEXP (XEXP (idx, 0), 1)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          int val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                                 gen_rtx_MULT (Pmode,
                                                               reg2,
                                                               GEN_INT (val)),
                                                 reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_MULT (Pmode, reg1,
                                                    XEXP (XEXP (idx, 0), 1)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              int val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              gen_rtx_MULT (Pmode,
                                                            reg2,
                                                            GEN_INT (val)),
                                              reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode, regx2,
                                                       XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
                bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = COSTS_N_INSNS (8);
      else
        *total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else
        *total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
        the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
        the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
           && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
                                 XEXP (operand1, 0)))
          || ((GET_CODE (operand1) == SUBREG
               && GET_CODE (XEXP (operand1, 0)) == MEM
               && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                      ? SFmode : DFmode),
                                     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
        operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand1, 0), 0),
                                          scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
                              replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
           && fp_reg_operand (operand1, mode)
           && ((GET_CODE (operand0) == MEM
                && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                       ? SFmode : DFmode),
                                      XEXP (operand0, 0)))
               || ((GET_CODE (operand0) == SUBREG)
                   && GET_CODE (XEXP (operand0, 0)) == MEM
                   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                          ? SFmode : DFmode),
                                         XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
        operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
                                                                       0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand0, 0),
                                                             0),
                                                       scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
                              replace_equiv_address (operand0, scratch_reg),
                              operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.   Fix this for 2.5.  */
  else if (scratch_reg
           && CONSTANT_P (operand1)
           && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
         memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
                              replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (scratch_reg
           && GET_CODE (operand0) == REG
           && REGNO (operand0) < FIRST_PSEUDO_REGISTER
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
           && (GET_CODE (operand1) == MEM
               || GET_CODE (operand1) == CONST_INT
               || (GET_CODE (operand1) == REG
                   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
    {
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (GET_CODE (operand1) == MEM
          && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
        {
          /* We are reloading the address into the scratch register, so we
             want to make sure the scratch register is a full register.  */
          scratch_reg = force_mode (word_mode, scratch_reg);

          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
                                                                       0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand1, 0),
                                                             0),
                                                       scratch_reg));

          /* Now we are going to load the scratch register from memory,
             we want to load it in the same width as the original MEM,
             which must be the same as the width of the ultimate destination,
             OPERAND0.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg,
                          replace_equiv_address (operand1, scratch_reg));
        }
      else
        {
          /* We want to load the scratch register using the same mode as
             the ultimate destination.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg, operand1);
        }

      /* And emit the insn to set the ultimate destination.  We know that
         the scratch register has the same mode as the destination at this
         point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
          || (GET_CODE (operand1) == CONST_INT
              && cint_ok_for_move (INTVAL (operand1)))
          || (operand1 == CONST0_RTX (mode))
          || (GET_CODE (operand1) == HIGH
              && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
          /* Only `general_operands' can come here, so MEM is ok.  */
          || GET_CODE (operand1) == MEM)
        {
          /* Various sets are created during RTL generation which don't
             have the REG_POINTER flag correctly set.  After the CSE pass,
             instruction recognition can fail if we don't consistently
             set this flag when performing register copies.  This should
             also improve the opportunities for creating insns that use
             unscaled indexing.  */
          if (REG_P (operand0) && REG_P (operand1))
            {
              if (REG_POINTER (operand1)
                  && !REG_POINTER (operand0)
                  && !HARD_REGISTER_P (operand0))
                copy_reg_pointer (operand0, operand1);
            }

          /* When MEMs are broken out, the REG_POINTER flag doesn't
             get set.  In some cases, we can set the REG_POINTER flag
             from the declaration for the MEM.  */
          if (REG_P (operand0)
              && GET_CODE (operand1) == MEM
              && !REG_POINTER (operand0))
            {
              tree decl = MEM_EXPR (operand1);

              /* Set the register pointer flag and register alignment
                 if the declaration for this memory reference is a
                 pointer type.  */
              if (decl)
                {
                  tree type;

                  /* If this is a COMPONENT_REF, use the FIELD_DECL from
                     tree operand 1.  */
                  if (TREE_CODE (decl) == COMPONENT_REF)
                    decl = TREE_OPERAND (decl, 1);

                  type = TREE_TYPE (decl);
                  type = strip_array_types (type);

                  if (POINTER_TYPE_P (type))
                    {
                      int align;

                      type = TREE_TYPE (type);
                      /* Using TYPE_ALIGN_OK is rather conservative as
                         only the ada frontend actually sets it.  */
                      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
                               : BITS_PER_UNIT);
                      mark_reg_pointer (operand0, align);
                    }
                }
            }

          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
          return 1;
        }
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
          && !(reload_in_progress || reload_completed))
        {
          rtx temp = gen_reg_rtx (DFmode);

          emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
          emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
          return 1;
        }
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
        {
          /* Run this case quickly.  */
          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
          return 1;
        }
      if (! (reload_in_progress || reload_completed))
        {
          operands[0] = validize_mem (operand0);
          operands[1] = operand1 = force_reg (mode, operand1);
        }
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
          && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
        {
          ishighonly = 1;
          operand1 = XEXP (operand1, 0);
        }
      if (symbolic_operand (operand1, mode))
        {
          /* Argh.  The assembler and linker can't handle arithmetic
             involving plabels.

             So we force the plabel into memory, load operand0 from
             the memory location, then add in the constant part.  */
          if ((GET_CODE (operand1) == CONST
               && GET_CODE (XEXP (operand1, 0)) == PLUS
               && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
              || function_label_operand (operand1, mode))
            {
              rtx temp, const_part;

              /* Figure out what (if any) scratch register to use.  */
              if (reload_in_progress || reload_completed)
                {
                  scratch_reg = scratch_reg ? scratch_reg : operand0;
                  /* SCRATCH_REG will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode it
                     was originally given to us.  */
                  scratch_reg = force_mode (word_mode, scratch_reg);
                }
              else if (flag_pic)
                scratch_reg = gen_reg_rtx (Pmode);

              if (GET_CODE (operand1) == CONST)
                {
                  /* Save away the constant part of the expression.  */
                  const_part = XEXP (XEXP (operand1, 0), 1);
                  gcc_assert (GET_CODE (const_part) == CONST_INT);

                  /* Force the function label into memory.  */
                  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
                }
              else
                {
                  /* No constant part.  */
                  const_part = NULL_RTX;

                  /* Force the function label into memory.  */
                  temp = force_const_mem (mode, operand1);
                }


              /* Get the address of the memory location.  PIC-ify it if
                 necessary.  */
              temp = XEXP (temp, 0);
              if (flag_pic)
                temp = legitimize_pic_address (temp, mode, scratch_reg);

              /* Put the address of the memory location into our destination
                 register.  */
              operands[1] = temp;
              emit_move_sequence (operands, mode, scratch_reg);

              /* Now load from the memory location into our destination
                 register.  */
              operands[1] = gen_rtx_MEM (Pmode, operands[0]);
              emit_move_sequence (operands, mode, scratch_reg);

              /* And add back in the constant part.  */
              if (const_part != NULL_RTX)
                expand_inc (operand0, const_part);

              return 1;
            }

          if (flag_pic)
            {
              rtx temp;

              if (reload_in_progress || reload_completed)
                {
                  temp = scratch_reg ? scratch_reg : operand0;
                  /* TEMP will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode it
                     was originally given to us.  */
                  temp = force_mode (word_mode, temp);
                }
              else
                temp = gen_reg_rtx (Pmode);

              /* (const (plus (symbol) (const_int))) must be forced to
                 memory during/after reload if the const_int will not fit
                 in 14 bits.  */
              if (GET_CODE (operand1) == CONST
                  && GET_CODE (XEXP (operand1, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
                  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
                  && (reload_completed || reload_in_progress)
                  && flag_pic)
                {
                  rtx const_mem = force_const_mem (mode, operand1);
                  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
                                                        mode, temp);
                  operands[1] = replace_equiv_address (const_mem, operands[1]);
                  emit_move_sequence (operands, mode, temp);
                }
              else
                {
                  operands[1] = legitimize_pic_address (operand1, mode, temp);
                  if (REG_P (operand0) && REG_P (operands[1]))
                    copy_reg_pointer (operand0, operands[1]);
                  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
                }
            }
          /* On the HPPA, references to data space are supposed to use dp,
             register 27, but showing it in the RTL inhibits various cse
             and loop optimizations.  */
          else
            {
              rtx temp, set;

              if (reload_in_progress || reload_completed)
                {
                  temp = scratch_reg ? scratch_reg : operand0;
                  /* TEMP will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode it
                     was originally given to us.  */
                  temp = force_mode (word_mode, temp);
                }
              else
                temp = gen_reg_rtx (mode);

              /* Loading a SYMBOL_REF into a register makes that register
                 safe to be used as the base in an indexed address.

                 Don't mark hard registers though.  That loses.  */
              if (GET_CODE (operand0) == REG
                  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
                mark_reg_pointer (operand0, BITS_PER_UNIT);
              if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
                mark_reg_pointer (temp, BITS_PER_UNIT);

              if (ishighonly)
                set = gen_rtx_SET (mode, operand0, temp);
              else
1932 set = gen_rtx_SET (VOIDmode,
1933 operand0,
1934 gen_rtx_LO_SUM (mode, temp, operand1));
1936 emit_insn (gen_rtx_SET (VOIDmode,
1937 temp,
1938 gen_rtx_HIGH (mode, operand1)));
1939 emit_insn (set);
1942 return 1;
1944 else if (pa_tls_referenced_p (operand1))
1946 rtx tmp = operand1;
1947 rtx addend = NULL;
1949 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1951 addend = XEXP (XEXP (tmp, 0), 1);
1952 tmp = XEXP (XEXP (tmp, 0), 0);
1955 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1956 tmp = legitimize_tls_address (tmp);
1957 if (addend)
1959 tmp = gen_rtx_PLUS (mode, tmp, addend);
1960 tmp = force_operand (tmp, operands[0]);
1962 operands[1] = tmp;
1964 else if (GET_CODE (operand1) != CONST_INT
1965 || !cint_ok_for_move (INTVAL (operand1)))
1967 rtx insn, temp;
1968 rtx op1 = operand1;
1969 HOST_WIDE_INT value = 0;
1970 HOST_WIDE_INT insv = 0;
1971 int insert = 0;
1973 if (GET_CODE (operand1) == CONST_INT)
1974 value = INTVAL (operand1);
1976 if (TARGET_64BIT
1977 && GET_CODE (operand1) == CONST_INT
1978 && HOST_BITS_PER_WIDE_INT > 32
1979 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1981 HOST_WIDE_INT nval;
1983 /* Extract the low order 32 bits of the value and sign extend.
1984 If the new value is the same as the original value, we can
1985 use the original value as-is. If the new value is
1986 different, we use it and insert the most-significant 32-bits
1987 of the original value into the final result. */
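	  /* Worked example (hypothetical value): for value = 0x123456789,
	     the low-order 32 bits are 0x23456789 with bit 31 clear, so
	     nval = 0x23456789 != value.  We load 0x23456789 first, set
	     insv = value >> 32 = 0x1, and insert that into the upper
	     32 bits of the destination below.  */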
1988 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1989 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1990 if (value != nval)
1992 #if HOST_BITS_PER_WIDE_INT > 32
1993 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1994 #endif
1995 insert = 1;
1996 value = nval;
1997 operand1 = GEN_INT (nval);
2001 if (reload_in_progress || reload_completed)
2002 temp = scratch_reg ? scratch_reg : operand0;
2003 else
2004 temp = gen_reg_rtx (mode);
2006 /* We don't directly split DImode constants on 32-bit targets
2007 because PLUS uses an 11-bit immediate and the insn sequence
2008 generated is not as efficient as the one using HIGH/LO_SUM. */
2009 if (GET_CODE (operand1) == CONST_INT
2010 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2011 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2012 && !insert)
2014 /* Directly break constant into high and low parts. This
2015 provides better optimization opportunities because various
2016 passes recognize constants split with PLUS but not LO_SUM.
2017 We use a 14-bit signed low part except when the addition
2018 of 0x4000 to the high part might change the sign of the
2019 high part. */
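	      /* Worked example (hypothetical value): for value = 0x12345,
		 low = 0x2345 >= 0x2000, so high becomes 0x10000 + 0x4000
		 = 0x14000 and the final low = 0x12345 - 0x14000 = -0x1cbb,
		 which still fits in 14 signed bits.  */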
2020 HOST_WIDE_INT low = value & 0x3fff;
2021 HOST_WIDE_INT high = value & ~ 0x3fff;
2023 if (low >= 0x2000)
2025 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2026 high += 0x2000;
2027 else
2028 high += 0x4000;
2031 low = value - high;
2033 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2034 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2036 else
2038 emit_insn (gen_rtx_SET (VOIDmode, temp,
2039 gen_rtx_HIGH (mode, operand1)));
2040 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2043 insn = emit_move_insn (operands[0], operands[1]);
2045 /* Now insert the most significant 32 bits of the value
2046 into the register. When we don't have a second register
2047 available, it could take up to nine instructions to load
2048 a 64-bit integer constant. Prior to reload, we force
2049 constants that would take more than three instructions
2050 to load to the constant pool. During and after reload,
2051 we have to handle all possible values. */
2052 if (insert)
2054 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2055 register and the value to be inserted is outside the
2056 range that can be loaded with three depdi instructions. */
2057 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2059 operand1 = GEN_INT (insv);
2061 emit_insn (gen_rtx_SET (VOIDmode, temp,
2062 gen_rtx_HIGH (mode, operand1)));
2063 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2064 emit_insn (gen_insv (operand0, GEN_INT (32),
2065 const0_rtx, temp));
2067 else
2069 int len = 5, pos = 27;
2071 /* Insert the bits using the depdi instruction. */
2072 while (pos >= 0)
2074 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2075 HOST_WIDE_INT sign = v5 < 0;
2077 /* Left extend the insertion. */
2078 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2079 while (pos > 0 && (insv & 1) == sign)
2081 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2082 len += 1;
2083 pos -= 1;
2086 emit_insn (gen_insv (operand0, GEN_INT (len),
2087 GEN_INT (pos), GEN_INT (v5)));
2089 len = pos > 0 && pos < 5 ? pos : 5;
2090 pos -= len;
2095 set_unique_reg_note (insn, REG_EQUAL, op1);
2097 return 1;
2100 /* Now have insn-emit do whatever it normally does. */
2101 return 0;
2104 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2105 it will need a link/runtime reloc). */
2107 int
2108 reloc_needed (tree exp)
2110 int reloc = 0;
2112 switch (TREE_CODE (exp))
2114 case ADDR_EXPR:
2115 return 1;
2117 case POINTER_PLUS_EXPR:
2118 case PLUS_EXPR:
2119 case MINUS_EXPR:
2120 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2121 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2122 break;
2124 CASE_CONVERT:
2125 case NON_LVALUE_EXPR:
2126 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2127 break;
2129 case CONSTRUCTOR:
2131 tree value;
2132 unsigned HOST_WIDE_INT ix;
2134 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2135 if (value)
2136 reloc |= reloc_needed (value);
2138 break;
2140 case ERROR_MARK:
2141 break;
2143 default:
2144 break;
2146 return reloc;
2149 /* Does operand (which is a symbolic_operand) live in text space?
2150 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2151 will be true. */
2153 int
2154 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2156 if (GET_CODE (operand) == CONST)
2157 operand = XEXP (XEXP (operand, 0), 0);
2158 if (flag_pic)
2160 if (GET_CODE (operand) == SYMBOL_REF)
2161 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2163 else
2165 if (GET_CODE (operand) == SYMBOL_REF)
2166 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2168 return 1;
2172 /* Return the best assembler insn template
2173 for moving operands[1] into operands[0] as a fullword. */
2174 const char *
2175 singlemove_string (rtx *operands)
2177 HOST_WIDE_INT intval;
2179 if (GET_CODE (operands[0]) == MEM)
2180 return "stw %r1,%0";
2181 if (GET_CODE (operands[1]) == MEM)
2182 return "ldw %1,%0";
2183 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2185 long i;
2186 REAL_VALUE_TYPE d;
2188 gcc_assert (GET_MODE (operands[1]) == SFmode);
2190 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2191 bit pattern. */
2192 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2193 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2195 operands[1] = GEN_INT (i);
2196 /* Fall through to CONST_INT case. */
2198 if (GET_CODE (operands[1]) == CONST_INT)
2200 intval = INTVAL (operands[1]);
2202 if (VAL_14_BITS_P (intval))
2203 return "ldi %1,%0";
2204 else if ((intval & 0x7ff) == 0)
2205 return "ldil L'%1,%0";
2206 else if (zdepi_cint_p (intval))
2207 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2208 else
2209 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2211 return "copy %1,%0";
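/* Illustrative classifications for the constant cases above (hypothetical
   values): 300 fits in 14 signed bits and yields "ldi 300,%0"; 0x12345800
   has its low 11 bits clear and yields "ldil L'0x12345800,%0"; 0x12345678
   passes neither test nor zdepi_cint_p and needs the two-insn
   "ldil"/"ldo" sequence.  */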
2215 /* Compute position (in OP[1]) and width (in OP[2])
2216 useful for copying IMM to a register using the zdepi
2217 instructions. Store the immediate value to insert in OP[0]. */
2218 static void
2219 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2221 int lsb, len;
2223 /* Find the least significant set bit in IMM. */
2224 for (lsb = 0; lsb < 32; lsb++)
2226 if ((imm & 1) != 0)
2227 break;
2228 imm >>= 1;
2231 /* Choose variants based on *sign* of the 5-bit field. */
2232 if ((imm & 0x10) == 0)
2233 len = (lsb <= 28) ? 4 : 32 - lsb;
2234 else
2236 /* Find the width of the bitstring in IMM. */
2237 for (len = 5; len < 32 - lsb; len++)
2239 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2240 break;
2243 /* Sign extend IMM as a 5-bit value. */
2244 imm = (imm & 0xf) - 0x10;
2247 op[0] = imm;
2248 op[1] = 31 - lsb;
2249 op[2] = len;
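/* Worked examples (hypothetical values): for imm = 0x1400, lsb = 10 and
   the shifted value 5 has bit 4 clear, so op = {5, 21, 4} and
   "zdepi 5,21,4" deposits 5 << 10 = 0x1400.  For imm = 0x7c00, the
   shifted value 0x1f has bit 4 set; len becomes 5 and the immediate is
   sign extended to -1, so "zdepi -1,21,5" deposits 0x7c00.  */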
2252 /* Compute position (in OP[1]) and width (in OP[2])
2253 useful for copying IMM to a register using the depdi,z
2254 instructions. Store the immediate value to insert in OP[0]. */
2255 void
2256 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2258 int lsb, len, maxlen;
2260 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2262 /* Find the least significant set bit in IMM. */
2263 for (lsb = 0; lsb < maxlen; lsb++)
2265 if ((imm & 1) != 0)
2266 break;
2267 imm >>= 1;
2270 /* Choose variants based on *sign* of the 5-bit field. */
2271 if ((imm & 0x10) == 0)
2272 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2273 else
2275 /* Find the width of the bitstring in IMM. */
2276 for (len = 5; len < maxlen - lsb; len++)
2278 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2279 break;
2282 /* Extend length if host is narrow and IMM is negative. */
2283 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2284 len += 32;
2286 /* Sign extend IMM as a 5-bit value. */
2287 imm = (imm & 0xf) - 0x10;
2290 op[0] = imm;
2291 op[1] = 63 - lsb;
2292 op[2] = len;
2295 /* Output assembler code to perform a doubleword move insn
2296 with operands OPERANDS. */
2298 const char *
2299 output_move_double (rtx *operands)
2301 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2302 rtx latehalf[2];
2303 rtx addreg0 = 0, addreg1 = 0;
2305 /* First classify both operands. */
2307 if (REG_P (operands[0]))
2308 optype0 = REGOP;
2309 else if (offsettable_memref_p (operands[0]))
2310 optype0 = OFFSOP;
2311 else if (GET_CODE (operands[0]) == MEM)
2312 optype0 = MEMOP;
2313 else
2314 optype0 = RNDOP;
2316 if (REG_P (operands[1]))
2317 optype1 = REGOP;
2318 else if (CONSTANT_P (operands[1]))
2319 optype1 = CNSTOP;
2320 else if (offsettable_memref_p (operands[1]))
2321 optype1 = OFFSOP;
2322 else if (GET_CODE (operands[1]) == MEM)
2323 optype1 = MEMOP;
2324 else
2325 optype1 = RNDOP;
2327 /* Check for the cases that the operand constraints are not
2328 supposed to allow. */
2329 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2331 /* Handle copies between general and floating registers. */
2333 if (optype0 == REGOP && optype1 == REGOP
2334 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2336 if (FP_REG_P (operands[0]))
2338 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2339 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2340 return "{fldds|fldd} -16(%%sp),%0";
2342 else
2344 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2345 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2346 return "{ldws|ldw} -12(%%sp),%R0";
2350 /* Handle auto decrementing and incrementing loads and stores
2351 specifically, since the structure of the function doesn't work
2352 for them without major modification. Do it better when we teach
2353 this port about the general inc/dec addressing of the PA.
2354 (This was written by tege. Chide him if it doesn't work.) */
2356 if (optype0 == MEMOP)
2358 /* We have to output the address syntax ourselves, since print_operand
2359 doesn't deal with the addresses we want to use. Fix this later. */
2361 rtx addr = XEXP (operands[0], 0);
2362 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2364 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2366 operands[0] = XEXP (addr, 0);
2367 gcc_assert (GET_CODE (operands[1]) == REG
2368 && GET_CODE (operands[0]) == REG);
2370 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2372 /* No overlap between high target register and address
2373 register. (We do this in a non-obvious way to
2374 save a register file writeback) */
2375 if (GET_CODE (addr) == POST_INC)
2376 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2377 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2379 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2381 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2383 operands[0] = XEXP (addr, 0);
2384 gcc_assert (GET_CODE (operands[1]) == REG
2385 && GET_CODE (operands[0]) == REG);
2387 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2388 /* No overlap between high target register and address
2389 register. (We do this in a non-obvious way to save a
2390 register file writeback) */
2391 if (GET_CODE (addr) == PRE_INC)
2392 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2393 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2396 if (optype1 == MEMOP)
2398 /* We have to output the address syntax ourselves, since print_operand
2399 doesn't deal with the addresses we want to use. Fix this later. */
2401 rtx addr = XEXP (operands[1], 0);
2402 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2404 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2406 operands[1] = XEXP (addr, 0);
2407 gcc_assert (GET_CODE (operands[0]) == REG
2408 && GET_CODE (operands[1]) == REG);
2410 if (!reg_overlap_mentioned_p (high_reg, addr))
2412 /* No overlap between high target register and address
2413 register. (We do this in a non-obvious way to
2414 save a register file writeback) */
2415 if (GET_CODE (addr) == POST_INC)
2416 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2417 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2419 else
2421 /* This is an undefined situation. We should load into the
2422 address register *and* update that register. Probably
2423 we don't need to handle this at all. */
2424 if (GET_CODE (addr) == POST_INC)
2425 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2426 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2429 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2431 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2433 operands[1] = XEXP (addr, 0);
2434 gcc_assert (GET_CODE (operands[0]) == REG
2435 && GET_CODE (operands[1]) == REG);
2437 if (!reg_overlap_mentioned_p (high_reg, addr))
2439 /* No overlap between high target register and address
2440 register. (We do this in a non-obvious way to
2441 save a register file writeback) */
2442 if (GET_CODE (addr) == PRE_INC)
2443 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2444 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2446 else
2448 /* This is an undefined situation. We should load into the
2449 address register *and* update that register. Probably
2450 we don't need to handle this at all. */
2451 if (GET_CODE (addr) == PRE_INC)
2452 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2453 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2456 else if (GET_CODE (addr) == PLUS
2457 && GET_CODE (XEXP (addr, 0)) == MULT)
2459 rtx xoperands[4];
2460 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2462 if (!reg_overlap_mentioned_p (high_reg, addr))
2464 xoperands[0] = high_reg;
2465 xoperands[1] = XEXP (addr, 1);
2466 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2467 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2468 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2469 xoperands);
2470 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2472 else
2474 xoperands[0] = high_reg;
2475 xoperands[1] = XEXP (addr, 1);
2476 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2477 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2478 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2479 xoperands);
2480 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2485 /* If an operand is an unoffsettable memory ref, find a register
2486 we can increment temporarily to make it refer to the second word. */
2488 if (optype0 == MEMOP)
2489 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2491 if (optype1 == MEMOP)
2492 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2494 /* Ok, we can do one word at a time.
2495 Normally we do the low-numbered word first.
2497 In either case, set up in LATEHALF the operands to use
2498 for the high-numbered word and in some cases alter the
2499 operands in OPERANDS to be suitable for the low-numbered word. */
2501 if (optype0 == REGOP)
2502 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2503 else if (optype0 == OFFSOP)
2504 latehalf[0] = adjust_address (operands[0], SImode, 4);
2505 else
2506 latehalf[0] = operands[0];
2508 if (optype1 == REGOP)
2509 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2510 else if (optype1 == OFFSOP)
2511 latehalf[1] = adjust_address (operands[1], SImode, 4);
2512 else if (optype1 == CNSTOP)
2513 split_double (operands[1], &operands[1], &latehalf[1]);
2514 else
2515 latehalf[1] = operands[1];
2517 /* If the first move would clobber the source of the second one,
2518 do them in the other order.
2520 This can happen in two cases:
2522 mem -> register where the first half of the destination register
2523 is the same register used in the memory's address. Reload
2524 can create such insns.
2526 mem in this case will be either register indirect or register
2527 indirect plus a valid offset.
2529 register -> register move where REGNO(dst) == REGNO(src) + 1;
2530 someone (Tim/Tege?) claimed this can happen for parameter loads.
2532 Handle mem -> register case first. */
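  /* For example (hypothetical registers), a DImode load from 0(%r4) into
     %r4/%r5 must do the late half first, "ldw 4(%r4),%r5" and then
     "ldw 0(%r4),%r4", since the other order would clobber the address
     register before the second load.  */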
2533 if (optype0 == REGOP
2534 && (optype1 == MEMOP || optype1 == OFFSOP)
2535 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2536 operands[1], 0))
2538 /* Do the late half first. */
2539 if (addreg1)
2540 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2541 output_asm_insn (singlemove_string (latehalf), latehalf);
2543 /* Then clobber. */
2544 if (addreg1)
2545 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2546 return singlemove_string (operands);
2549 /* Now handle register -> register case. */
2550 if (optype0 == REGOP && optype1 == REGOP
2551 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2553 output_asm_insn (singlemove_string (latehalf), latehalf);
2554 return singlemove_string (operands);
2557 /* Normal case: do the two words, low-numbered first. */
2559 output_asm_insn (singlemove_string (operands), operands);
2561 /* Make any unoffsettable addresses point at high-numbered word. */
2562 if (addreg0)
2563 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2564 if (addreg1)
2565 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2567 /* Do that word. */
2568 output_asm_insn (singlemove_string (latehalf), latehalf);
2570 /* Undo the adds we just did. */
2571 if (addreg0)
2572 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2573 if (addreg1)
2574 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2576 return "";
2579 const char *
2580 output_fp_move_double (rtx *operands)
2582 if (FP_REG_P (operands[0]))
2584 if (FP_REG_P (operands[1])
2585 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2586 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2587 else
2588 output_asm_insn ("fldd%F1 %1,%0", operands);
2590 else if (FP_REG_P (operands[1]))
2592 output_asm_insn ("fstd%F0 %1,%0", operands);
2594 else
2596 rtx xoperands[2];
2598 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2600 /* This is a pain. You have to be prepared to deal with an
2601 arbitrary address here including pre/post increment/decrement,
2603 so avoid this in the MD. */
2604 gcc_assert (GET_CODE (operands[0]) == REG);
2606 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2607 xoperands[0] = operands[0];
2608 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2610 return "";
2613 /* Return a REG that occurs in ADDR with coefficient 1.
2614 ADDR can be effectively incremented by incrementing REG. */
2616 static rtx
2617 find_addr_reg (rtx addr)
2619 while (GET_CODE (addr) == PLUS)
2621 if (GET_CODE (XEXP (addr, 0)) == REG)
2622 addr = XEXP (addr, 0);
2623 else if (GET_CODE (XEXP (addr, 1)) == REG)
2624 addr = XEXP (addr, 1);
2625 else if (CONSTANT_P (XEXP (addr, 0)))
2626 addr = XEXP (addr, 1);
2627 else if (CONSTANT_P (XEXP (addr, 1)))
2628 addr = XEXP (addr, 0);
2629 else
2630 gcc_unreachable ();
2632 gcc_assert (GET_CODE (addr) == REG);
2633 return addr;
2636 /* Emit code to perform a block move.
2638 OPERANDS[0] is the destination pointer as a REG, clobbered.
2639 OPERANDS[1] is the source pointer as a REG, clobbered.
2640 OPERANDS[2] is a register for temporary storage.
2641 OPERANDS[3] is a register for temporary storage.
2642 OPERANDS[4] is the size as a CONST_INT
2643 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2644 OPERANDS[6] is another temporary register. */
2646 const char *
2647 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2649 int align = INTVAL (operands[5]);
2650 unsigned long n_bytes = INTVAL (operands[4]);
2652 /* We can't move more than a word at a time because the PA
2653 has no integer move insns longer than a word. (Could use fp mem ops?) */
2654 if (align > (TARGET_64BIT ? 8 : 4))
2655 align = (TARGET_64BIT ? 8 : 4);
2657 /* Note that we know each loop below will execute at least twice
2658 (else we would have open-coded the copy). */
2659 switch (align)
2661 case 8:
2662 /* Pre-adjust the loop counter. */
2663 operands[4] = GEN_INT (n_bytes - 16);
2664 output_asm_insn ("ldi %4,%2", operands);
2666 /* Copying loop. */
2667 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2668 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2669 output_asm_insn ("std,ma %3,8(%0)", operands);
2670 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2671 output_asm_insn ("std,ma %6,8(%0)", operands);
2673 /* Handle the residual. There could be up to 15 bytes of
2674 residual to copy! */
2675 if (n_bytes % 16 != 0)
2677 operands[4] = GEN_INT (n_bytes % 8);
2678 if (n_bytes % 16 >= 8)
2679 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2680 if (n_bytes % 8 != 0)
2681 output_asm_insn ("ldd 0(%1),%6", operands);
2682 if (n_bytes % 16 >= 8)
2683 output_asm_insn ("std,ma %3,8(%0)", operands);
2684 if (n_bytes % 8 != 0)
2685 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2687 return "";
2689 case 4:
2690 /* Pre-adjust the loop counter. */
2691 operands[4] = GEN_INT (n_bytes - 8);
2692 output_asm_insn ("ldi %4,%2", operands);
2694 /* Copying loop. */
2695 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2696 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2697 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2698 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2699 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2701 /* Handle the residual. There could be up to 7 bytes of
2702 residual to copy! */
2703 if (n_bytes % 8 != 0)
2705 operands[4] = GEN_INT (n_bytes % 4);
2706 if (n_bytes % 8 >= 4)
2707 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2708 if (n_bytes % 4 != 0)
2709 output_asm_insn ("ldw 0(%1),%6", operands);
2710 if (n_bytes % 8 >= 4)
2711 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2712 if (n_bytes % 4 != 0)
2713 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2715 return "";
2717 case 2:
2718 /* Pre-adjust the loop counter. */
2719 operands[4] = GEN_INT (n_bytes - 4);
2720 output_asm_insn ("ldi %4,%2", operands);
2722 /* Copying loop. */
2723 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2724 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2725 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2726 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2727 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2729 /* Handle the residual. */
2730 if (n_bytes % 4 != 0)
2732 if (n_bytes % 4 >= 2)
2733 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2734 if (n_bytes % 2 != 0)
2735 output_asm_insn ("ldb 0(%1),%6", operands);
2736 if (n_bytes % 4 >= 2)
2737 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2738 if (n_bytes % 2 != 0)
2739 output_asm_insn ("stb %6,0(%0)", operands);
2741 return "";
2743 case 1:
2744 /* Pre-adjust the loop counter. */
2745 operands[4] = GEN_INT (n_bytes - 2);
2746 output_asm_insn ("ldi %4,%2", operands);
2748 /* Copying loop. */
2749 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2750 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2751 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2752 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2753 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2755 /* Handle the residual. */
2756 if (n_bytes % 2 != 0)
2758 output_asm_insn ("ldb 0(%1),%3", operands);
2759 output_asm_insn ("stb %3,0(%0)", operands);
2761 return "";
2763 default:
2764 gcc_unreachable ();
2768 /* Count the number of insns necessary to handle this block move.
2770 Basic structure is the same as output_block_move, except that we
2771 count insns rather than emit them. */
2773 static int
2774 compute_movmem_length (rtx insn)
2776 rtx pat = PATTERN (insn);
2777 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2778 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2779 unsigned int n_insns = 0;
2781 /* We can't move more than a word at a time because the PA
2782 has no integer move insns longer than a word. (Could use fp mem ops?)
2783 if (align > (TARGET_64BIT ? 8 : 4))
2784 align = (TARGET_64BIT ? 8 : 4);
2786 /* The basic copying loop. */
2787 n_insns = 6;
2789 /* Residuals. */
2790 if (n_bytes % (2 * align) != 0)
2792 if ((n_bytes % (2 * align)) >= align)
2793 n_insns += 2;
2795 if ((n_bytes % align) != 0)
2796 n_insns += 2;
2799 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2800 return n_insns * 4;
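/* Worked example (hypothetical operands): with align == 4 and
   n_bytes == 11, the copying loop accounts for 6 insns; 11 % 8 == 3 is
   less than 4, so no aligned residual insns are needed, but 11 % 4 != 0
   adds 2 more, for 8 insns or 32 bytes.  */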
2803 /* Emit code to perform a block clear.
2805 OPERANDS[0] is the destination pointer as a REG, clobbered.
2806 OPERANDS[1] is a register for temporary storage.
2807 OPERANDS[2] is the size as a CONST_INT
2808 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2810 const char *
2811 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2813 int align = INTVAL (operands[3]);
2814 unsigned long n_bytes = INTVAL (operands[2]);
2816 /* We can't clear more than a word at a time because the PA
2817 has no integer move insns longer than a word. */
2818 if (align > (TARGET_64BIT ? 8 : 4))
2819 align = (TARGET_64BIT ? 8 : 4);
2821 /* Note that we know each loop below will execute at least twice
2822 (else we would have open-coded the clear). */
2823 switch (align)
2825 case 8:
2826 /* Pre-adjust the loop counter. */
2827 operands[2] = GEN_INT (n_bytes - 16);
2828 output_asm_insn ("ldi %2,%1", operands);
2830 /* Loop. */
2831 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2832 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2833 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2835 /* Handle the residual. There could be up to 15 bytes of
2836 residual to clear! */
2837 if (n_bytes % 16 != 0)
2839 operands[2] = GEN_INT (n_bytes % 8);
2840 if (n_bytes % 16 >= 8)
2841 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2842 if (n_bytes % 8 != 0)
2843 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2845 return "";
2847 case 4:
2848 /* Pre-adjust the loop counter. */
2849 operands[2] = GEN_INT (n_bytes - 8);
2850 output_asm_insn ("ldi %2,%1", operands);
2852 /* Loop. */
2853 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2854 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2855 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2857 /* Handle the residual. There could be up to 7 bytes of
2858 residual to clear! */
2859 if (n_bytes % 8 != 0)
2861 operands[2] = GEN_INT (n_bytes % 4);
2862 if (n_bytes % 8 >= 4)
2863 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2864 if (n_bytes % 4 != 0)
2865 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2867 return "";
2869 case 2:
2870 /* Pre-adjust the loop counter. */
2871 operands[2] = GEN_INT (n_bytes - 4);
2872 output_asm_insn ("ldi %2,%1", operands);
2874 /* Loop. */
2875 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2876 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2877 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2879 /* Handle the residual. */
2880 if (n_bytes % 4 != 0)
2882 if (n_bytes % 4 >= 2)
2883 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2884 if (n_bytes % 2 != 0)
2885 output_asm_insn ("stb %%r0,0(%0)", operands);
2887 return "";
2889 case 1:
2890 /* Pre-adjust the loop counter. */
2891 operands[2] = GEN_INT (n_bytes - 2);
2892 output_asm_insn ("ldi %2,%1", operands);
2894 /* Loop. */
2895 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2896 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2897 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2899 /* Handle the residual. */
2900 if (n_bytes % 2 != 0)
2901 output_asm_insn ("stb %%r0,0(%0)", operands);
2903 return "";
2905 default:
2906 gcc_unreachable ();
2910 /* Count the number of insns necessary to handle this block clear.
2912 Basic structure is the same as output_block_clear, except that we
2913 count insns rather than emit them. */
2915 static int
2916 compute_clrmem_length (rtx insn)
2918 rtx pat = PATTERN (insn);
2919 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2920 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2921 unsigned int n_insns = 0;
2923 /* We can't clear more than a word at a time because the PA
2924 has no integer move insns longer than a word. */
2925 if (align > (TARGET_64BIT ? 8 : 4))
2926 align = (TARGET_64BIT ? 8 : 4);
2928 /* The basic loop. */
2929 n_insns = 4;
2931 /* Residuals. */
2932 if (n_bytes % (2 * align) != 0)
2934 if ((n_bytes % (2 * align)) >= align)
2935 n_insns++;
2937 if ((n_bytes % align) != 0)
2938 n_insns++;
2941 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2942 return n_insns * 4;
2946 const char *
2947 output_and (rtx *operands)
2949 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2951 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2952 int ls0, ls1, ms0, p, len;
2954 for (ls0 = 0; ls0 < 32; ls0++)
2955 if ((mask & (1 << ls0)) == 0)
2956 break;
2958 for (ls1 = ls0; ls1 < 32; ls1++)
2959 if ((mask & (1 << ls1)) != 0)
2960 break;
2962 for (ms0 = ls1; ms0 < 32; ms0++)
2963 if ((mask & (1 << ms0)) == 0)
2964 break;
2966 gcc_assert (ms0 == 32);
2968 if (ls1 == 32)
2970 len = ls0;
2972 gcc_assert (len);
2974 operands[2] = GEN_INT (len);
2975 return "{extru|extrw,u} %1,31,%2,%0";
2977 else
2979 /* We could use this `depi' for the case above as well, but `depi'
2980 requires one more register file access than an `extru'. */
2982 p = 31 - ls0;
2983 len = ls1 - ls0;
2985 operands[2] = GEN_INT (p);
2986 operands[3] = GEN_INT (len);
2987 return "{depi|depwi} 0,%2,%3,%0";
2990 else
2991 return "and %1,%2,%0";
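/* Worked examples (hypothetical masks): mask = 0x000000ff gives ls0 = 8
   and ls1 = 32, so we extract the low 8 bits with "extru %1,31,8,%0";
   mask = 0xffff00ff has a field of zeros at bits 8-15, so we clear it
   with "depi 0,23,8,%0".  */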
2994 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2995 storing the result in operands[0]. */
2996 const char *
2997 output_64bit_and (rtx *operands)
2999 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3001 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3002 int ls0, ls1, ms0, p, len;
3004 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3005 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3006 break;
3008 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3009 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3010 break;
3012 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3013 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3014 break;
3016 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3018 if (ls1 == HOST_BITS_PER_WIDE_INT)
3020 len = ls0;
3022 gcc_assert (len);
3024 operands[2] = GEN_INT (len);
3025 return "extrd,u %1,63,%2,%0";
3027 else
3029 /* We could use this `depdi' for the case above as well, but `depdi'
3030 requires one more register file access than an `extrd,u'. */
3032 p = 63 - ls0;
3033 len = ls1 - ls0;
3035 operands[2] = GEN_INT (p);
3036 operands[3] = GEN_INT (len);
3037 return "depdi 0,%2,%3,%0";
3040 else
3041 return "and %1,%2,%0";
3044 const char *
3045 output_ior (rtx *operands)
3047 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3048 int bs0, bs1, p, len;
3050 if (INTVAL (operands[2]) == 0)
3051 return "copy %1,%0";
3053 for (bs0 = 0; bs0 < 32; bs0++)
3054 if ((mask & (1 << bs0)) != 0)
3055 break;
3057 for (bs1 = bs0; bs1 < 32; bs1++)
3058 if ((mask & (1 << bs1)) == 0)
3059 break;
3061 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3063 p = 31 - bs0;
3064 len = bs1 - bs0;
3066 operands[2] = GEN_INT (p);
3067 operands[3] = GEN_INT (len);
3068 return "{depi|depwi} -1,%2,%3,%0";
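/* Worked example (hypothetical mask): mask = 0x00ffff00 is a single run
   of ones at bits 8-23, giving bs0 = 8, bs1 = 24, p = 23 and len = 16,
   so we emit "depi -1,23,16,%0".  */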
3071 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3072 storing the result in operands[0]. */
3073 const char *
3074 output_64bit_ior (rtx *operands)
3076 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3077 int bs0, bs1, p, len;
3079 if (INTVAL (operands[2]) == 0)
3080 return "copy %1,%0";
3082 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3083 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3084 break;
3086 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3087 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3088 break;
3090 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3091 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3093 p = 63 - bs0;
3094 len = bs1 - bs0;
3096 operands[2] = GEN_INT (p);
3097 operands[3] = GEN_INT (len);
3098 return "depdi -1,%2,%3,%0";
3101 /* Target hook for assembling integer objects. This code handles
3102 aligned SI and DI integers specially since function references
3103 must be preceded by P%. */
3105 static bool
3106 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3108 if (size == UNITS_PER_WORD
3109 && aligned_p
3110 && function_label_operand (x, VOIDmode))
3112 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3113 output_addr_const (asm_out_file, x);
3114 fputc ('\n', asm_out_file);
3115 return true;
3117 return default_assemble_integer (x, size, aligned_p);
3120 /* Output an ascii string. */
3121 void
3122 output_ascii (FILE *file, const char *p, int size)
3124 int i;
3125 int chars_output;
3126 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3128 /* The HP assembler can only take strings of 256 characters at one
3129 time. This is a limitation on input line length, *not* the
3130 length of the string. Sigh. Even worse, it seems that the
3131 restriction is in number of input characters (see \xnn &
3132 \whatever). So we have to do this very carefully. */
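  /* For example (hypothetical input), the bytes 'A', '"' and 0x01 are
     emitted as A\"\x01: printable characters pass through, quote and
     backslash get a backslash prefix, and anything else becomes a
     \xNN escape.  */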
3134 fputs ("\t.STRING \"", file);
3136 chars_output = 0;
3137 for (i = 0; i < size; i += 4)
3139 int co = 0;
3140 int io = 0;
3141 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3143 register unsigned int c = (unsigned char) p[i + io];
3145 if (c == '\"' || c == '\\')
3146 partial_output[co++] = '\\';
3147 if (c >= ' ' && c < 0177)
3148 partial_output[co++] = c;
3149 else
3151 unsigned int hexd;
3152 partial_output[co++] = '\\';
3153 partial_output[co++] = 'x';
3154 hexd = c / 16 + '0';
3155 if (hexd > '9')
3156 hexd -= '9' - 'a' + 1;
3157 partial_output[co++] = hexd;
3158 hexd = c % 16 + '0';
3159 if (hexd > '9')
3160 hexd -= '9' - 'a' + 1;
3161 partial_output[co++] = hexd;
3164 if (chars_output + co > 243)
3166 fputs ("\"\n\t.STRING \"", file);
3167 chars_output = 0;
3169 fwrite (partial_output, 1, (size_t) co, file);
3170 chars_output += co;
3171 co = 0;
3173 fputs ("\"\n", file);
3176 /* Try to rewrite floating point comparisons & branches to avoid
3177 useless add,tr insns.
3179 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3180 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3181 first attempt to remove useless add,tr insns. It is zero
3182 for the second pass as reorg sometimes leaves bogus REG_DEAD
3183 notes lying around.
3185 When CHECK_NOTES is zero we can only eliminate add,tr insns
3186 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3187 instructions. */
3188 static void
3189 remove_useless_addtr_insns (int check_notes)
3191 rtx insn;
3192 static int pass = 0;
3194 /* This is fairly cheap, so always run it when optimizing. */
3195 if (optimize > 0)
3197 int fcmp_count = 0;
3198 int fbranch_count = 0;
3200 /* Walk all the insns in this function looking for fcmp & fbranch
3201 instructions. Keep track of how many of each we find. */
3202 for (insn = get_insns (); insn; insn = next_insn (insn))
3204 rtx tmp;
3206 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3207 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3208 continue;
3210 tmp = PATTERN (insn);
3212 /* It must be a set. */
3213 if (GET_CODE (tmp) != SET)
3214 continue;
3216 /* If the destination is CCFP, then we've found an fcmp insn. */
3217 tmp = SET_DEST (tmp);
3218 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3220 fcmp_count++;
3221 continue;
3224 tmp = PATTERN (insn);
3225 /* If this is an fbranch instruction, bump the fbranch counter. */
3226 if (GET_CODE (tmp) == SET
3227 && SET_DEST (tmp) == pc_rtx
3228 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3229 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3230 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3231 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3233 fbranch_count++;
3234 continue;
3239 /* Find all floating point compare + branch insns. If possible,
3240 reverse the comparison & the branch to avoid add,tr insns. */
3241 for (insn = get_insns (); insn; insn = next_insn (insn))
3243 rtx tmp, next;
3245 /* Ignore anything that isn't an INSN. */
3246 if (GET_CODE (insn) != INSN)
3247 continue;
3249 tmp = PATTERN (insn);
3251 /* It must be a set. */
3252 if (GET_CODE (tmp) != SET)
3253 continue;
3255 /* The destination must be CCFP, which is register zero. */
3256 tmp = SET_DEST (tmp);
3257 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3258 continue;
3260 /* INSN should be a set of CCFP.
3262 See if the result of this insn is used in a reversed FP
3263 conditional branch. If so, reverse our condition and
3264 the branch. Doing so avoids useless add,tr insns. */
3265 next = next_insn (insn);
3266 while (next)
3268 /* Jumps, calls and labels stop our search. */
3269 if (GET_CODE (next) == JUMP_INSN
3270 || GET_CODE (next) == CALL_INSN
3271 || GET_CODE (next) == CODE_LABEL)
3272 break;
3274 /* As does another fcmp insn. */
3275 if (GET_CODE (next) == INSN
3276 && GET_CODE (PATTERN (next)) == SET
3277 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3278 && REGNO (SET_DEST (PATTERN (next))) == 0)
3279 break;
3281 next = next_insn (next);
3284 /* Is NEXT_INSN a branch? */
3285 if (next
3286 && GET_CODE (next) == JUMP_INSN)
3288 rtx pattern = PATTERN (next);
3290 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3291 and CCFP dies, then reverse our conditional and the branch
3292 to avoid the add,tr. */
3293 if (GET_CODE (pattern) == SET
3294 && SET_DEST (pattern) == pc_rtx
3295 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3296 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3297 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3298 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3299 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3300 && (fcmp_count == fbranch_count
3301 || (check_notes
3302 && find_regno_note (next, REG_DEAD, 0))))
3304 /* Reverse the branch. */
3305 tmp = XEXP (SET_SRC (pattern), 1);
3306 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3307 XEXP (SET_SRC (pattern), 2) = tmp;
3308 INSN_CODE (next) = -1;
3310 /* Reverse our condition. */
3311 tmp = PATTERN (insn);
3312 PUT_CODE (XEXP (tmp, 1),
3313 (reverse_condition_maybe_unordered
3314 (GET_CODE (XEXP (tmp, 1)))));
3320 pass = !pass;
3324 /* You may have trouble believing this, but this is the 32 bit HP-PA
3325 stack layout. Wow.
3327 Offset Contents
3329 Variable arguments (optional; any number may be allocated)
3331 SP-(4*(N+9)) arg word N
3333 SP-56 arg word 5
3334 SP-52 arg word 4
3336 Fixed arguments (must be allocated; may remain unused)
3338 SP-48 arg word 3
3339 SP-44 arg word 2
3340 SP-40 arg word 1
3341 SP-36 arg word 0
3343 Frame Marker
3345 SP-32 External Data Pointer (DP)
3346 SP-28 External sr4
3347 SP-24 External/stub RP (RP')
3348 SP-20 Current RP
3349 SP-16 Static Link
3350 SP-12 Clean up
3351 SP-8 Calling Stub RP (RP'')
3352 SP-4 Previous SP
3354 Top of Frame
3356 SP-0 Stack Pointer (points to next available address)
3360 /* This function saves registers as follows. Registers marked with ' are
3361 this function's registers (as opposed to the previous function's).
3362 If a frame_pointer isn't needed, r4 is saved as a general register;
3363 the space for the frame pointer is still allocated, though, to keep
3364 things simple.
3367 Top of Frame
3369 SP (FP') Previous FP
3370 SP + 4 Alignment filler (sigh)
3371 SP + 8 Space for locals reserved here.
3375 SP + n All call saved register used.
3379 SP + o All call saved fp registers used.
3383 SP + p (SP') points to next available address.
3387 /* Global variables set by output_function_prologue(). */
3388 /* Size of frame. Need to know this to emit return insns from
3389 leaf procedures. */
3390 static HOST_WIDE_INT actual_fsize, local_fsize;
3391 static int save_fregs;
3393 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3394 Handle case where DISP > 8k by using the add_high_const patterns.
3396 Note in DISP > 8k case, we will leave the high part of the address
3397 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
3399 static void
3400 store_reg (int reg, HOST_WIDE_INT disp, int base)
3402 rtx insn, dest, src, basereg;
3404 src = gen_rtx_REG (word_mode, reg);
3405 basereg = gen_rtx_REG (Pmode, base);
3406 if (VAL_14_BITS_P (disp))
3408 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3409 insn = emit_move_insn (dest, src);
3411 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3413 rtx delta = GEN_INT (disp);
3414 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3416 emit_move_insn (tmpreg, delta);
3417 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3418 if (DO_FRAME_NOTES)
3420 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3421 gen_rtx_SET (VOIDmode, tmpreg,
3422 gen_rtx_PLUS (Pmode, basereg, delta)));
3423 RTX_FRAME_RELATED_P (insn) = 1;
3425 dest = gen_rtx_MEM (word_mode, tmpreg);
3426 insn = emit_move_insn (dest, src);
3428 else
3430 rtx delta = GEN_INT (disp);
3431 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3432 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3434 emit_move_insn (tmpreg, high);
3435 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3436 insn = emit_move_insn (dest, src);
3437 if (DO_FRAME_NOTES)
3438 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3439 gen_rtx_SET (VOIDmode,
3440 gen_rtx_MEM (word_mode,
3441 gen_rtx_PLUS (word_mode,
3442 basereg,
3443 delta)),
3444 src));
3447 if (DO_FRAME_NOTES)
3448 RTX_FRAME_RELATED_P (insn) = 1;
3451 /* Emit RTL to store REG at the memory location specified by BASE and then
3452 add MOD to BASE. MOD must be <= 8k. */
3454 static void
3455 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3457 rtx insn, basereg, srcreg, delta;
3459 gcc_assert (VAL_14_BITS_P (mod));
3461 basereg = gen_rtx_REG (Pmode, base);
3462 srcreg = gen_rtx_REG (word_mode, reg);
3463 delta = GEN_INT (mod);
3465 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3466 if (DO_FRAME_NOTES)
3468 RTX_FRAME_RELATED_P (insn) = 1;
3470 /* RTX_FRAME_RELATED_P must be set on each frame related set
3471 in a parallel with more than one element. */
3472 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3473 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3477 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3478 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3479 whether to add a frame note or not.
3481 In the DISP > 8k case, we leave the high part of the address in %r1.
3482 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3484 static void
3485 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3487 rtx insn;
3489 if (VAL_14_BITS_P (disp))
3491 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3492 plus_constant (gen_rtx_REG (Pmode, base), disp));
3494 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3496 rtx basereg = gen_rtx_REG (Pmode, base);
3497 rtx delta = GEN_INT (disp);
3498 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3500 emit_move_insn (tmpreg, delta);
3501 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3502 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3503 if (DO_FRAME_NOTES)
3504 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3505 gen_rtx_SET (VOIDmode, tmpreg,
3506 gen_rtx_PLUS (Pmode, basereg, delta)));
3508 else
3510 rtx basereg = gen_rtx_REG (Pmode, base);
3511 rtx delta = GEN_INT (disp);
3512 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3514 emit_move_insn (tmpreg,
3515 gen_rtx_PLUS (Pmode, basereg,
3516 gen_rtx_HIGH (Pmode, delta)));
3517 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3518 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3521 if (DO_FRAME_NOTES && note)
3522 RTX_FRAME_RELATED_P (insn) = 1;
3525 HOST_WIDE_INT
3526 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3528 int freg_saved = 0;
3529 int i, j;
3531 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3532 be consistent with the rounding and size calculation done here.
3533 Change them at the same time. */
3535 /* We do our own stack alignment. First, round the size of the
3536 stack locals up to a word boundary. */
3537 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3539 /* Space for previous frame pointer + filler. If any frame is
3540 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3541 waste some space here for the sake of HP compatibility. The
3542 first slot is only used when the frame pointer is needed. */
3543 if (size || frame_pointer_needed)
3544 size += STARTING_FRAME_OFFSET;
3546 /* If the current function calls __builtin_eh_return, then we need
3547 to allocate stack space for registers that will hold data for
3548 the exception handler. */
3549 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3551 unsigned int i;
3553 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3554 continue;
3555 size += i * UNITS_PER_WORD;
3558 /* Account for space used by the callee general register saves. */
3559 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3560 if (df_regs_ever_live_p (i))
3561 size += UNITS_PER_WORD;
3563 /* Account for space used by the callee floating point register saves. */
3564 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3565 if (df_regs_ever_live_p (i)
3566 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3568 freg_saved = 1;
3570 /* We always save both halves of the FP register, so always
3571 increment the frame size by 8 bytes. */
3572 size += 8;
3575 /* If any of the floating registers are saved, account for the
3576 alignment needed for the floating point register save block. */
3577 if (freg_saved)
3579 size = (size + 7) & ~7;
3580 if (fregs_live)
3581 *fregs_live = 1;
3584 /* The various ABIs include space for the outgoing parameters in the
3585 size of the current function's stack frame. We don't need to align
3586 for the outgoing arguments as their alignment is set by the final
3587 rounding for the frame as a whole. */
3588 size += crtl->outgoing_args_size;
3590 /* Allocate space for the fixed frame marker. This space must be
3591 allocated for any function that makes calls or allocates
3592 stack space. */
3593 if (!current_function_is_leaf || size)
3594 size += TARGET_64BIT ? 48 : 32;
3596 /* Finally, round to the preferred stack boundary. */
3597 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3598 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
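/* Worked example (assuming a 32-bit target where STARTING_FRAME_OFFSET
   is 8 and the preferred stack boundary is 64 bytes): 100 bytes of
   locals round to 104, plus 8 gives 112; with no register saves and
   16 bytes of outgoing arguments we reach 128; the non-leaf frame
   marker adds 32 for 160, which rounds up to a 192-byte frame.  */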
3601 /* Generate the assembly code for function entry. FILE is a stdio
3602 stream to output the code to. SIZE is an int: how many units of
3603 temporary storage to allocate.
3605 Refer to the array `regs_ever_live' to determine which registers to
3606 save; `regs_ever_live[I]' is nonzero if register number I is ever
3607 used in the function. This function is responsible for knowing
3608 which registers should not be saved even if used. */
3610 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3611 of memory. If any fpu reg is used in the function, we allocate
3612 such a block here, at the bottom of the frame, just in case it's needed.
3614 If this function is a leaf procedure, then we may choose not
3615 to do a "save" insn. The decision about whether or not
3616 to do this is made in regclass.c. */
3618 static void
3619 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3621 /* The function's label and associated .PROC must never be
3622 separated and must be output *after* any profiling declarations
3623 to avoid changing spaces/subspaces within a procedure. */
3624 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3625 fputs ("\t.PROC\n", file);
3627 /* hppa_expand_prologue does the dirty work now. We just need
3628 to output the assembler directives which denote the start
3629 of a function. */
3630 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3631 if (current_function_is_leaf)
3632 fputs (",NO_CALLS", file);
3633 else
3634 fputs (",CALLS", file);
3635 if (rp_saved)
3636 fputs (",SAVE_RP", file);
3638 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3639 at the beginning of the frame and that it is used as the frame
3640 pointer for the frame. We do this because our current frame
3641 layout doesn't conform to that specified in the HP runtime
3642 documentation and we need a way to indicate to programs such as
3643 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3644 isn't used by HP compilers but is supported by the assembler.
3645 However, SAVE_SP is supposed to indicate that the previous stack
3646 pointer has been saved in the frame marker. */
3647 if (frame_pointer_needed)
3648 fputs (",SAVE_SP", file);
3650 /* Pass on information about the number of callee register saves
3651 performed in the prologue.
3653 The compiler is supposed to pass the highest register number
3654 saved; the assembler then has to adjust that number before
3655 entering it into the unwind descriptor (to account for any
3656 caller saved registers with lower register numbers than the
3657 first callee saved register). */
3658 if (gr_saved)
3659 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3661 if (fr_saved)
3662 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3664 fputs ("\n\t.ENTRY\n", file);
3666 remove_useless_addtr_insns (0);
3669 void
3670 hppa_expand_prologue (void)
3672 int merge_sp_adjust_with_store = 0;
3673 HOST_WIDE_INT size = get_frame_size ();
3674 HOST_WIDE_INT offset;
3675 int i;
3676 rtx insn, tmpreg;
3678 gr_saved = 0;
3679 fr_saved = 0;
3680 save_fregs = 0;
3682 /* Compute total size for frame pointer, filler, locals and rounding to
3683 the next word boundary. Similar code appears in compute_frame_size
3684 and must be changed in tandem with this code. */
3685 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3686 if (local_fsize || frame_pointer_needed)
3687 local_fsize += STARTING_FRAME_OFFSET;
3689 actual_fsize = compute_frame_size (size, &save_fregs);
3691 /* Compute a few things we will use often. */
3692 tmpreg = gen_rtx_REG (word_mode, 1);
3694 /* Save RP first. The calling conventions manual states RP will
3695 always be stored into the caller's frame at sp - 20 or sp - 16
3696 depending on which ABI is in use. */
3697 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3699 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3700 rp_saved = true;
3702 else
3703 rp_saved = false;
3705 /* Allocate the local frame and set up the frame pointer if needed. */
3706 if (actual_fsize != 0)
3708 if (frame_pointer_needed)
3710 /* Copy the old frame pointer temporarily into %r1. Set up the
3711 new stack pointer, then store away the saved old frame pointer
3712 into the stack at sp and at the same time update the stack
3713 pointer by actual_fsize bytes. Two versions: the first
3714 handles small (<8k) frames. The second handles large (>=8k)
3715 frames. */
3716 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3717 if (DO_FRAME_NOTES)
3718 RTX_FRAME_RELATED_P (insn) = 1;
3720 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3721 if (DO_FRAME_NOTES)
3722 RTX_FRAME_RELATED_P (insn) = 1;
3724 if (VAL_14_BITS_P (actual_fsize))
3725 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3726 else
3728 /* It is incorrect to store the saved frame pointer at *sp,
3729 then increment sp (writes beyond the current stack boundary).
3731 So instead use stwm to store at *sp and post-increment the
3732 stack pointer as an atomic operation. Then increment sp to
3733 finish allocating the new frame. */
3734 HOST_WIDE_INT adjust1 = 8192 - 64;
3735 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3737 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3738 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3739 adjust2, 1);
3742 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3743 we need to store the previous stack pointer (frame pointer)
3744 into the frame marker on targets that use the HP unwind
3745 library. This allows the HP unwind library to be used to
3746 unwind GCC frames. However, we are not fully compatible
3747 with the HP library because our frame layout differs from
3748 that specified in the HP runtime specification.
3750 We don't want a frame note on this instruction as the frame
3751 marker moves during dynamic stack allocation.
3753 This instruction also serves as a blockage to prevent
3754 register spills from being scheduled before the stack
3755 pointer is raised. This is necessary as we store
3756 registers using the frame pointer as a base register,
3757 and the frame pointer is set before sp is raised. */
3758 if (TARGET_HPUX_UNWIND_LIBRARY)
3760 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3761 GEN_INT (TARGET_64BIT ? -8 : -4));
3763 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3764 frame_pointer_rtx);
3766 else
3767 emit_insn (gen_blockage ());
3769 /* No frame pointer needed. */
3770 else
3772 /* In some cases we can perform the first callee register save
3773 and allocate the stack frame at the same time. If so, just
3774 make a note of it and defer allocating the frame until saving
3775 the callee registers. */
3776 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3777 merge_sp_adjust_with_store = 1;
3778 /* Cannot optimize. Adjust the stack frame by actual_fsize
3779 bytes. */
3780 else
3781 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3782 actual_fsize, 1);
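/* When the merge is possible, the first GR save below becomes a single
   store-and-modify (roughly stwm %rN,actual_fsize(%sp)), which saves
   the register and allocates the frame in one instruction.  */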
3786 /* Normal register save.
3788 Do not save the frame pointer in the frame_pointer_needed case. It
3789 was done earlier. */
3790 if (frame_pointer_needed)
3792 offset = local_fsize;
3794 /* Saving the EH return data registers in the frame is the simplest
3795 way to get the frame unwind information emitted. We put them
3796 just before the general registers. */
3797 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3799 unsigned int i, regno;
3801 for (i = 0; ; ++i)
3803 regno = EH_RETURN_DATA_REGNO (i);
3804 if (regno == INVALID_REGNUM)
3805 break;
3807 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3808 offset += UNITS_PER_WORD;
3812 for (i = 18; i >= 4; i--)
3813 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3815 store_reg (i, offset, FRAME_POINTER_REGNUM);
3816 offset += UNITS_PER_WORD;
3817 gr_saved++;
3819 /* Account for %r3 which is saved in a special place. */
3820 gr_saved++;
3822 /* No frame pointer needed. */
3823 else
3825 offset = local_fsize - actual_fsize;
3827 /* Saving the EH return data registers in the frame is the simplest
3828 way to get the frame unwind information emitted. */
3829 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3831 unsigned int i, regno;
3833 for (i = 0; ; ++i)
3835 regno = EH_RETURN_DATA_REGNO (i);
3836 if (regno == INVALID_REGNUM)
3837 break;
3839 /* If merge_sp_adjust_with_store is nonzero, then we can
3840 optimize the first save. */
3841 if (merge_sp_adjust_with_store)
3843 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3844 merge_sp_adjust_with_store = 0;
3846 else
3847 store_reg (regno, offset, STACK_POINTER_REGNUM);
3848 offset += UNITS_PER_WORD;
3852 for (i = 18; i >= 3; i--)
3853 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3855 /* If merge_sp_adjust_with_store is nonzero, then we can
3856 optimize the first GR save. */
3857 if (merge_sp_adjust_with_store)
3859 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3860 merge_sp_adjust_with_store = 0;
3862 else
3863 store_reg (i, offset, STACK_POINTER_REGNUM);
3864 offset += UNITS_PER_WORD;
3865 gr_saved++;
3868 /* If we wanted to merge the SP adjustment with a GR save, but we never
3869 did any GR saves, then just emit the adjustment here. */
3870 if (merge_sp_adjust_with_store)
3871 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3872 actual_fsize, 1);
3875 /* The hppa calling conventions say that %r19, the pic offset
3876 register, is saved at sp - 32 (in this function's frame)
3877 when generating PIC code. FIXME: What is the correct thing
3878 to do for functions which make no calls and allocate no
3879 frame? Do we need to allocate a frame, or can we just omit
3880 the save? For now we'll just omit the save.
3882 We don't want a note on this insn as the frame marker can
3883 move if there is a dynamic stack allocation. */
3884 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3886 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3888 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3892 /* Align pointer properly (doubleword boundary). */
3893 offset = (offset + 7) & ~7;
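/* E.g., offset 20 rounds up to 24. Doubleword alignment is needed
   because the FP saves below use DFmode (8 byte) accesses.  */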
3895 /* Floating point register store. */
3896 if (save_fregs)
3898 rtx base;
3900 /* First get the frame or stack pointer to the start of the FP register
3901 save area. */
3902 if (frame_pointer_needed)
3904 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3905 base = frame_pointer_rtx;
3907 else
3909 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3910 base = stack_pointer_rtx;
3913 /* Now actually save the FP registers. */
3914 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3916 if (df_regs_ever_live_p (i)
3917 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3919 rtx addr, insn, reg;
3920 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3921 reg = gen_rtx_REG (DFmode, i);
3922 insn = emit_move_insn (addr, reg);
3923 if (DO_FRAME_NOTES)
3925 RTX_FRAME_RELATED_P (insn) = 1;
3926 if (TARGET_64BIT)
3928 rtx mem = gen_rtx_MEM (DFmode,
3929 plus_constant (base, offset));
3930 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3931 gen_rtx_SET (VOIDmode, mem, reg));
3933 else
3935 rtx meml = gen_rtx_MEM (SFmode,
3936 plus_constant (base, offset));
3937 rtx memr = gen_rtx_MEM (SFmode,
3938 plus_constant (base, offset + 4));
3939 rtx regl = gen_rtx_REG (SFmode, i);
3940 rtx regr = gen_rtx_REG (SFmode, i + 1);
3941 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3942 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3943 rtvec vec;
3945 RTX_FRAME_RELATED_P (setl) = 1;
3946 RTX_FRAME_RELATED_P (setr) = 1;
3947 vec = gen_rtvec (2, setl, setr);
3948 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3949 gen_rtx_SEQUENCE (VOIDmode, vec));
3952 offset += GET_MODE_SIZE (DFmode);
3953 fr_saved++;
3959 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3960 Handle case where DISP > 8k by using the add_high_const patterns. */
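/* Roughly, the 32-bit cases emit either

      ldw disp(%base),%reg              (DISP fits in 14 bits)

   or

      addil L'disp,%base
      ldw R'disp(%r1),%reg              (large DISP)

   while the 64-bit case with a displacement wider than 32 bits first
   materializes DISP in %r1 and loads through an indexed or summed
   address.  */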
3962 static void
3963 load_reg (int reg, HOST_WIDE_INT disp, int base)
3965 rtx dest = gen_rtx_REG (word_mode, reg);
3966 rtx basereg = gen_rtx_REG (Pmode, base);
3967 rtx src;
3969 if (VAL_14_BITS_P (disp))
3970 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3971 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3973 rtx delta = GEN_INT (disp);
3974 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3976 emit_move_insn (tmpreg, delta);
3977 if (TARGET_DISABLE_INDEXING)
3979 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3980 src = gen_rtx_MEM (word_mode, tmpreg);
3982 else
3983 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3985 else
3987 rtx delta = GEN_INT (disp);
3988 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3989 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3991 emit_move_insn (tmpreg, high);
3992 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3995 emit_move_insn (dest, src);
3998 /* Update the total code bytes output to the text section. */
4000 static void
4001 update_total_code_bytes (unsigned int nbytes)
4003 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4004 && !IN_NAMED_SECTION_P (cfun->decl))
4006 unsigned int old_total = total_code_bytes;
4008 total_code_bytes += nbytes;
4010 /* Be prepared to handle overflows. */
4011 if (old_total > total_code_bytes)
4012 total_code_bytes = UINT_MAX;
4016 /* This function generates the assembly code for function exit.
4017 Args are as for output_function_prologue ().
4019 The function epilogue should not depend on the current stack
4020 pointer! It should use the frame pointer only. This is mandatory
4021 because of alloca; we also take advantage of it to omit stack
4022 adjustments before returning. */
4024 static void
4025 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4027 rtx insn = get_last_insn ();
4029 last_address = 0;
4031 /* hppa_expand_epilogue does the dirty work now. We just need
4032 to output the assembler directives which denote the end
4033 of a function.
4035 To make debuggers happy, emit a nop if the epilogue was completely
4036 eliminated due to a volatile call as the last insn in the
4037 current function. That way the return address (in %r2) will
4038 always point to a valid instruction in the current function. */
4040 /* Get the last real insn. */
4041 if (GET_CODE (insn) == NOTE)
4042 insn = prev_real_insn (insn);
4044 /* If it is a sequence, then look inside. */
4045 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4046 insn = XVECEXP (PATTERN (insn), 0, 0);
4048 /* If insn is a CALL_INSN, then it must be a call to a volatile
4049 function (otherwise there would be epilogue insns). */
4050 if (insn && GET_CODE (insn) == CALL_INSN)
4052 fputs ("\tnop\n", file);
4053 last_address += 4;
4056 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4058 if (TARGET_SOM && TARGET_GAS)
4060 /* We're done with this subspace except possibly for some additional
4061 debug information. Forget that we are in this subspace to ensure
4062 that the next function is output in its own subspace. */
4063 in_section = NULL;
4064 cfun->machine->in_nsubspa = 2;
4067 if (INSN_ADDRESSES_SET_P ())
4069 insn = get_last_nonnote_insn ();
4070 last_address += INSN_ADDRESSES (INSN_UID (insn));
4071 if (INSN_P (insn))
4072 last_address += insn_default_length (insn);
4073 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4074 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4076 else
4077 last_address = UINT_MAX;
4079 /* Finally, update the total number of code bytes output so far. */
4080 update_total_code_bytes (last_address);
4083 void
4084 hppa_expand_epilogue (void)
4086 rtx tmpreg;
4087 HOST_WIDE_INT offset;
4088 HOST_WIDE_INT ret_off = 0;
4089 int i;
4090 int merge_sp_adjust_with_load = 0;
4092 /* We will use this often. */
4093 tmpreg = gen_rtx_REG (word_mode, 1);
4095 /* Try to restore RP early to avoid load/use interlocks when
4096 RP gets used in the return (bv) instruction. This appears to still
4097 be necessary even when we schedule the prologue and epilogue. */
4098 if (rp_saved)
4100 ret_off = TARGET_64BIT ? -16 : -20;
4101 if (frame_pointer_needed)
4103 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4104 ret_off = 0;
4106 else
4108 /* No frame pointer, and stack is smaller than 8k. */
4109 if (VAL_14_BITS_P (ret_off - actual_fsize))
4111 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4112 ret_off = 0;
4117 /* General register restores. */
4118 if (frame_pointer_needed)
4120 offset = local_fsize;
4122 /* If the current function calls __builtin_eh_return, then we need
4123 to restore the saved EH data registers. */
4124 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4126 unsigned int i, regno;
4128 for (i = 0; ; ++i)
4130 regno = EH_RETURN_DATA_REGNO (i);
4131 if (regno == INVALID_REGNUM)
4132 break;
4134 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4135 offset += UNITS_PER_WORD;
4139 for (i = 18; i >= 4; i--)
4140 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4142 load_reg (i, offset, FRAME_POINTER_REGNUM);
4143 offset += UNITS_PER_WORD;
4146 else
4148 offset = local_fsize - actual_fsize;
4150 /* If the current function calls __builtin_eh_return, then we need
4151 to restore the saved EH data registers. */
4152 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4154 unsigned int i, regno;
4156 for (i = 0; ; ++i)
4158 regno = EH_RETURN_DATA_REGNO (i);
4159 if (regno == INVALID_REGNUM)
4160 break;
4162 /* Only for the first load.
4163 merge_sp_adjust_with_load holds the register load
4164 with which we will merge the sp adjustment. */
4165 if (merge_sp_adjust_with_load == 0
4166 && local_fsize == 0
4167 && VAL_14_BITS_P (-actual_fsize))
4168 merge_sp_adjust_with_load = regno;
4169 else
4170 load_reg (regno, offset, STACK_POINTER_REGNUM);
4171 offset += UNITS_PER_WORD;
4175 for (i = 18; i >= 3; i--)
4177 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4179 /* Only for the first load.
4180 merge_sp_adjust_with_load holds the register load
4181 with which we will merge the sp adjustment. */
4182 if (merge_sp_adjust_with_load == 0
4183 && local_fsize == 0
4184 && VAL_14_BITS_P (-actual_fsize))
4185 merge_sp_adjust_with_load = i;
4186 else
4187 load_reg (i, offset, STACK_POINTER_REGNUM);
4188 offset += UNITS_PER_WORD;
4193 /* Align pointer properly (doubleword boundary). */
4194 offset = (offset + 7) & ~7;
4196 /* FP register restores. */
4197 if (save_fregs)
4199 /* Adjust the register to index off of. */
4200 if (frame_pointer_needed)
4201 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4202 else
4203 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4205 /* Actually do the restores now. */
4206 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4207 if (df_regs_ever_live_p (i)
4208 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4210 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4211 rtx dest = gen_rtx_REG (DFmode, i);
4212 emit_move_insn (dest, src);
4216 /* Emit a blockage insn here to keep these insns from being moved to
4217 an earlier spot in the epilogue, or into the main instruction stream.
4219 This is necessary as we must not cut the stack back before all the
4220 restores are finished. */
4221 emit_insn (gen_blockage ());
4223 /* Reset stack pointer (and possibly frame pointer). The stack
4224 pointer is initially set to fp + 64 to avoid a race condition. */
4225 if (frame_pointer_needed)
4227 rtx delta = GEN_INT (-64);
4229 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4230 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4232 /* If we were deferring a callee register restore, do it now. */
4233 else if (merge_sp_adjust_with_load)
4235 rtx delta = GEN_INT (-actual_fsize);
4236 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4238 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4240 else if (actual_fsize != 0)
4241 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4242 - actual_fsize, 0);
4244 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4245 frame greater than 8k), do so now. */
4246 if (ret_off != 0)
4247 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4249 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4251 rtx sa = EH_RETURN_STACKADJ_RTX;
4253 emit_insn (gen_blockage ());
4254 emit_insn (TARGET_64BIT
4255 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4256 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4261 rtx hppa_pic_save_rtx (void)
4263 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4266 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4267 #define NO_DEFERRED_PROFILE_COUNTERS 0
4268 #endif
4271 /* Vector of funcdef numbers. */
4272 static VEC(int,heap) *funcdef_nos;
4274 /* Output deferred profile counters. */
4275 static void
4276 output_deferred_profile_counters (void)
4278 unsigned int i;
4279 int align, n;
4281 if (VEC_empty (int, funcdef_nos))
4282 return;
4284 switch_to_section (data_section);
4285 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4286 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4288 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4290 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4291 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4294 VEC_free (int, heap, funcdef_nos);
4297 void
4298 hppa_profile_hook (int label_no)
4300 /* We use SImode for the address of the function in both 32 and
4301 64-bit code to avoid having to provide DImode versions of the
4302 lcla2 and load_offset_label_address insn patterns. */
4303 rtx reg = gen_reg_rtx (SImode);
4304 rtx label_rtx = gen_label_rtx ();
4305 rtx begin_label_rtx, call_insn;
4306 char begin_label_name[16];
4308 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4309 label_no);
4310 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4312 if (TARGET_64BIT)
4313 emit_move_insn (arg_pointer_rtx,
4314 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4315 GEN_INT (64)));
4317 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4319 /* The address of the function is loaded into %r25 with an instruction-
4320 relative sequence that avoids the use of relocations. The sequence
4321 is split so that the load_offset_label_address instruction can
4322 occupy the delay slot of the call to _mcount. */
4323 if (TARGET_PA_20)
4324 emit_insn (gen_lcla2 (reg, label_rtx));
4325 else
4326 emit_insn (gen_lcla1 (reg, label_rtx));
4328 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4329 reg, begin_label_rtx, label_rtx));
4331 #if !NO_DEFERRED_PROFILE_COUNTERS
4333 rtx count_label_rtx, addr, r24;
4334 char count_label_name[16];
4336 VEC_safe_push (int, heap, funcdef_nos, label_no);
4337 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4338 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4340 addr = force_reg (Pmode, count_label_rtx);
4341 r24 = gen_rtx_REG (Pmode, 24);
4342 emit_move_insn (r24, addr);
4344 call_insn =
4345 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4346 gen_rtx_SYMBOL_REF (Pmode,
4347 "_mcount")),
4348 GEN_INT (TARGET_64BIT ? 24 : 12)));
4350 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4352 #else
4354 call_insn =
4355 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4356 gen_rtx_SYMBOL_REF (Pmode,
4357 "_mcount")),
4358 GEN_INT (TARGET_64BIT ? 16 : 8)));
4360 #endif
4362 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4363 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4365 /* Indicate the _mcount call cannot throw, nor will it execute a
4366 non-local goto. */
4367 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4370 /* Fetch the return address for the frame COUNT steps up from
4371 the current frame, after the prologue. FRAMEADDR is the
4372 frame pointer of the COUNT frame.
4374 We want to ignore any export stub remnants here. To handle this,
4375 we examine the code at the return address, and if it is an export
4376 stub, we return a memory rtx for the stub return address stored
4377 at frame-24.
4379 The value returned is used in two different ways:
4381 1. To find a function's caller.
4383 2. To change the return address for a function.
4385 This function handles most instances of case 1; however, it will
4386 fail if there are two levels of stubs to execute on the return
4387 path. The only way I believe that can happen is if the return value
4388 needs a parameter relocation, which never happens for C code.
4390 This function handles most instances of case 2; however, it will
4391 fail if we did not originally have stub code on the return path
4392 but will need stub code on the new return path. This can happen if
4393 the caller & callee are both in the main program, but the new
4394 return location is in a shared library. */
4397 rtx return_addr_rtx (int count, rtx frameaddr)
4399 rtx label;
4400 rtx rp;
4401 rtx saved_rp;
4402 rtx ins;
4404 /* Instruction stream at the normal return address for the export stub:
4406 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4407 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4408 0x00011820 | stub+16: mtsp r1,sr0
4409 0xe0400002 | stub+20: be,n 0(sr0,rp)
4411 0xe0400002 must be specified as -532676606 so that it won't be
4412 rejected as an invalid immediate operand on 64-bit hosts. */
4414 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4415 int i;
4417 if (count != 0)
4418 return NULL_RTX;
4420 rp = get_hard_reg_initial_val (Pmode, 2);
4422 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4423 return rp;
4425 /* If there is no export stub then just use the value saved from
4426 the return pointer register. */
4428 saved_rp = gen_reg_rtx (Pmode);
4429 emit_move_insn (saved_rp, rp);
4431 /* Get pointer to the instruction stream. We have to mask out the
4432 privilege level from the two low order bits of the return address
4433 pointer here so that ins will point to the start of the first
4434 instruction that would have been executed if we returned. */
4435 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
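/* E.g., a return address of 0x00001003 (privilege level 3 in the low
   two bits) masks to 0x00001000, the address of the first instruction
   that would execute on return.  */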
4436 label = gen_label_rtx ();
4438 /* Check the instruction stream at the normal return address for the
4439 export stub. If it is an export stub, then our return address is
4440 really in -24[frameaddr]. */
4442 for (i = 0; i < 3; i++)
4444 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4445 rtx op1 = GEN_INT (insns[i]);
4446 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4449 /* Here we know that our return address points to an export
4450 stub. We don't want to return the address of the export stub,
4451 but rather the return address of the export stub. That return
4452 address is stored at -24[frameaddr]. */
4454 emit_move_insn (saved_rp,
4455 gen_rtx_MEM (Pmode,
4456 memory_address (Pmode,
4457 plus_constant (frameaddr,
4458 -24))));
4460 emit_label (label);
4462 return saved_rp;
4465 void
4466 emit_bcond_fp (rtx operands[])
4468 enum rtx_code code = GET_CODE (operands[0]);
4469 rtx operand0 = operands[1];
4470 rtx operand1 = operands[2];
4471 rtx label = operands[3];
4473 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4474 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4476 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4477 gen_rtx_IF_THEN_ELSE (VOIDmode,
4478 gen_rtx_fmt_ee (NE,
4479 VOIDmode,
4480 gen_rtx_REG (CCFPmode, 0),
4481 const0_rtx),
4482 gen_rtx_LABEL_REF (VOIDmode, label),
4483 pc_rtx)));
4487 /* Adjust the cost of a scheduling dependency. Return the new cost of
4488 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4490 static int
4491 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4493 enum attr_type attr_type;
4495 /* Don't adjust costs for a pa8000 chip; also, do not adjust any
4496 true dependencies, as they are described with bypasses now. */
4497 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4498 return cost;
4500 if (! recog_memoized (insn))
4501 return 0;
4503 attr_type = get_attr_type (insn);
4505 switch (REG_NOTE_KIND (link))
4507 case REG_DEP_ANTI:
4508 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4509 cycles later. */
4511 if (attr_type == TYPE_FPLOAD)
4513 rtx pat = PATTERN (insn);
4514 rtx dep_pat = PATTERN (dep_insn);
4515 if (GET_CODE (pat) == PARALLEL)
4517 /* This happens for the fldXs,mb patterns. */
4518 pat = XVECEXP (pat, 0, 0);
4520 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4521 /* If this happens, we have to extend this to schedule
4522 optimally. Return 0 for now. */
4523 return 0;
4525 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4527 if (! recog_memoized (dep_insn))
4528 return 0;
4529 switch (get_attr_type (dep_insn))
4531 case TYPE_FPALU:
4532 case TYPE_FPMULSGL:
4533 case TYPE_FPMULDBL:
4534 case TYPE_FPDIVSGL:
4535 case TYPE_FPDIVDBL:
4536 case TYPE_FPSQRTSGL:
4537 case TYPE_FPSQRTDBL:
4538 /* A fpload can't be issued until one cycle before a
4539 preceding arithmetic operation has finished if
4540 the target of the fpload is any of the sources
4541 (or destination) of the arithmetic operation. */
4542 return insn_default_latency (dep_insn) - 1;
4544 default:
4545 return 0;
4549 else if (attr_type == TYPE_FPALU)
4551 rtx pat = PATTERN (insn);
4552 rtx dep_pat = PATTERN (dep_insn);
4553 if (GET_CODE (pat) == PARALLEL)
4555 /* This happens for the fldXs,mb patterns. */
4556 pat = XVECEXP (pat, 0, 0);
4558 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4559 /* If this happens, we have to extend this to schedule
4560 optimally. Return 0 for now. */
4561 return 0;
4563 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4565 if (! recog_memoized (dep_insn))
4566 return 0;
4567 switch (get_attr_type (dep_insn))
4569 case TYPE_FPDIVSGL:
4570 case TYPE_FPDIVDBL:
4571 case TYPE_FPSQRTSGL:
4572 case TYPE_FPSQRTDBL:
4573 /* An ALU flop can't be issued until two cycles before a
4574 preceding divide or sqrt operation has finished if
4575 the target of the ALU flop is any of the sources
4576 (or destination) of the divide or sqrt operation. */
4577 return insn_default_latency (dep_insn) - 2;
4579 default:
4580 return 0;
4585 /* For other anti dependencies, the cost is 0. */
4586 return 0;
4588 case REG_DEP_OUTPUT:
4589 /* Output dependency; DEP_INSN writes a register that INSN writes some
4590 cycles later. */
4591 if (attr_type == TYPE_FPLOAD)
4593 rtx pat = PATTERN (insn);
4594 rtx dep_pat = PATTERN (dep_insn);
4595 if (GET_CODE (pat) == PARALLEL)
4597 /* This happens for the fldXs,mb patterns. */
4598 pat = XVECEXP (pat, 0, 0);
4600 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4601 /* If this happens, we have to extend this to schedule
4602 optimally. Return 0 for now. */
4603 return 0;
4605 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4607 if (! recog_memoized (dep_insn))
4608 return 0;
4609 switch (get_attr_type (dep_insn))
4611 case TYPE_FPALU:
4612 case TYPE_FPMULSGL:
4613 case TYPE_FPMULDBL:
4614 case TYPE_FPDIVSGL:
4615 case TYPE_FPDIVDBL:
4616 case TYPE_FPSQRTSGL:
4617 case TYPE_FPSQRTDBL:
4618 /* A fpload can't be issued until one cycle before a
4619 preceding arithmetic operation has finished if
4620 the target of the fpload is the destination of the
4621 arithmetic operation.
4623 Exception: For PA7100LC, PA7200 and PA7300, the cost
4624 is 3 cycles, unless they bundle together. We also
4625 pay the penalty if the second insn is a fpload. */
4626 return insn_default_latency (dep_insn) - 1;
4628 default:
4629 return 0;
4633 else if (attr_type == TYPE_FPALU)
4635 rtx pat = PATTERN (insn);
4636 rtx dep_pat = PATTERN (dep_insn);
4637 if (GET_CODE (pat) == PARALLEL)
4639 /* This happens for the fldXs,mb patterns. */
4640 pat = XVECEXP (pat, 0, 0);
4642 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4643 /* If this happens, we have to extend this to schedule
4644 optimally. Return 0 for now. */
4645 return 0;
4647 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4649 if (! recog_memoized (dep_insn))
4650 return 0;
4651 switch (get_attr_type (dep_insn))
4653 case TYPE_FPDIVSGL:
4654 case TYPE_FPDIVDBL:
4655 case TYPE_FPSQRTSGL:
4656 case TYPE_FPSQRTDBL:
4657 /* An ALU flop can't be issued until two cycles before a
4658 preceding divide or sqrt operation has finished if
4659 the target of the ALU flop is also the target of
4660 the divide or sqrt operation. */
4661 return insn_default_latency (dep_insn) - 2;
4663 default:
4664 return 0;
4669 /* For other output dependencies, the cost is 0. */
4670 return 0;
4672 default:
4673 gcc_unreachable ();
4677 /* Adjust scheduling priorities. We use this to try and keep addil
4678 and the next use of %r1 close together. */
4679 static int
4680 pa_adjust_priority (rtx insn, int priority)
4682 rtx set = single_set (insn);
4683 rtx src, dest;
4684 if (set)
4686 src = SET_SRC (set);
4687 dest = SET_DEST (set);
4688 if (GET_CODE (src) == LO_SUM
4689 && symbolic_operand (XEXP (src, 1), VOIDmode)
4690 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4691 priority >>= 3;
4693 else if (GET_CODE (src) == MEM
4694 && GET_CODE (XEXP (src, 0)) == LO_SUM
4695 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4696 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4697 priority >>= 1;
4699 else if (GET_CODE (dest) == MEM
4700 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4701 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4702 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4703 priority >>= 3;
4705 return priority;
4708 /* The 700 can only issue a single insn at a time.
4709 The 7XXX processors can issue two insns at a time.
4710 The 8000 can issue 4 insns at a time. */
4711 static int
4712 pa_issue_rate (void)
4714 switch (pa_cpu)
4716 case PROCESSOR_700: return 1;
4717 case PROCESSOR_7100: return 2;
4718 case PROCESSOR_7100LC: return 2;
4719 case PROCESSOR_7200: return 2;
4720 case PROCESSOR_7300: return 2;
4721 case PROCESSOR_8000: return 4;
4723 default:
4724 gcc_unreachable ();
4730 /* Return any length adjustment needed by INSN which already has its length
4731 computed as LENGTH. Return zero if no adjustment is necessary.
4733 For the PA: function calls, millicode calls, and backwards short
4734 conditional branches with unfilled delay slots need an adjustment by +1
4735 (to account for the NOP which will be inserted into the instruction stream).
4737 Also compute the length of an inline block move here as it is too
4738 complicated to express as a length attribute in pa.md. */
4740 int pa_adjust_insn_length (rtx insn, int length)
4742 rtx pat = PATTERN (insn);
4744 /* Jumps inside switch tables which have unfilled delay slots need
4745 adjustment. */
4746 if (GET_CODE (insn) == JUMP_INSN
4747 && GET_CODE (pat) == PARALLEL
4748 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4749 return 4;
4750 /* Millicode insn with an unfilled delay slot. */
4751 else if (GET_CODE (insn) == INSN
4752 && GET_CODE (pat) != SEQUENCE
4753 && GET_CODE (pat) != USE
4754 && GET_CODE (pat) != CLOBBER
4755 && get_attr_type (insn) == TYPE_MILLI)
4756 return 4;
4757 /* Block move pattern. */
4758 else if (GET_CODE (insn) == INSN
4759 && GET_CODE (pat) == PARALLEL
4760 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4761 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4762 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4763 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4764 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4765 return compute_movmem_length (insn) - 4;
4766 /* Block clear pattern. */
4767 else if (GET_CODE (insn) == INSN
4768 && GET_CODE (pat) == PARALLEL
4769 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4770 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4771 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4772 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4773 return compute_clrmem_length (insn) - 4;
4774 /* Conditional branch with an unfilled delay slot. */
4775 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4777 /* Adjust a short backwards conditional with an unfilled delay slot. */
4778 if (GET_CODE (pat) == SET
4779 && length == 4
4780 && JUMP_LABEL (insn) != NULL_RTX
4781 && ! forward_branch_p (insn))
4782 return 4;
4783 else if (GET_CODE (pat) == PARALLEL
4784 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4785 && length == 4)
4786 return 4;
4787 /* Adjust dbra insn with short backwards conditional branch with
4788 unfilled delay slot -- only for the case where the counter is in
4789 a general register. */
4790 else if (GET_CODE (pat) == PARALLEL
4791 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4792 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4793 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4794 && length == 4
4795 && ! forward_branch_p (insn))
4796 return 4;
4797 else
4798 return 0;
4800 return 0;
4803 /* Print operand X (an rtx) in assembler syntax to file FILE.
4804 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4805 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4807 void
4808 print_operand (FILE *file, rtx x, int code)
4810 switch (code)
4812 case '#':
4813 /* Output a 'nop' if there's nothing for the delay slot. */
4814 if (dbr_sequence_length () == 0)
4815 fputs ("\n\tnop", file);
4816 return;
4817 case '*':
4818 /* Output a nullification completer if there's nothing for the
4819 delay slot or nullification is requested. */
4820 if (dbr_sequence_length () == 0 ||
4821 (final_sequence &&
4822 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4823 fputs (",n", file);
4824 return;
4825 case 'R':
4826 /* Print out the second register name of a register pair.
4827 I.e., R (6) => 7. */
4828 fputs (reg_names[REGNO (x) + 1], file);
4829 return;
4830 case 'r':
4831 /* A register or zero. */
4832 if (x == const0_rtx
4833 || (x == CONST0_RTX (DFmode))
4834 || (x == CONST0_RTX (SFmode)))
4836 fputs ("%r0", file);
4837 return;
4839 else
4840 break;
4841 case 'f':
4842 /* A register or zero (floating point). */
4843 if (x == const0_rtx
4844 || (x == CONST0_RTX (DFmode))
4845 || (x == CONST0_RTX (SFmode)))
4847 fputs ("%fr0", file);
4848 return;
4850 else
4851 break;
4852 case 'A':
4854 rtx xoperands[2];
4856 xoperands[0] = XEXP (XEXP (x, 0), 0);
4857 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4858 output_global_address (file, xoperands[1], 0);
4859 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4860 return;
4863 case 'C': /* Plain (C)ondition */
4864 case 'X':
4865 switch (GET_CODE (x))
4867 case EQ:
4868 fputs ("=", file); break;
4869 case NE:
4870 fputs ("<>", file); break;
4871 case GT:
4872 fputs (">", file); break;
4873 case GE:
4874 fputs (">=", file); break;
4875 case GEU:
4876 fputs (">>=", file); break;
4877 case GTU:
4878 fputs (">>", file); break;
4879 case LT:
4880 fputs ("<", file); break;
4881 case LE:
4882 fputs ("<=", file); break;
4883 case LEU:
4884 fputs ("<<=", file); break;
4885 case LTU:
4886 fputs ("<<", file); break;
4887 default:
4888 gcc_unreachable ();
4890 return;
4891 case 'N': /* Condition, (N)egated */
4892 switch (GET_CODE (x))
4894 case EQ:
4895 fputs ("<>", file); break;
4896 case NE:
4897 fputs ("=", file); break;
4898 case GT:
4899 fputs ("<=", file); break;
4900 case GE:
4901 fputs ("<", file); break;
4902 case GEU:
4903 fputs ("<<", file); break;
4904 case GTU:
4905 fputs ("<<=", file); break;
4906 case LT:
4907 fputs (">=", file); break;
4908 case LE:
4909 fputs (">", file); break;
4910 case LEU:
4911 fputs (">>", file); break;
4912 case LTU:
4913 fputs (">>=", file); break;
4914 default:
4915 gcc_unreachable ();
4917 return;
4918 /* For floating point comparisons. Note that the output
4919 predicates are the complement of the desired mode. The
4920 conditions for GT, GE, LT, LE and LTGT cause an invalid
4921 operation exception if the result is unordered and this
4922 exception is enabled in the floating-point status register. */
4923 case 'Y':
4924 switch (GET_CODE (x))
4926 case EQ:
4927 fputs ("!=", file); break;
4928 case NE:
4929 fputs ("=", file); break;
4930 case GT:
4931 fputs ("!>", file); break;
4932 case GE:
4933 fputs ("!>=", file); break;
4934 case LT:
4935 fputs ("!<", file); break;
4936 case LE:
4937 fputs ("!<=", file); break;
4938 case LTGT:
4939 fputs ("!<>", file); break;
4940 case UNLE:
4941 fputs ("!?<=", file); break;
4942 case UNLT:
4943 fputs ("!?<", file); break;
4944 case UNGE:
4945 fputs ("!?>=", file); break;
4946 case UNGT:
4947 fputs ("!?>", file); break;
4948 case UNEQ:
4949 fputs ("!?=", file); break;
4950 case UNORDERED:
4951 fputs ("!?", file); break;
4952 case ORDERED:
4953 fputs ("?", file); break;
4954 default:
4955 gcc_unreachable ();
4957 return;
4958 case 'S': /* Condition, operands are (S)wapped. */
4959 switch (GET_CODE (x))
4961 case EQ:
4962 fputs ("=", file); break;
4963 case NE:
4964 fputs ("<>", file); break;
4965 case GT:
4966 fputs ("<", file); break;
4967 case GE:
4968 fputs ("<=", file); break;
4969 case GEU:
4970 fputs ("<<=", file); break;
4971 case GTU:
4972 fputs ("<<", file); break;
4973 case LT:
4974 fputs (">", file); break;
4975 case LE:
4976 fputs (">=", file); break;
4977 case LEU:
4978 fputs (">>=", file); break;
4979 case LTU:
4980 fputs (">>", file); break;
4981 default:
4982 gcc_unreachable ();
4984 return;
4985 case 'B': /* Condition, (B)oth swapped and negate. */
4986 switch (GET_CODE (x))
4988 case EQ:
4989 fputs ("<>", file); break;
4990 case NE:
4991 fputs ("=", file); break;
4992 case GT:
4993 fputs (">=", file); break;
4994 case GE:
4995 fputs (">", file); break;
4996 case GEU:
4997 fputs (">>", file); break;
4998 case GTU:
4999 fputs (">>=", file); break;
5000 case LT:
5001 fputs ("<=", file); break;
5002 case LE:
5003 fputs ("<", file); break;
5004 case LEU:
5005 fputs ("<<", file); break;
5006 case LTU:
5007 fputs ("<<=", file); break;
5008 default:
5009 gcc_unreachable ();
5011 return;
5012 case 'k':
5013 gcc_assert (GET_CODE (x) == CONST_INT);
5014 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5015 return;
5016 case 'Q':
5017 gcc_assert (GET_CODE (x) == CONST_INT);
5018 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5019 return;
5020 case 'L':
5021 gcc_assert (GET_CODE (x) == CONST_INT);
5022 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5023 return;
5024 case 'O':
5025 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5026 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5027 return;
5028 case 'p':
5029 gcc_assert (GET_CODE (x) == CONST_INT);
5030 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5031 return;
5032 case 'P':
5033 gcc_assert (GET_CODE (x) == CONST_INT);
5034 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5035 return;
5036 case 'I':
5037 if (GET_CODE (x) == CONST_INT)
5038 fputs ("i", file);
5039 return;
5040 case 'M':
5041 case 'F':
5042 switch (GET_CODE (XEXP (x, 0)))
5044 case PRE_DEC:
5045 case PRE_INC:
5046 if (ASSEMBLER_DIALECT == 0)
5047 fputs ("s,mb", file);
5048 else
5049 fputs (",mb", file);
5050 break;
5051 case POST_DEC:
5052 case POST_INC:
5053 if (ASSEMBLER_DIALECT == 0)
5054 fputs ("s,ma", file);
5055 else
5056 fputs (",ma", file);
5057 break;
5058 case PLUS:
5059 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5060 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5062 if (ASSEMBLER_DIALECT == 0)
5063 fputs ("x", file);
5065 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5066 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5068 if (ASSEMBLER_DIALECT == 0)
5069 fputs ("x,s", file);
5070 else
5071 fputs (",s", file);
5073 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5074 fputs ("s", file);
5075 break;
5076 default:
5077 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5078 fputs ("s", file);
5079 break;
5081 return;
5082 case 'G':
5083 output_global_address (file, x, 0);
5084 return;
5085 case 'H':
5086 output_global_address (file, x, 1);
5087 return;
5088 case 0: /* Don't do anything special */
5089 break;
5090 case 'Z':
5092 unsigned op[3];
5093 compute_zdepwi_operands (INTVAL (x), op);
5094 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5095 return;
5097 case 'z':
5099 unsigned op[3];
5100 compute_zdepdi_operands (INTVAL (x), op);
5101 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5102 return;
5104 case 'c':
5105 /* We can get here from a .vtable_inherit due to our
5106 CONSTANT_ADDRESS_P rejecting perfectly good constant
5107 addresses. */
5108 break;
5109 default:
5110 gcc_unreachable ();
5112 if (GET_CODE (x) == REG)
5114 fputs (reg_names [REGNO (x)], file);
5115 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5117 fputs ("R", file);
5118 return;
5120 if (FP_REG_P (x)
5121 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5122 && (REGNO (x) & 1) == 0)
5123 fputs ("L", file);
5125 else if (GET_CODE (x) == MEM)
5127 int size = GET_MODE_SIZE (GET_MODE (x));
5128 rtx base = NULL_RTX;
5129 switch (GET_CODE (XEXP (x, 0)))
5131 case PRE_DEC:
5132 case POST_DEC:
5133 base = XEXP (XEXP (x, 0), 0);
5134 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5135 break;
5136 case PRE_INC:
5137 case POST_INC:
5138 base = XEXP (XEXP (x, 0), 0);
5139 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5140 break;
5141 case PLUS:
5142 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5143 fprintf (file, "%s(%s)",
5144 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5145 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5146 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5147 fprintf (file, "%s(%s)",
5148 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5149 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5150 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5151 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5153 /* Because the REG_POINTER flag can get lost during reload,
5154 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5155 index and base registers in the combined move patterns. */
5156 rtx base = XEXP (XEXP (x, 0), 1);
5157 rtx index = XEXP (XEXP (x, 0), 0);
5159 fprintf (file, "%s(%s)",
5160 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5162 else
5163 output_address (XEXP (x, 0));
5164 break;
5165 default:
5166 output_address (XEXP (x, 0));
5167 break;
5170 else
5171 output_addr_const (file, x);
5174 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5176 void
5177 output_global_address (FILE *file, rtx x, int round_constant)
5180 /* Imagine (high (const (plus ...))). */
5181 if (GET_CODE (x) == HIGH)
5182 x = XEXP (x, 0);
5184 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5185 output_addr_const (file, x);
5186 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5188 output_addr_const (file, x);
5189 fputs ("-$global$", file);
5191 else if (GET_CODE (x) == CONST)
5193 const char *sep = "";
5194 int offset = 0; /* assembler wants -$global$ at end */
5195 rtx base = NULL_RTX;
5197 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5199 case SYMBOL_REF:
5200 base = XEXP (XEXP (x, 0), 0);
5201 output_addr_const (file, base);
5202 break;
5203 case CONST_INT:
5204 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5205 break;
5206 default:
5207 gcc_unreachable ();
5210 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5212 case SYMBOL_REF:
5213 base = XEXP (XEXP (x, 0), 1);
5214 output_addr_const (file, base);
5215 break;
5216 case CONST_INT:
5217 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5218 break;
5219 default:
5220 gcc_unreachable ();
5223 /* How bogus. The compiler is apparently responsible for
5224 rounding the constant if it uses an LR field selector.
5226 The linker and/or assembler seem a better place since
5227 they have to do this kind of thing already.
5229 If we fail to do this, HP's optimizing linker may eliminate
5230 an addil, but not update the ldw/stw/ldo instruction that
5231 uses the result of the addil. */
5232 if (round_constant)
5233 offset = ((offset + 0x1000) & ~0x1fff);
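/* E.g., an offset of 0x2345 rounds to 0x2000 and 0x3001 rounds to
   0x4000; the result is always a multiple of 0x2000.  */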
5235 switch (GET_CODE (XEXP (x, 0)))
5237 case PLUS:
5238 if (offset < 0)
5240 offset = -offset;
5241 sep = "-";
5243 else
5244 sep = "+";
5245 break;
5247 case MINUS:
5248 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5249 sep = "-";
5250 break;
5252 default:
5253 gcc_unreachable ();
5256 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5257 fputs ("-$global$", file);
5258 if (offset)
5259 fprintf (file, "%s%d", sep, offset);
5261 else
5262 output_addr_const (file, x);
5265 /* Output boilerplate text to appear at the beginning of the file.
5266 There are several possible versions. */
5267 #define aputs(x) fputs(x, asm_out_file)
5268 static inline void
5269 pa_file_start_level (void)
5271 if (TARGET_64BIT)
5272 aputs ("\t.LEVEL 2.0w\n");
5273 else if (TARGET_PA_20)
5274 aputs ("\t.LEVEL 2.0\n");
5275 else if (TARGET_PA_11)
5276 aputs ("\t.LEVEL 1.1\n");
5277 else
5278 aputs ("\t.LEVEL 1.0\n");
5281 static inline void
5282 pa_file_start_space (int sortspace)
5284 aputs ("\t.SPACE $PRIVATE$");
5285 if (sortspace)
5286 aputs (",SORT=16");
5287 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5288 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5289 "\n\t.SPACE $TEXT$");
5290 if (sortspace)
5291 aputs (",SORT=8");
5292 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5293 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5296 static inline void
5297 pa_file_start_file (int want_version)
5299 if (write_symbols != NO_DEBUG)
5301 output_file_directive (asm_out_file, main_input_filename);
5302 if (want_version)
5303 aputs ("\t.version\t\"01.01\"\n");
5307 static inline void
5308 pa_file_start_mcount (const char *aswhat)
5310 if (profile_flag)
5311 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5314 static void
5315 pa_elf_file_start (void)
5317 pa_file_start_level ();
5318 pa_file_start_mcount ("ENTRY");
5319 pa_file_start_file (0);
5322 static void
5323 pa_som_file_start (void)
5325 pa_file_start_level ();
5326 pa_file_start_space (0);
5327 aputs ("\t.IMPORT $global$,DATA\n"
5328 "\t.IMPORT $$dyncall,MILLICODE\n");
5329 pa_file_start_mcount ("CODE");
5330 pa_file_start_file (0);
5333 static void
5334 pa_linux_file_start (void)
5336 pa_file_start_file (1);
5337 pa_file_start_level ();
5338 pa_file_start_mcount ("CODE");
5341 static void
5342 pa_hpux64_gas_file_start (void)
5344 pa_file_start_level ();
5345 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5346 if (profile_flag)
5347 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5348 #endif
5349 pa_file_start_file (1);
5352 static void
5353 pa_hpux64_hpas_file_start (void)
5355 pa_file_start_level ();
5356 pa_file_start_space (1);
5357 pa_file_start_mcount ("CODE");
5358 pa_file_start_file (0);
5360 #undef aputs
5362 /* Search the deferred plabel list for SYMBOL and return its internal
5363 label. If an entry for SYMBOL is not found, a new entry is created. */
5366 static rtx get_deferred_plabel (rtx symbol)
5368 const char *fname = XSTR (symbol, 0);
5369 size_t i;
5371 /* See if we have already put this function on the list of deferred
5372 plabels. This list is generally small, so a linear search is not
5373 too ugly. If it proves too slow, replace it with something faster. */
5374 for (i = 0; i < n_deferred_plabels; i++)
5375 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5376 break;
5378 /* If the deferred plabel list is empty, or this entry was not found
5379 on the list, create a new entry on the list. */
5380 if (deferred_plabels == NULL || i == n_deferred_plabels)
5382 tree id;
5384 if (deferred_plabels == 0)
5385 deferred_plabels = ggc_alloc_deferred_plabel ();
5386 else
5387 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5388 deferred_plabels,
5389 n_deferred_plabels + 1);
5391 i = n_deferred_plabels++;
5392 deferred_plabels[i].internal_label = gen_label_rtx ();
5393 deferred_plabels[i].symbol = symbol;
5395 /* Gross. We have just implicitly taken the address of this
5396 function. Mark it in the same manner as assemble_name. */
5397 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5398 if (id)
5399 mark_referenced (id);
5402 return deferred_plabels[i].internal_label;
5405 static void
5406 output_deferred_plabels (void)
5408 size_t i;
5410 /* If we have some deferred plabels, then we need to switch into the
5411 data or readonly data section, and align it to a 4 byte (32-bit)
5412 or 8 byte (64-bit) boundary before outputting the deferred plabels. */
5413 if (n_deferred_plabels)
5415 switch_to_section (flag_pic ? data_section : readonly_data_section);
5416 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5419 /* Now output the deferred plabels. */
5420 for (i = 0; i < n_deferred_plabels; i++)
5422 targetm.asm_out.internal_label (asm_out_file, "L",
5423 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5424 assemble_integer (deferred_plabels[i].symbol,
5425 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5429 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5430 /* Initialize optabs to point to HPUX long double emulation routines. */
5431 static void
5432 pa_hpux_init_libfuncs (void)
5434 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5435 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5436 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5437 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5438 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5439 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5440 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5441 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5442 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5444 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5445 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5446 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5447 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5448 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5449 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5450 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5452 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5453 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5454 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5455 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5457 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5458 ? "__U_Qfcnvfxt_quad_to_sgl"
5459 : "_U_Qfcnvfxt_quad_to_sgl");
5460 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5461 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5462 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5464 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5465 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5466 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5467 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5469 #endif
5471 /* HP's millicode routines mean something special to the assembler.
5472 Keep track of which ones we have used. */
5474 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5475 static void import_milli (enum millicodes);
5476 static char imported[(int) end1000];
5477 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5478 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5479 #define MILLI_START 10
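/* MILLI_START is the offset of the "...." placeholder above (".IMPORT
   $$" is 10 characters), so patching four characters there yields,
   e.g., ".IMPORT $$mulI,MILLICODE".  */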
5481 static void
5482 import_milli (enum millicodes code)
5484 char str[sizeof (import_string)];
5486 if (!imported[(int) code])
5488 imported[(int) code] = 1;
5489 strcpy (str, import_string);
5490 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5491 output_asm_insn (str, 0);
5495 /* The register constraints have put the operands and return value in
5496 the proper registers. */
5498 const char *
5499 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5501 import_milli (mulI);
5502 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5505 /* Emit the rtl for doing a division by a constant. */
5507 /* Do magic division millicodes exist for this value? */
5508 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
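/* Reading the table: magic millicode routines exist for the divisors
   3, 5, 6, 7, 9, 10, 12, 14 and 15; the remaining entries (including
   the powers of two, which need no call) are zero.  */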
5510 /* We'll use an array to keep track of the magic millicodes and
5511 whether or not we've used them already. [n][0] is signed, [n][1] is
5512 unsigned. */
5514 static int div_milli[16][2];
5517 int emit_hpdiv_const (rtx *operands, int unsignedp)
5519 if (GET_CODE (operands[2]) == CONST_INT
5520 && INTVAL (operands[2]) > 0
5521 && INTVAL (operands[2]) < 16
5522 && magic_milli[INTVAL (operands[2])])
5524 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5526 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5527 emit
5528 (gen_rtx_PARALLEL
5529 (VOIDmode,
5530 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5531 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5532 SImode,
5533 gen_rtx_REG (SImode, 26),
5534 operands[2])),
5535 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5536 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5537 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5538 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5539 gen_rtx_CLOBBER (VOIDmode, ret))));
5540 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5541 return 1;
5543 return 0;
5546 const char *
5547 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5549 int divisor;
5551 /* If the divisor is a constant, try to use one of the special
5552 opcodes. */
5553 if (GET_CODE (operands[0]) == CONST_INT)
5555 static char buf[100];
5556 divisor = INTVAL (operands[0]);
5557 if (!div_milli[divisor][unsignedp])
5559 div_milli[divisor][unsignedp] = 1;
5560 if (unsignedp)
5561 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5562 else
5563 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5565 if (unsignedp)
5567 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5568 INTVAL (operands[0]));
5569 return output_millicode_call (insn,
5570 gen_rtx_SYMBOL_REF (SImode, buf));
5572 else
5574 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5575 INTVAL (operands[0]));
5576 return output_millicode_call (insn,
5577 gen_rtx_SYMBOL_REF (SImode, buf));
5580 /* Divisor isn't a special constant. */
5581 else
5583 if (unsignedp)
5585 import_milli (divU);
5586 return output_millicode_call (insn,
5587 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5589 else
5591 import_milli (divI);
5592 return output_millicode_call (insn,
5593 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5598 /* Output a $$rem millicode to do mod. */
5600 const char *
5601 output_mod_insn (int unsignedp, rtx insn)
5603 if (unsignedp)
5605 import_milli (remU);
5606 return output_millicode_call (insn,
5607 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5609 else
5611 import_milli (remI);
5612 return output_millicode_call (insn,
5613 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5617 void
5618 output_arg_descriptor (rtx call_insn)
5620 const char *arg_regs[4];
5621 enum machine_mode arg_mode;
5622 rtx link;
5623 int i, output_flag = 0;
5624 int regno;
5626 /* We neither need nor want argument location descriptors for the
5627 64-bit runtime environment or the ELF32 environment. */
5628 if (TARGET_64BIT || TARGET_ELF32)
5629 return;
5631 for (i = 0; i < 4; i++)
5632 arg_regs[i] = 0;
5634 /* Specify explicitly that no argument relocations should take place
5635 if using the portable runtime calling conventions. */
5636 if (TARGET_PORTABLE_RUNTIME)
5638 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5639 asm_out_file);
5640 return;
5643 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5644 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5645 link; link = XEXP (link, 1))
5647 rtx use = XEXP (link, 0);
5649 if (! (GET_CODE (use) == USE
5650 && GET_CODE (XEXP (use, 0)) == REG
5651 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5652 continue;
5654 arg_mode = GET_MODE (XEXP (use, 0));
5655 regno = REGNO (XEXP (use, 0));
5656 if (regno >= 23 && regno <= 26)
5658 arg_regs[26 - regno] = "GR";
5659 if (arg_mode == DImode)
5660 arg_regs[25 - regno] = "GR";
5662 else if (regno >= 32 && regno <= 39)
5664 if (arg_mode == SFmode)
5665 arg_regs[(regno - 32) / 2] = "FR";
5666 else
5668 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5669 arg_regs[(regno - 34) / 2] = "FR";
5670 arg_regs[(regno - 34) / 2 + 1] = "FU";
5671 #else
5672 arg_regs[(regno - 34) / 2] = "FU";
5673 arg_regs[(regno - 34) / 2 + 1] = "FR";
5674 #endif
5678 fputs ("\t.CALL ", asm_out_file);
5679 for (i = 0; i < 4; i++)
5681 if (arg_regs[i])
5683 if (output_flag++)
5684 fputc (',', asm_out_file);
5685 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5688 fputc ('\n', asm_out_file);
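/* For example, a call passing two word-sized integer arguments (in
   %r26 and %r25) produces "\t.CALL ARGW0=GR,ARGW1=GR\n".  */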
5691 static reg_class_t
5692 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5693 enum machine_mode mode, secondary_reload_info *sri)
5695 int is_symbolic, regno;
5696 enum reg_class rclass = (enum reg_class) rclass_i;
5698 /* Handle the easy stuff first. */
5699 if (rclass == R1_REGS)
5700 return NO_REGS;
5702 if (REG_P (x))
5704 regno = REGNO (x);
5705 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5706 return NO_REGS;
5708 else
5709 regno = -1;
5711 /* If we have something like (mem (mem (...))), we can safely assume the
5712 inner MEM will end up in a general register after reloading, so there's
5713 no need for a secondary reload. */
5714 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5715 return NO_REGS;
5717 /* Trying to load a constant into a FP register during PIC code
5718 generation requires %r1 as a scratch register. */
5719 if (flag_pic
5720 && (mode == SImode || mode == DImode)
5721 && FP_REG_CLASS_P (rclass)
5722 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5724 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5725 : CODE_FOR_reload_indi_r1);
5726 return NO_REGS;
5729 /* Profiling showed the PA port spends about 1.3% of its compilation
5730 time in true_regnum from calls inside pa_secondary_reload_class. */
5731 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5732 regno = true_regnum (x);
5734 /* In order to allow 14-bit displacements in integer loads and stores,
5735 we need to prevent reload from generating out of range integer mode
5736 loads and stores to the floating point registers. Previously, we
5737 used to call for a secondary reload and have emit_move_sequence()
5738 fix the instruction sequence. However, reload occasionally wouldn't
5739 generate the reload and we would end up with an invalid REG+D memory
5740 address. So, now we use an intermediate general register for most
5741 memory loads and stores. */
5742 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5743 && GET_MODE_CLASS (mode) == MODE_INT
5744 && FP_REG_CLASS_P (rclass))
5746 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5747 the secondary reload needed for a pseudo. It never passes a
5748 REG+D address. */
5749 if (GET_CODE (x) == MEM)
5751 x = XEXP (x, 0);
5753 /* We don't need an intermediate for indexed and LO_SUM DLT
5754 memory addresses. When INT14_OK_STRICT is true, it might
5755 appear that we could directly allow register indirect
5756 memory addresses. However, this doesn't work because we
5757 don't support SUBREGs in floating-point register copies
5758 and reload doesn't tell us when it's going to use a SUBREG. */
5759 if (IS_INDEX_ADDR_P (x)
5760 || IS_LO_SUM_DLT_ADDR_P (x))
5761 return NO_REGS;
5763 /* Otherwise, we need an intermediate general register. */
5764 return GENERAL_REGS;
5767 /* Request a secondary reload with a general scratch register
5768 for everything else. ??? Could symbolic operands be handled
5769 directly when generating non-pic PA 2.0 code? */
5770 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5771 return NO_REGS;
5774 /* We need a secondary register (GPR) for copies between the SAR
5775 and anything other than a general register. */
5776 if (rclass == SHIFT_REGS && (regno <= 0 || regno >= 32))
5778 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5779 return NO_REGS;
5782 /* A SAR<->FP register copy requires a secondary register (GPR) as
5783 well as secondary memory. */
5784 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5785 && (REGNO_REG_CLASS (regno) == SHIFT_REGS
5786 && FP_REG_CLASS_P (rclass)))
5788 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5789 return NO_REGS;
5792 /* Secondary reloads of symbolic operands require %r1 as a scratch
5793 register when we're generating PIC code and when the operand isn't
5794 readonly. */
5795 if (GET_CODE (x) == HIGH)
5796 x = XEXP (x, 0);
5798 /* Profiling has shown GCC spends about 2.6% of its compilation
5799 time in symbolic_operand from calls inside pa_secondary_reload_class.
5800 So, we use an inline copy to avoid useless work. */
5801 switch (GET_CODE (x))
5803 rtx op;
5805 case SYMBOL_REF:
5806 is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5807 break;
5808 case LABEL_REF:
5809 is_symbolic = 1;
5810 break;
5811 case CONST:
5812 op = XEXP (x, 0);
5813 is_symbolic = (GET_CODE (op) == PLUS
5814 && ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5815 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5816 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5817 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5818 break;
5819 default:
5820 is_symbolic = 0;
5821 break;
5824 if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5826 gcc_assert (mode == SImode || mode == DImode);
5827 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5828 : CODE_FOR_reload_indi_r1);
5831 return NO_REGS;
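/* Illustrative sketch (symbol and register numbers assumed): a symbolic
   secondary reload in SOM PIC code goes through the DLT, and addil
   always deposits its result in %r1, which is why %r1 is the required
   scratch register:

       addil LT'sym,%r19        ; left part; result lands in %r1
       ldw RT'sym(%r1),%r28     ; load the DLT entry  */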
5834 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5835 is only marked as live on entry by df-scan when it is a fixed
5836 register. It isn't a fixed register in the 64-bit runtime,
5837 so we need to mark it here. */
5839 static void
5840 pa_extra_live_on_entry (bitmap regs)
5842 if (TARGET_64BIT)
5843 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5846 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5847 to prevent it from being deleted. */
5849 rtx
5850 pa_eh_return_handler_rtx (void)
5852 rtx tmp;
5854 tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
5855 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5856 tmp = gen_rtx_MEM (word_mode, tmp);
5857 tmp->volatil = 1;
5858 return tmp;
5861 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5862 by invisible reference. As a GCC extension, we also pass anything
5863 with a zero or variable size by reference.
5865 The 64-bit runtime does not describe passing any types by invisible
5866 reference. The internals of GCC can't currently handle passing
5867 empty structures, and zero or variable length arrays when they are
5868 not passed entirely on the stack or by reference. Thus, as a GCC
5869 extension, we pass these types by reference. The HP compiler doesn't
5870 support these types, so hopefully there shouldn't be any compatibility
5871 issues. This may have to be revisited when HP releases a C99 compiler
5872 or updates the ABI. */
5874 static bool
5875 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5876 enum machine_mode mode, const_tree type,
5877 bool named ATTRIBUTE_UNUSED)
5879 HOST_WIDE_INT size;
5881 if (type)
5882 size = int_size_in_bytes (type);
5883 else
5884 size = GET_MODE_SIZE (mode);
5886 if (TARGET_64BIT)
5887 return size <= 0;
5888 else
5889 return size <= 0 || size > 8;
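/* Illustrative cases (type names assumed, not from the original source):

     struct s12 { int a, b, c; };   -- 12 bytes: by reference in the
                                       32-bit runtime (size > 8), by
                                       value in the 64-bit runtime
     struct s0 { };                 -- zero size: by reference in both
                                       runtimes (the GCC extension)  */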
5892 enum direction
5893 function_arg_padding (enum machine_mode mode, const_tree type)
5895 if (mode == BLKmode
5896 || (TARGET_64BIT
5897 && type
5898 && (AGGREGATE_TYPE_P (type)
5899 || TREE_CODE (type) == COMPLEX_TYPE
5900 || TREE_CODE (type) == VECTOR_TYPE)))
5902 /* Return none if justification is not required. */
5903 if (type
5904 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5905 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5906 return none;
5908 /* The directions set here are ignored when a BLKmode argument larger
5909 than a word is placed in a register. Different code is used for
5910 the stack and registers. This makes it difficult to have a
5911 consistent data representation for both the stack and registers.
5912 For both runtimes, the justification and padding for arguments on
5913 the stack and in registers should be identical. */
5914 if (TARGET_64BIT)
5915 /* The 64-bit runtime specifies left justification for aggregates. */
5916 return upward;
5917 else
5918 /* The 32-bit runtime architecture specifies right justification.
5919 When the argument is passed on the stack, the argument is padded
5920 with garbage on the left. The HP compiler pads with zeros. */
5921 return downward;
5924 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5925 return downward;
5926 else
5927 return none;
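/* Worked example (illustrative): a 6-byte BLKmode struct is 48 bits,
   which is not a multiple of PARM_BOUNDARY, so it is padded downward
   (right justified) in the 32-bit runtime and upward (left justified)
   in the 64-bit runtime.  A scalar HImode argument is likewise padded
   downward, since 16 bits is less than PARM_BOUNDARY.  */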
5931 /* Do what is necessary for `va_start'. We look at the current function
5932 to determine if stdargs or varargs is used and fill in an initial
5933 va_list. A pointer to this constructor is returned. */
5935 static rtx
5936 hppa_builtin_saveregs (void)
5938 rtx offset, dest;
5939 tree fntype = TREE_TYPE (current_function_decl);
5940 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5941 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5942 != void_type_node)))
5943 ? UNITS_PER_WORD : 0);
5945 if (argadj)
5946 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
5947 else
5948 offset = crtl->args.arg_offset_rtx;
5950 if (TARGET_64BIT)
5952 int i, off;
5954 /* Adjust for varargs/stdarg differences. */
5955 if (argadj)
5956 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
5957 else
5958 offset = crtl->args.arg_offset_rtx;
5960 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5961 from the incoming arg pointer and growing to larger addresses. */
5962 for (i = 26, off = -64; i >= 19; i--, off += 8)
5963 emit_move_insn (gen_rtx_MEM (word_mode,
5964 plus_constant (arg_pointer_rtx, off)),
5965 gen_rtx_REG (word_mode, i));
5967 /* The incoming args pointer points just beyond the flushback area;
5968 normally this is not a serious concern. However, when we are doing
5969 varargs/stdargs we want to make the arg pointer point to the start
5970 of the incoming argument area. */
5971 emit_move_insn (virtual_incoming_args_rtx,
5972 plus_constant (arg_pointer_rtx, -64));
5974 /* Now return a pointer to the first anonymous argument. */
5975 return copy_to_reg (expand_binop (Pmode, add_optab,
5976 virtual_incoming_args_rtx,
5977 offset, 0, 0, OPTAB_LIB_WIDEN));
5980 /* Store general registers on the stack. */
5981 dest = gen_rtx_MEM (BLKmode,
5982 plus_constant (crtl->args.internal_arg_pointer,
5983 -16));
5984 set_mem_alias_set (dest, get_varargs_alias_set ());
5985 set_mem_align (dest, BITS_PER_WORD);
5986 move_block_from_reg (23, dest, 4);
5988 /* move_block_from_reg will emit code to store the argument registers
5989 individually as scalar stores.
5991 However, other insns may later load from the same addresses for
5992 a structure load (passing a struct to a varargs routine).
5994 The alias code assumes that such aliasing can never happen, so we
5995 have to keep memory referencing insns from moving up beyond the
5996 last argument register store. So we emit a blockage insn here. */
5997 emit_insn (gen_blockage ());
5999 return copy_to_reg (expand_binop (Pmode, add_optab,
6000 crtl->args.internal_arg_pointer,
6001 offset, 0, 0, OPTAB_LIB_WIDEN));
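/* For reference, the 64-bit register save loop above produces this
   layout (offsets from the incoming argument pointer AP):

       %r26 -> AP-64   %r25 -> AP-56   %r24 -> AP-48   %r23 -> AP-40
       %r22 -> AP-32   %r21 -> AP-24   %r20 -> AP-16   %r19 -> AP-8  */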
6004 static void
6005 hppa_va_start (tree valist, rtx nextarg)
6007 nextarg = expand_builtin_saveregs ();
6008 std_expand_builtin_va_start (valist, nextarg);
6011 static tree
6012 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6013 gimple_seq *post_p)
6015 if (TARGET_64BIT)
6017 /* Args grow upward. We can use the generic routines. */
6018 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6020 else /* !TARGET_64BIT */
6022 tree ptr = build_pointer_type (type);
6023 tree valist_type;
6024 tree t, u;
6025 unsigned int size, ofs;
6026 bool indirect;
6028 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6029 if (indirect)
6031 type = ptr;
6032 ptr = build_pointer_type (type);
6034 size = int_size_in_bytes (type);
6035 valist_type = TREE_TYPE (valist);
6037 /* Args grow down. Not handled by generic routines. */
6039 u = fold_convert (sizetype, size_in_bytes (type));
6040 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6041 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
6043 /* Copied from va-pa.h, but we probably don't need to align to
6044 word size, since we generate and preserve that invariant. */
6045 u = size_int (size > 4 ? -8 : -4);
6046 t = fold_convert (sizetype, t);
6047 t = build2 (BIT_AND_EXPR, sizetype, t, u);
6048 t = fold_convert (valist_type, t);
6050 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6052 ofs = (8 - size) % 4;
6053 if (ofs != 0)
6055 u = size_int (ofs);
6056 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6059 t = fold_convert (ptr, t);
6060 t = build_va_arg_indirect_ref (t);
6062 if (indirect)
6063 t = build_va_arg_indirect_ref (t);
6065 return t;
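/* Worked example (illustrative): fetching a short (size 2) in the
   32-bit runtime: valist is moved down by 2, masked with -4 (size <= 4)
   to a 4-byte boundary, and then ofs = (8 - 2) % 4 = 2 bumps the
   pointer past the pad bytes so it addresses the right-justified
   value.  */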
6069 /* True if MODE is valid for the target. By "valid", we mean able to
6070 be manipulated in non-trivial ways. In particular, this means all
6071 the arithmetic is supported.
6073 Currently, TImode is not valid as the HP 64-bit runtime documentation
6074 doesn't document the alignment and calling conventions for this type.
6075 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6076 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6078 static bool
6079 pa_scalar_mode_supported_p (enum machine_mode mode)
6081 int precision = GET_MODE_PRECISION (mode);
6083 switch (GET_MODE_CLASS (mode))
6085 case MODE_PARTIAL_INT:
6086 case MODE_INT:
6087 if (precision == CHAR_TYPE_SIZE)
6088 return true;
6089 if (precision == SHORT_TYPE_SIZE)
6090 return true;
6091 if (precision == INT_TYPE_SIZE)
6092 return true;
6093 if (precision == LONG_TYPE_SIZE)
6094 return true;
6095 if (precision == LONG_LONG_TYPE_SIZE)
6096 return true;
6097 return false;
6099 case MODE_FLOAT:
6100 if (precision == FLOAT_TYPE_SIZE)
6101 return true;
6102 if (precision == DOUBLE_TYPE_SIZE)
6103 return true;
6104 if (precision == LONG_DOUBLE_TYPE_SIZE)
6105 return true;
6106 return false;
6108 case MODE_DECIMAL_FLOAT:
6109 return false;
6111 default:
6112 gcc_unreachable ();
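/* E.g. on the 64-bit target TImode has precision 128, which matches
   none of CHAR/SHORT/INT/LONG/LONG_LONG_TYPE_SIZE (the largest being
   64), so the MODE_INT case above correctly rejects it.  */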
6116 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6117 it branches to the next real instruction. Otherwise, return FALSE. */
6119 static bool
6120 branch_to_delay_slot_p (rtx insn)
6122 if (dbr_sequence_length ())
6123 return FALSE;
6125 return next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn);
6128 /* Return TRUE if INSN, a jump insn, needs a nop in its delay slot.
6130 This occurs when INSN has an unfilled delay slot and is followed
6131 by an ASM_INPUT. Disaster can occur if the ASM_INPUT is empty and
6132 the jump branches into the delay slot. So, we add a nop in the delay
6133 slot just to be safe. This messes up our instruction count, but we
6134 don't know how big the ASM_INPUT insn is anyway. */
6136 static bool
6137 branch_needs_nop_p (rtx insn)
6139 rtx next_insn;
6141 if (dbr_sequence_length ())
6142 return FALSE;
6144 next_insn = next_real_insn (insn);
6145 return GET_CODE (PATTERN (next_insn)) == ASM_INPUT;
6148 /* This routine handles all the normal conditional branch sequences we
6149 might need to generate. It handles compare immediate vs compare
6150 register, nullification of delay slots, varying length branches,
6151 negated branches, and all combinations of the above. It returns the
6152 output appropriate to emit the branch corresponding to all given
6153 parameters. */
6155 const char *
6156 output_cbranch (rtx *operands, int negated, rtx insn)
6158 static char buf[100];
6159 int useskip = 0;
6160 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6161 int length = get_attr_length (insn);
6162 int xdelay;
6164 /* A conditional branch to the following instruction (e.g. the delay slot)
6165 is asking for a disaster. This can happen when not optimizing and
6166 when jump optimization fails.
6168 While it is usually safe to emit nothing, this can fail if the
6169 preceding instruction is a nullified branch with an empty delay
6170 slot and the same branch target as this branch. We could check
6171 for this but jump optimization should eliminate nop jumps. It
6172 is always safe to emit a nop. */
6173 if (branch_to_delay_slot_p (insn))
6174 return "nop";
6176 /* The doubleword form of the cmpib instruction doesn't have the LEU
6177 and GTU conditions while the cmpb instruction does. Since we accept
6178 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6179 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6180 operands[2] = gen_rtx_REG (DImode, 0);
6181 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6182 operands[1] = gen_rtx_REG (DImode, 0);
6184 /* If this is a long branch with its delay slot unfilled, set `nullify'
6185 as it can nullify the delay slot and save a nop. */
6186 if (length == 8 && dbr_sequence_length () == 0)
6187 nullify = 1;
6189 /* If this is a short forward conditional branch which did not get
6190 its delay slot filled, the delay slot can still be nullified. */
6191 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6192 nullify = forward_branch_p (insn);
6194 /* A forward branch over a single nullified insn can be done with a
6195 comclr instruction. This avoids a single cycle penalty due to
6196 mis-predicted branch if we fall through (branch not taken). */
6197 if (length == 4
6198 && next_real_insn (insn) != 0
6199 && get_attr_length (next_real_insn (insn)) == 4
6200 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6201 && nullify)
6202 useskip = 1;
6204 switch (length)
6206 /* All short conditional branches except backwards with an unfilled
6207 delay slot. */
6208 case 4:
6209 if (useskip)
6210 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6211 else
6212 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6213 if (GET_MODE (operands[1]) == DImode)
6214 strcat (buf, "*");
6215 if (negated)
6216 strcat (buf, "%B3");
6217 else
6218 strcat (buf, "%S3");
6219 if (useskip)
6220 strcat (buf, " %2,%r1,%%r0");
6221 else if (nullify)
6223 if (branch_needs_nop_p (insn))
6224 strcat (buf, ",n %2,%r1,%0%#");
6225 else
6226 strcat (buf, ",n %2,%r1,%0");
6228 else
6229 strcat (buf, " %2,%r1,%0");
6230 break;
6232 /* All long conditionals. Note a short backward branch with an
6233 unfilled delay slot is treated just like a long backward branch
6234 with an unfilled delay slot. */
6235 case 8:
6236 /* Handle weird backwards branch with a filled delay slot
6237 which is nullified. */
6238 if (dbr_sequence_length () != 0
6239 && ! forward_branch_p (insn)
6240 && nullify)
6242 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6243 if (GET_MODE (operands[1]) == DImode)
6244 strcat (buf, "*");
6245 if (negated)
6246 strcat (buf, "%S3");
6247 else
6248 strcat (buf, "%B3");
6249 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6251 /* Handle short backwards branch with an unfilled delay slot.
6252 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6253 taken and untaken branches. */
6254 else if (dbr_sequence_length () == 0
6255 && ! forward_branch_p (insn)
6256 && INSN_ADDRESSES_SET_P ()
6257 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6258 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6260 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6261 if (GET_MODE (operands[1]) == DImode)
6262 strcat (buf, "*");
6263 if (negated)
6264 strcat (buf, "%B3 %2,%r1,%0%#");
6265 else
6266 strcat (buf, "%S3 %2,%r1,%0%#");
6268 else
6270 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6271 if (GET_MODE (operands[1]) == DImode)
6272 strcat (buf, "*");
6273 if (negated)
6274 strcat (buf, "%S3");
6275 else
6276 strcat (buf, "%B3");
6277 if (nullify)
6278 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6279 else
6280 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6282 break;
6284 default:
6285 /* The reversed conditional branch must branch over one additional
6286 instruction if the delay slot is filled and needs to be extracted
6287 by output_lbranch. If the delay slot is empty or this is a
6288 nullified forward branch, the instruction after the reversed
6289 condition branch must be nullified. */
6290 if (dbr_sequence_length () == 0
6291 || (nullify && forward_branch_p (insn)))
6293 nullify = 1;
6294 xdelay = 0;
6295 operands[4] = GEN_INT (length);
6297 else
6299 xdelay = 1;
6300 operands[4] = GEN_INT (length + 4);
6303 /* Create a reversed conditional branch which branches around
6304 the following insns. */
6305 if (GET_MODE (operands[1]) != DImode)
6307 if (nullify)
6309 if (negated)
6310 strcpy (buf,
6311 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6312 else
6313 strcpy (buf,
6314 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6316 else
6318 if (negated)
6319 strcpy (buf,
6320 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6321 else
6322 strcpy (buf,
6323 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6326 else
6328 if (nullify)
6330 if (negated)
6331 strcpy (buf,
6332 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6333 else
6334 strcpy (buf,
6335 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6337 else
6339 if (negated)
6340 strcpy (buf,
6341 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6342 else
6343 strcpy (buf,
6344 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6348 output_asm_insn (buf, operands);
6349 return output_lbranch (operands[0], insn, xdelay);
6351 return buf;
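/* Illustrative output (operands assumed, not from the original source):
   for a short nullified word-mode branch on (eq %r19 %r20), the case 4
   template above produces

       comb,=,n %r20,%r19,L$0012    ; cmpb,... with PA 2.0 mnemonics  */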
6354 /* This routine handles output of long unconditional branches that
6355 exceed the maximum range of a simple branch instruction. Since
6356 we don't have a register available for the branch, we save register
6357 %r1 in the frame marker, load the branch destination DEST into %r1,
6358 execute the branch, and restore %r1 in the delay slot of the branch.
6360 Since long branches may have an insn in the delay slot and the
6361 delay slot is used to restore %r1, we in general need to extract
6362 this insn and execute it before the branch. However, to facilitate
6363 use of this function by conditional branches, we also provide an
6364 option to not extract the delay insn so that it will be emitted
6365 after the long branch. So, if there is an insn in the delay slot,
6366 it is extracted if XDELAY is nonzero.
6368 The lengths of the various long-branch sequences are 20, 16 and 24
6369 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6371 const char *
6372 output_lbranch (rtx dest, rtx insn, int xdelay)
6374 rtx xoperands[2];
6376 xoperands[0] = dest;
6378 /* First, free up the delay slot. */
6379 if (xdelay && dbr_sequence_length () != 0)
6381 /* We can't handle a jump in the delay slot. */
6382 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6384 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6385 optimize, 0, NULL);
6387 /* Now delete the delay insn. */
6388 SET_INSN_DELETED (NEXT_INSN (insn));
6391 /* Output an insn to save %r1. The runtime documentation doesn't
6392 specify whether the "Clean Up" slot in the caller's frame can
6393 be clobbered by the callee. It isn't copied by HP's builtin
6394 alloca, so this suggests that it can be clobbered if necessary.
6395 The "Static Link" location is copied by HP builtin alloca, so
6396 we avoid using it. Using the cleanup slot might be a problem
6397 if we have to interoperate with languages that pass cleanup
6398 information. However, it should be possible to handle these
6399 situations with GCC's asm feature.
6401 The "Current RP" slot is reserved for the called procedure, so
6402 we try to use it when we don't have a frame of our own. It's
6403 rather unlikely that we won't have a frame when we need to emit
6404 a very long branch.
6406 Really the way to go long term is a register scavenger; go to
6407 the target of the jump and find a register which we can use
6408 as a scratch to hold the value in %r1. Then, we wouldn't have
6409 to free up the delay slot or clobber a slot that may be needed
6410 for other purposes. */
6411 if (TARGET_64BIT)
6413 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6414 /* Use the return pointer slot in the frame marker. */
6415 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6416 else
6417 /* Use the slot at -40 in the frame marker since HP builtin
6418 alloca doesn't copy it. */
6419 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6421 else
6423 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6424 /* Use the return pointer slot in the frame marker. */
6425 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6426 else
6427 /* Use the "Clean Up" slot in the frame marker. In GCC,
6428 the only other use of this location is for copying a
6429 floating point double argument from a floating-point
6430 register to two general registers. The copy is done
6431 as an "atomic" operation when outputting a call, so it
6432 won't interfere with our using the location here. */
6433 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6436 if (TARGET_PORTABLE_RUNTIME)
6438 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6439 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6440 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6442 else if (flag_pic)
6444 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6445 if (TARGET_SOM || !TARGET_GAS)
6447 xoperands[1] = gen_label_rtx ();
6448 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6449 targetm.asm_out.internal_label (asm_out_file, "L",
6450 CODE_LABEL_NUMBER (xoperands[1]));
6451 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6453 else
6455 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6456 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6458 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6460 else
6461 /* Now output a very long branch to the original target. */
6462 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6464 /* Now restore the value of %r1 in the delay slot. */
6465 if (TARGET_64BIT)
6467 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6468 return "ldd -16(%%r30),%%r1";
6469 else
6470 return "ldd -40(%%r30),%%r1";
6472 else
6474 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6475 return "ldw -20(%%r30),%%r1";
6476 else
6477 return "ldw -12(%%r30),%%r1";
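/* For example, the 16 byte non-PIC case above (label name assumed)
   expands to

       stw %r1,-12(%r30)          ; save %r1 in the frame marker
       ldil L'L$0042,%r1
       be R'L$0042(%sr4,%r1)
       ldw -12(%r30),%r1          ; delay slot restores %r1  */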
6481 /* This routine handles all the branch-on-bit conditional branch sequences we
6482 might need to generate. It handles nullification of delay slots,
6483 varying length branches, negated branches and all combinations of the
6484 above. It returns the appropriate output template to emit the branch. */
6486 const char *
6487 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6489 static char buf[100];
6490 int useskip = 0;
6491 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6492 int length = get_attr_length (insn);
6493 int xdelay;
6495 /* A conditional branch to the following instruction (e.g. the delay slot) is
6496 asking for a disaster. I do not think this can happen as this pattern
6497 is only used when optimizing; jump optimization should eliminate the
6498 jump. But be prepared just in case. */
6500 if (branch_to_delay_slot_p (insn))
6501 return "nop";
6503 /* If this is a long branch with its delay slot unfilled, set `nullify'
6504 as it can nullify the delay slot and save a nop. */
6505 if (length == 8 && dbr_sequence_length () == 0)
6506 nullify = 1;
6508 /* If this is a short forward conditional branch which did not get
6509 its delay slot filled, the delay slot can still be nullified. */
6510 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6511 nullify = forward_branch_p (insn);
6513 /* A forward branch over a single nullified insn can be done with an
6514 extrs instruction. This avoids a single cycle penalty due to
6515 mis-predicted branch if we fall through (branch not taken). */
6517 if (length == 4
6518 && next_real_insn (insn) != 0
6519 && get_attr_length (next_real_insn (insn)) == 4
6520 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6521 && nullify)
6522 useskip = 1;
6524 switch (length)
6527 /* All short conditional branches except backwards with an unfilled
6528 delay slot. */
6529 case 4:
6530 if (useskip)
6531 strcpy (buf, "{extrs,|extrw,s,}");
6532 else
6533 strcpy (buf, "bb,");
6534 if (useskip && GET_MODE (operands[0]) == DImode)
6535 strcpy (buf, "extrd,s,*");
6536 else if (GET_MODE (operands[0]) == DImode)
6537 strcpy (buf, "bb,*");
6538 if ((which == 0 && negated)
6539 || (which == 1 && ! negated))
6540 strcat (buf, ">=");
6541 else
6542 strcat (buf, "<");
6543 if (useskip)
6544 strcat (buf, " %0,%1,1,%%r0");
6545 else if (nullify && negated)
6547 if (branch_needs_nop_p (insn))
6548 strcat (buf, ",n %0,%1,%3%#");
6549 else
6550 strcat (buf, ",n %0,%1,%3");
6552 else if (nullify && ! negated)
6554 if (branch_needs_nop_p (insn))
6555 strcat (buf, ",n %0,%1,%2%#");
6556 else
6557 strcat (buf, ",n %0,%1,%2");
6559 else if (! nullify && negated)
6560 strcat (buf, " %0,%1,%3");
6561 else if (! nullify && ! negated)
6562 strcat (buf, " %0,%1,%2");
6563 break;
6565 /* All long conditionals. Note a short backward branch with an
6566 unfilled delay slot is treated just like a long backward branch
6567 with an unfilled delay slot. */
6568 case 8:
6569 /* Handle weird backwards branch with a filled delay slot
6570 which is nullified. */
6571 if (dbr_sequence_length () != 0
6572 && ! forward_branch_p (insn)
6573 && nullify)
6575 strcpy (buf, "bb,");
6576 if (GET_MODE (operands[0]) == DImode)
6577 strcat (buf, "*");
6578 if ((which == 0 && negated)
6579 || (which == 1 && ! negated))
6580 strcat (buf, "<");
6581 else
6582 strcat (buf, ">=");
6583 if (negated)
6584 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6585 else
6586 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6588 /* Handle short backwards branch with an unfilled delay slot.
6589 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6590 taken and untaken branches. */
6591 else if (dbr_sequence_length () == 0
6592 && ! forward_branch_p (insn)
6593 && INSN_ADDRESSES_SET_P ()
6594 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6595 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6597 strcpy (buf, "bb,");
6598 if (GET_MODE (operands[0]) == DImode)
6599 strcat (buf, "*");
6600 if ((which == 0 && negated)
6601 || (which == 1 && ! negated))
6602 strcat (buf, ">=");
6603 else
6604 strcat (buf, "<");
6605 if (negated)
6606 strcat (buf, " %0,%1,%3%#");
6607 else
6608 strcat (buf, " %0,%1,%2%#");
6610 else
6612 if (GET_MODE (operands[0]) == DImode)
6613 strcpy (buf, "extrd,s,*");
6614 else
6615 strcpy (buf, "{extrs,|extrw,s,}");
6616 if ((which == 0 && negated)
6617 || (which == 1 && ! negated))
6618 strcat (buf, "<");
6619 else
6620 strcat (buf, ">=");
6621 if (nullify && negated)
6622 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6623 else if (nullify && ! negated)
6624 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6625 else if (negated)
6626 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6627 else
6628 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6630 break;
6632 default:
6633 /* The reversed conditional branch must branch over one additional
6634 instruction if the delay slot is filled and needs to be extracted
6635 by output_lbranch. If the delay slot is empty or this is a
6636 nullified forward branch, the instruction after the reversed
6637 condition branch must be nullified. */
6638 if (dbr_sequence_length () == 0
6639 || (nullify && forward_branch_p (insn)))
6641 nullify = 1;
6642 xdelay = 0;
6643 operands[4] = GEN_INT (length);
6645 else
6647 xdelay = 1;
6648 operands[4] = GEN_INT (length + 4);
6651 if (GET_MODE (operands[0]) == DImode)
6652 strcpy (buf, "bb,*");
6653 else
6654 strcpy (buf, "bb,");
6655 if ((which == 0 && negated)
6656 || (which == 1 && !negated))
6657 strcat (buf, "<");
6658 else
6659 strcat (buf, ">=");
6660 if (nullify)
6661 strcat (buf, ",n %0,%1,.+%4");
6662 else
6663 strcat (buf, " %0,%1,.+%4");
6664 output_asm_insn (buf, operands);
6665 return output_lbranch (negated ? operands[3] : operands[2],
6666 insn, xdelay);
6668 return buf;
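/* Illustrative output (operands assumed): the short branch-on-bit form
   from case 4 above, testing bit 5 of %r4 with which == 0 and no
   negation, comes out as

       bb,< %r4,5,L$0021    ; branch if the selected bit is 1  */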
6671 /* This routine handles all the branch-on-variable-bit conditional branch
6672 sequences we might need to generate. It handles nullification of delay
6673 slots, varying length branches, negated branches and all combinations
6674 of the above. It returns the appropriate output template to emit the
6675 branch. */
6677 const char *
6678 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6680 static char buf[100];
6681 int useskip = 0;
6682 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6683 int length = get_attr_length (insn);
6684 int xdelay;
6686 /* A conditional branch to the following instruction (e.g. the delay slot) is
6687 asking for a disaster. I do not think this can happen as this pattern
6688 is only used when optimizing; jump optimization should eliminate the
6689 jump. But be prepared just in case. */
6691 if (branch_to_delay_slot_p (insn))
6692 return "nop";
6694 /* If this is a long branch with its delay slot unfilled, set `nullify'
6695 as it can nullify the delay slot and save a nop. */
6696 if (length == 8 && dbr_sequence_length () == 0)
6697 nullify = 1;
6699 /* If this is a short forward conditional branch which did not get
6700 its delay slot filled, the delay slot can still be nullified. */
6701 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6702 nullify = forward_branch_p (insn);
6704 /* A forward branch over a single nullified insn can be done with an
6705 extrs instruction. This avoids a single cycle penalty due to
6706 mis-predicted branch if we fall through (branch not taken). */
6708 if (length == 4
6709 && next_real_insn (insn) != 0
6710 && get_attr_length (next_real_insn (insn)) == 4
6711 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6712 && nullify)
6713 useskip = 1;
6715 switch (length)
6718 /* All short conditional branches except backwards with an unfilled
6719 delay slot. */
6720 case 4:
6721 if (useskip)
6722 strcpy (buf, "{vextrs,|extrw,s,}");
6723 else
6724 strcpy (buf, "{bvb,|bb,}");
6725 if (useskip && GET_MODE (operands[0]) == DImode)
6726 strcpy (buf, "extrd,s,*");
6727 else if (GET_MODE (operands[0]) == DImode)
6728 strcpy (buf, "bb,*");
6729 if ((which == 0 && negated)
6730 || (which == 1 && ! negated))
6731 strcat (buf, ">=");
6732 else
6733 strcat (buf, "<");
6734 if (useskip)
6735 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6736 else if (nullify && negated)
6738 if (branch_needs_nop_p (insn))
6739 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6740 else
6741 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6743 else if (nullify && ! negated)
6745 if (branch_needs_nop_p (insn))
6746 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6747 else
6748 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6750 else if (! nullify && negated)
6751 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6752 else if (! nullify && ! negated)
6753 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6754 break;
6756 /* All long conditionals. Note a short backward branch with an
6757 unfilled delay slot is treated just like a long backward branch
6758 with an unfilled delay slot. */
6759 case 8:
6760 /* Handle weird backwards branch with a filled delay slot
6761 which is nullified. */
6762 if (dbr_sequence_length () != 0
6763 && ! forward_branch_p (insn)
6764 && nullify)
6766 strcpy (buf, "{bvb,|bb,}");
6767 if (GET_MODE (operands[0]) == DImode)
6768 strcat (buf, "*");
6769 if ((which == 0 && negated)
6770 || (which == 1 && ! negated))
6771 strcat (buf, "<");
6772 else
6773 strcat (buf, ">=");
6774 if (negated)
6775 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6776 else
6777 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6779 /* Handle short backwards branch with an unfilled delay slot.
6780 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6781 taken and untaken branches. */
6782 else if (dbr_sequence_length () == 0
6783 && ! forward_branch_p (insn)
6784 && INSN_ADDRESSES_SET_P ()
6785 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6786 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6788 strcpy (buf, "{bvb,|bb,}");
6789 if (GET_MODE (operands[0]) == DImode)
6790 strcat (buf, "*");
6791 if ((which == 0 && negated)
6792 || (which == 1 && ! negated))
6793 strcat (buf, ">=");
6794 else
6795 strcat (buf, "<");
6796 if (negated)
6797 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6798 else
6799 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6801 else
6803 strcpy (buf, "{vextrs,|extrw,s,}");
6804 if (GET_MODE (operands[0]) == DImode)
6805 strcpy (buf, "extrd,s,*");
6806 if ((which == 0 && negated)
6807 || (which == 1 && ! negated))
6808 strcat (buf, "<");
6809 else
6810 strcat (buf, ">=");
6811 if (nullify && negated)
6812 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6813 else if (nullify && ! negated)
6814 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6815 else if (negated)
6816 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6817 else
6818 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6820 break;
6822 default:
6823 /* The reversed conditional branch must branch over one additional
6824 instruction if the delay slot is filled and needs to be extracted
6825 by output_lbranch. If the delay slot is empty or this is a
6826 nullified forward branch, the instruction after the reversed
6827 condition branch must be nullified. */
6828 if (dbr_sequence_length () == 0
6829 || (nullify && forward_branch_p (insn)))
6831 nullify = 1;
6832 xdelay = 0;
6833 operands[4] = GEN_INT (length);
6835 else
6837 xdelay = 1;
6838 operands[4] = GEN_INT (length + 4);
6841 if (GET_MODE (operands[0]) == DImode)
6842 strcpy (buf, "bb,*");
6843 else
6844 strcpy (buf, "{bvb,|bb,}");
6845 if ((which == 0 && negated)
6846 || (which == 1 && !negated))
6847 strcat (buf, "<");
6848 else
6849 strcat (buf, ">=");
6850 if (nullify)
6851 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6852 else
6853 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6854 output_asm_insn (buf, operands);
6855 return output_lbranch (negated ? operands[3] : operands[2],
6856 insn, xdelay);
6858 return buf;
6861 /* Return the output template for emitting a dbra type insn.
6863 Note it may perform some output operations on its own before
6864 returning the final output string. */
6865 const char *
6866 output_dbra (rtx *operands, rtx insn, int which_alternative)
6868 int length = get_attr_length (insn);
6870 /* A conditional branch to the following instruction (e.g. the delay slot) is
6871 asking for a disaster. Be prepared! */
6873 if (branch_to_delay_slot_p (insn))
6875 if (which_alternative == 0)
6876 return "ldo %1(%0),%0";
6877 else if (which_alternative == 1)
6879 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6880 output_asm_insn ("ldw -16(%%r30),%4", operands);
6881 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6882 return "{fldws|fldw} -16(%%r30),%0";
6884 else
6886 output_asm_insn ("ldw %0,%4", operands);
6887 return "ldo %1(%4),%4\n\tstw %4,%0";
6891 if (which_alternative == 0)
6893 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6894 int xdelay;
6896 /* If this is a long branch with its delay slot unfilled, set `nullify'
6897 as it can nullify the delay slot and save a nop. */
6898 if (length == 8 && dbr_sequence_length () == 0)
6899 nullify = 1;
6901 /* If this is a short forward conditional branch which did not get
6902 its delay slot filled, the delay slot can still be nullified. */
6903 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6904 nullify = forward_branch_p (insn);
6906 switch (length)
6908 case 4:
6909 if (nullify)
6911 if (branch_needs_nop_p (insn))
6912 return "addib,%C2,n %1,%0,%3%#";
6913 else
6914 return "addib,%C2,n %1,%0,%3";
6916 else
6917 return "addib,%C2 %1,%0,%3";
6919 case 8:
6920 /* Handle weird backwards branch with a filled delay slot
6921 which is nullified. */
6922 if (dbr_sequence_length () != 0
6923 && ! forward_branch_p (insn)
6924 && nullify)
6925 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6926 /* Handle short backwards branch with an unfilled delay slot.
6927 Using an addb;nop rather than addi;bl saves 1 cycle for both
6928 taken and untaken branches. */
6929 else if (dbr_sequence_length () == 0
6930 && ! forward_branch_p (insn)
6931 && INSN_ADDRESSES_SET_P ()
6932 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6933 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6934 return "addib,%C2 %1,%0,%3%#";
6936 /* Handle normal cases. */
6937 if (nullify)
6938 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6939 else
6940 return "addi,%N2 %1,%0,%0\n\tb %3";
6942 default:
6943 /* The reversed conditional branch must branch over one additional
6944 instruction if the delay slot is filled and needs to be extracted
6945 by output_lbranch. If the delay slot is empty or this is a
6946 nullified forward branch, the instruction after the reversed
6947 condition branch must be nullified. */
6948 if (dbr_sequence_length () == 0
6949 || (nullify && forward_branch_p (insn)))
6951 nullify = 1;
6952 xdelay = 0;
6953 operands[4] = GEN_INT (length);
6955 else
6957 xdelay = 1;
6958 operands[4] = GEN_INT (length + 4);
6961 if (nullify)
6962 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
6963 else
6964 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
6966 return output_lbranch (operands[3], insn, xdelay);
6970 /* Deal with gross reload from FP register case. */
6971 else if (which_alternative == 1)
6973 /* Move loop counter from FP register to MEM then into a GR,
6974 increment the GR, store the GR into MEM, and finally reload
6975 the FP register from MEM from within the branch's delay slot. */
6976 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6977 operands);
6978 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6979 if (length == 24)
6980 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6981 else if (length == 28)
6982 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6983 else
6985 operands[5] = GEN_INT (length - 16);
6986 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
6987 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6988 return output_lbranch (operands[3], insn, 0);
6991 /* Deal with gross reload from memory case. */
6992 else
6994 /* Reload loop counter from memory, the store back to memory
6995 happens in the branch's delay slot. */
6996 output_asm_insn ("ldw %0,%4", operands);
6997 if (length == 12)
6998 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6999 else if (length == 16)
7000 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7001 else
7003 operands[5] = GEN_INT (length - 4);
7004 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7005 return output_lbranch (operands[3], insn, 0);
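/* Illustrative output (operands assumed): for a GR loop counter the
   common short form from case 4 above decrements and branches in a
   single instruction, e.g.

       addib,> -1,%r3,L$loop    ; add -1 to %r3, branch while > 0  */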
7010 /* Return the output template for emitting a movb type insn.
7012 Note it may perform some output operations on its own before
7013 returning the final output string. */
7014 const char *
7015 output_movb (rtx *operands, rtx insn, int which_alternative,
7016 int reverse_comparison)
7018 int length = get_attr_length (insn);
7020 /* A conditional branch to the following instruction (e.g. the delay slot) is
7021 asking for a disaster. Be prepared! */
7023 if (branch_to_delay_slot_p (insn))
7025 if (which_alternative == 0)
7026 return "copy %1,%0";
7027 else if (which_alternative == 1)
7029 output_asm_insn ("stw %1,-16(%%r30)", operands);
7030 return "{fldws|fldw} -16(%%r30),%0";
7032 else if (which_alternative == 2)
7033 return "stw %1,%0";
7034 else
7035 return "mtsar %r1";
7038 /* Support the second variant. */
7039 if (reverse_comparison)
7040 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7042 if (which_alternative == 0)
7044 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7045 int xdelay;
7047 /* If this is a long branch with its delay slot unfilled, set `nullify'
7048 as it can nullify the delay slot and save a nop. */
7049 if (length == 8 && dbr_sequence_length () == 0)
7050 nullify = 1;
7052 /* If this is a short forward conditional branch which did not get
7053 its delay slot filled, the delay slot can still be nullified. */
7054 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7055 nullify = forward_branch_p (insn);
7057 switch (length)
7059 case 4:
7060 if (nullify)
7062 if (branch_needs_nop_p (insn))
7063 return "movb,%C2,n %1,%0,%3%#";
7064 else
7065 return "movb,%C2,n %1,%0,%3";
7067 else
7068 return "movb,%C2 %1,%0,%3";
7070 case 8:
7071 /* Handle weird backwards branch with a filled delay slot
7072 which is nullified. */
7073 if (dbr_sequence_length () != 0
7074 && ! forward_branch_p (insn)
7075 && nullify)
7076 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7078 /* Handle short backwards branch with an unfilled delay slot.
7079 Using a movb;nop rather than or;bl saves 1 cycle for both
7080 taken and untaken branches. */
7081 else if (dbr_sequence_length () == 0
7082 && ! forward_branch_p (insn)
7083 && INSN_ADDRESSES_SET_P ()
7084 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7085 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7086 return "movb,%C2 %1,%0,%3%#";
7087 /* Handle normal cases. */
7088 if (nullify)
7089 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7090 else
7091 return "or,%N2 %1,%%r0,%0\n\tb %3";
7093 default:
7094 /* The reversed conditional branch must branch over one additional
7095 instruction if the delay slot is filled and needs to be extracted
7096 by output_lbranch. If the delay slot is empty or this is a
7097 nullified forward branch, the instruction after the reversed
7098 condition branch must be nullified. */
7099 if (dbr_sequence_length () == 0
7100 || (nullify && forward_branch_p (insn)))
7102 nullify = 1;
7103 xdelay = 0;
7104 operands[4] = GEN_INT (length);
7106 else
7108 xdelay = 1;
7109 operands[4] = GEN_INT (length + 4);
7112 if (nullify)
7113 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7114 else
7115 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7117 return output_lbranch (operands[3], insn, xdelay);
7120 /* Deal with gross reload for FP destination register case. */
7121 else if (which_alternative == 1)
7123 /* Move source register to MEM, perform the branch test, then
7124 finally load the FP register from MEM from within the branch's
7125 delay slot. */
7126 output_asm_insn ("stw %1,-16(%%r30)", operands);
7127 if (length == 12)
7128 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7129 else if (length == 16)
7130 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7131 else
7133 operands[4] = GEN_INT (length - 4);
7134 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7135 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7136 return output_lbranch (operands[3], insn, 0);
7139 /* Deal with gross reload from memory case. */
7140 else if (which_alternative == 2)
7142 /* Reload loop counter from memory, the store back to memory
7143 happens in the branch's delay slot. */
7144 if (length == 8)
7145 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7146 else if (length == 12)
7147 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7148 else
7150 operands[4] = GEN_INT (length);
7151 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7152 operands);
7153 return output_lbranch (operands[3], insn, 0);
7156 /* Handle SAR as a destination. */
7157 else
7159 if (length == 8)
7160 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7161 else if (length == 12)
7162 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7163 else
7165 operands[4] = GEN_INT (length);
7166 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7167 operands);
7168 return output_lbranch (operands[3], insn, 0);
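/* Illustrative output (operands assumed): the common short form copies
   and branches in one instruction, e.g.

       movb,= %r26,%r3,L$0007    ; copy %r26 to %r3, branch if zero  */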
7173 /* Copy any FP arguments in INSN into integer registers. */
7174 static void
7175 copy_fp_args (rtx insn)
7177 rtx link;
7178 rtx xoperands[2];
7180 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7182 int arg_mode, regno;
7183 rtx use = XEXP (link, 0);
7185 if (! (GET_CODE (use) == USE
7186 && GET_CODE (XEXP (use, 0)) == REG
7187 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7188 continue;
7190 arg_mode = GET_MODE (XEXP (use, 0));
7191 regno = REGNO (XEXP (use, 0));
7193 /* Is it a floating point register? */
7194 if (regno >= 32 && regno <= 39)
7196 /* Copy the FP register into an integer register via memory. */
7197 if (arg_mode == SFmode)
7199 xoperands[0] = XEXP (use, 0);
7200 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7201 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7202 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7204 else
7206 xoperands[0] = XEXP (use, 0);
7207 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7208 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7209 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7210 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7216 /* Compute length of the FP argument copy sequence for INSN. */
7217 static int
7218 length_fp_args (rtx insn)
7220 int length = 0;
7221 rtx link;
7223 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7225 int arg_mode, regno;
7226 rtx use = XEXP (link, 0);
7228 if (! (GET_CODE (use) == USE
7229 && GET_CODE (XEXP (use, 0)) == REG
7230 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7231 continue;
7233 arg_mode = GET_MODE (XEXP (use, 0));
7234 regno = REGNO (XEXP (use, 0));
7236 /* Is it a floating point register? */
7237 if (regno >= 32 && regno <= 39)
7239 if (arg_mode == SFmode)
7240 length += 8;
7241 else
7242 length += 12;
7246 return length;
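/* For example, a call passing one SFmode and one DFmode argument in FP
   registers needs 8 + 12 = 20 bytes of copy code by the counts above.  */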
7249 /* Return the attribute length for the millicode call instruction INSN.
7250 The length must match the code generated by output_millicode_call.
7251 We include the delay slot in the returned length as it is better to
7252 overestimate the length than to underestimate it. */
7254 int
7255 attr_length_millicode_call (rtx insn)
7257 unsigned long distance = -1;
7258 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7260 if (INSN_ADDRESSES_SET_P ())
7262 distance = (total + insn_current_reference_address (insn));
7263 if (distance < total)
7264 distance = -1;
7267 if (TARGET_64BIT)
7269 if (!TARGET_LONG_CALLS && distance < 7600000)
7270 return 8;
7272 return 20;
7274 else if (TARGET_PORTABLE_RUNTIME)
7275 return 24;
7276 else
7278 if (!TARGET_LONG_CALLS && distance < 240000)
7279 return 8;
7281 if (TARGET_LONG_ABS_CALL && !flag_pic)
7282 return 12;
7284 return 24;
7288 /* INSN is a function call. It may have an unconditional jump
7289 in its delay slot.
7291 CALL_DEST is the routine we are calling. */
7293 const char *
7294 output_millicode_call (rtx insn, rtx call_dest)
7296 int attr_length = get_attr_length (insn);
7297 int seq_length = dbr_sequence_length ();
7298 int distance;
7299 rtx seq_insn;
7300 rtx xoperands[3];
7302 xoperands[0] = call_dest;
7303 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7305 /* Handle the common case where we are sure that the branch will
7306 reach the beginning of the $CODE$ subspace. The within reach
7307 form of the $$sh_func_adrs call has a length of 28. Because
7308 it has an attribute type of multi, it never has a nonzero
7309 sequence length. The length of the $$sh_func_adrs is the same
7310 as certain out of reach PIC calls to other routines. */
7311 if (!TARGET_LONG_CALLS
7312 && ((seq_length == 0
7313 && (attr_length == 12
7314 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7315 || (seq_length != 0 && attr_length == 8)))
7317 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7319 else
7321 if (TARGET_64BIT)
7323 /* It might seem that one insn could be saved by accessing
7324 the millicode function using the linkage table. However,
7325 this doesn't work in shared libraries and other dynamically
7326 loaded objects. Using a pc-relative sequence also avoids
7327 problems related to the implicit use of the gp register. */
7328 output_asm_insn ("b,l .+8,%%r1", xoperands);
7330 if (TARGET_GAS)
7332 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7333 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7335 else
7337 xoperands[1] = gen_label_rtx ();
7338 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7339 targetm.asm_out.internal_label (asm_out_file, "L",
7340 CODE_LABEL_NUMBER (xoperands[1]));
7341 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7344 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7346 else if (TARGET_PORTABLE_RUNTIME)
7348 /* Pure portable runtime doesn't allow be/ble; we also don't
7349 have PIC support in the assembler/linker, so this sequence
7350 is needed. */
7352 /* Get the address of our target into %r1. */
7353 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7354 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7356 /* Get our return address into %r31. */
7357 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7358 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7360 /* Jump to our target address in %r1. */
7361 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7363 else if (!flag_pic)
7365 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7366 if (TARGET_PA_20)
7367 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7368 else
7369 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7371 else
7373 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7374 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7376 if (TARGET_SOM || !TARGET_GAS)
7378 /* The HP assembler can generate relocations for the
7379 difference of two symbols. GAS can do this for a
7380 millicode symbol but not an arbitrary external
7381 symbol when generating SOM output. */
7382 xoperands[1] = gen_label_rtx ();
7383 targetm.asm_out.internal_label (asm_out_file, "L",
7384 CODE_LABEL_NUMBER (xoperands[1]));
7385 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7386 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7388 else
7390 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7391 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7392 xoperands);
7395 /* Jump to our target address in %r1. */
7396 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7400 if (seq_length == 0)
7401 output_asm_insn ("nop", xoperands);
7403 /* We are done if there isn't a jump in the delay slot. */
7404 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7405 return "";
7407 /* This call has an unconditional jump in its delay slot. */
7408 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7410 /* See if the return address can be adjusted. Use the containing
7411 sequence insn's address. */
7412 if (INSN_ADDRESSES_SET_P ())
7414 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7415 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7416 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7418 if (VAL_14_BITS_P (distance))
7420 xoperands[1] = gen_label_rtx ();
7421 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7422 targetm.asm_out.internal_label (asm_out_file, "L",
7423 CODE_LABEL_NUMBER (xoperands[1]));
7425 else
7426 /* ??? This branch may not reach its target. */
7427 output_asm_insn ("nop\n\tb,n %0", xoperands);
7429 else
7430 /* ??? This branch may not reach its target. */
7431 output_asm_insn ("nop\n\tb,n %0", xoperands);
7433 /* Delete the jump. */
7434 SET_INSN_DELETED (NEXT_INSN (insn));
7436 return "";
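/* Illustrative output (millicode routine assumed): the common
   short-reach 32-bit case above emits

       bl $$remU,%r31     ; b,l with PA 2.0 mnemonics; %r2 on 64-bit
       nop                ; only when the delay slot is unfilled  */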
7439 /* Return the attribute length of the call instruction INSN. The SIBCALL
7440 flag indicates whether INSN is a regular call or a sibling call. The
7441 length returned must be longer than the code actually generated by
7442 output_call. Since branch shortening is done before delay branch
7443 sequencing, there is no way to determine whether or not the delay
7444 slot will be filled during branch shortening. Even when the delay
7445 slot is filled, we may have to add a nop if the delay slot contains
7446 a branch that can't reach its target. Thus, we always have to include
7447 the delay slot in the length estimate. This used to be done in
7448 pa_adjust_insn_length but we do it here now as some sequences always
7449 fill the delay slot and we can save four bytes in the estimate for
7450 these sequences. */
7452 int
7453 attr_length_call (rtx insn, int sibcall)
7455 int local_call;
7456 rtx call, call_dest;
7457 tree call_decl;
7458 int length = 0;
7459 rtx pat = PATTERN (insn);
7460 unsigned long distance = -1;
7462 gcc_assert (GET_CODE (insn) == CALL_INSN);
7464 if (INSN_ADDRESSES_SET_P ())
7466 unsigned long total;
7468 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7469 distance = (total + insn_current_reference_address (insn));
7470 if (distance < total)
7471 distance = -1;
7474 gcc_assert (GET_CODE (pat) == PARALLEL);
7476 /* Get the call rtx. */
7477 call = XVECEXP (pat, 0, 0);
7478 if (GET_CODE (call) == SET)
7479 call = SET_SRC (call);
7481 gcc_assert (GET_CODE (call) == CALL);
7483 /* Determine if this is a local call. */
7484 call_dest = XEXP (XEXP (call, 0), 0);
7485 call_decl = SYMBOL_REF_DECL (call_dest);
7486 local_call = call_decl && targetm.binds_local_p (call_decl);
7488 /* pc-relative branch. */
7489 if (!TARGET_LONG_CALLS
7490 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7491 || distance < 240000))
7492 length += 8;
7494 /* 64-bit plabel sequence. */
7495 else if (TARGET_64BIT && !local_call)
7496 length += sibcall ? 28 : 24;
7498 /* non-pic long absolute branch sequence. */
7499 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7500 length += 12;
7502 /* long pc-relative branch sequence. */
7503 else if (TARGET_LONG_PIC_SDIFF_CALL
7504 || (TARGET_GAS && !TARGET_SOM
7505 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7507 length += 20;
7509 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7510 length += 8;
7513 /* 32-bit plabel sequence. */
7514 else
7516 length += 32;
7518 if (TARGET_SOM)
7519 length += length_fp_args (insn);
7521 if (flag_pic)
7522 length += 4;
7524 if (!TARGET_PA_20)
7526 if (!sibcall)
7527 length += 8;
7529 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7530 length += 8;
7534 return length;
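/* Worked example (derived from the cases above): a PIC sibcall that
   must use the 32-bit plabel sequence on a pre-2.0 SOM target totals
   32 bytes, plus the FP argument copy code, plus 4 for PIC, plus 8 for
   the space register setup, i.e. at least 44 bytes.  */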
7537 /* INSN is a function call. It may have an unconditional jump
7538 in its delay slot.
7540 CALL_DEST is the routine we are calling. */
7542 const char *
7543 output_call (rtx insn, rtx call_dest, int sibcall)
7545 int delay_insn_deleted = 0;
7546 int delay_slot_filled = 0;
7547 int seq_length = dbr_sequence_length ();
7548 tree call_decl = SYMBOL_REF_DECL (call_dest);
7549 int local_call = call_decl && targetm.binds_local_p (call_decl);
7550 rtx xoperands[2];
7552 xoperands[0] = call_dest;
7554 /* Handle the common case where we're sure that the branch will reach
7555 the beginning of the "$CODE$" subspace. This is the beginning of
7556 the current function if we are in a named section. */
7557 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7559 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7560 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7562 else
7564 if (TARGET_64BIT && !local_call)
7566 /* ??? As far as I can tell, the HP linker doesn't support the
7567 long pc-relative sequence described in the 64-bit runtime
7568 architecture. So, we use a slightly longer indirect call. */
7569 xoperands[0] = get_deferred_plabel (call_dest);
7570 xoperands[1] = gen_label_rtx ();
7572 /* If this isn't a sibcall, we put the load of %r27 into the
7573 delay slot. We can't do this in a sibcall as we don't
7574 have a second call-clobbered scratch register available. */
7575 if (seq_length != 0
7576 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7577 && !sibcall)
7579 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7580 optimize, 0, NULL);
7582 /* Now delete the delay insn. */
7583 SET_INSN_DELETED (NEXT_INSN (insn));
7584 delay_insn_deleted = 1;
7587 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7588 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7589 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7591 if (sibcall)
7593 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7594 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7595 output_asm_insn ("bve (%%r1)", xoperands);
7597 else
7599 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7600 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7601 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7602 delay_slot_filled = 1;
7605 else
7607 int indirect_call = 0;
7609 /* Emit a long call. There are several different sequences
7610 of increasing length and complexity. In most cases,
7611 they don't allow an instruction in the delay slot. */
7612 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7613 && !TARGET_LONG_PIC_SDIFF_CALL
7614 && !(TARGET_GAS && !TARGET_SOM
7615 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7616 && !TARGET_64BIT)
7617 indirect_call = 1;
7619 if (seq_length != 0
7620 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7621 && !sibcall
7622 && (!TARGET_PA_20
7623 || indirect_call
7624 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7626 /* A non-jump insn in the delay slot. By definition we can
7627 emit this insn before the call (and in fact before argument
7628 relocation). */
7629 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7630 NULL);
7632 /* Now delete the delay insn. */
7633 SET_INSN_DELETED (NEXT_INSN (insn));
7634 delay_insn_deleted = 1;
7637 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7639 /* This is the best sequence for making long calls in
7640 non-pic code. Unfortunately, GNU ld doesn't provide
7641 the stub needed for external calls, and GAS's support
7642 for this with the SOM linker is buggy. It is safe
7643 to use this for local calls. */
7644 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7645 if (sibcall)
7646 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7647 else
7649 if (TARGET_PA_20)
7650 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7651 xoperands);
7652 else
7653 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7655 output_asm_insn ("copy %%r31,%%r2", xoperands);
7656 delay_slot_filled = 1;
7659 else
7661 if (TARGET_LONG_PIC_SDIFF_CALL)
7663 /* The HP assembler and linker can handle relocations
7664 for the difference of two symbols. The HP assembler
7665 recognizes the sequence as a pc-relative call and
7666 the linker provides stubs when needed. */
7667 xoperands[1] = gen_label_rtx ();
7668 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7669 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7670 targetm.asm_out.internal_label (asm_out_file, "L",
7671 CODE_LABEL_NUMBER (xoperands[1]));
7672 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7674 else if (TARGET_GAS && !TARGET_SOM
7675 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7677 /* GAS currently can't generate the relocations that
7678 are needed for the SOM linker under HP-UX using this
7679 sequence. The GNU linker doesn't generate the stubs
7680 that are needed for external calls on TARGET_ELF32
7681 with this sequence. For now, we have to use a
7682 longer plabel sequence when using GAS. */
7683 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7684 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7685 xoperands);
7686 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7687 xoperands);
7689 else
7691 /* Emit a long plabel-based call sequence. This is
7692 essentially an inline implementation of $$dyncall.
7693 We don't actually try to call $$dyncall as this is
7694 as difficult as calling the function itself. */
7695 xoperands[0] = get_deferred_plabel (call_dest);
7696 xoperands[1] = gen_label_rtx ();
7698 /* Since the call is indirect, FP arguments in registers
7699 need to be copied to the general registers. Then, the
7700 argument relocation stub will copy them back. */
7701 if (TARGET_SOM)
7702 copy_fp_args (insn);
7704 if (flag_pic)
7706 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7707 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7708 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7710 else
7712 output_asm_insn ("addil LR'%0-$global$,%%r27",
7713 xoperands);
7714 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7715 xoperands);
7718 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7719 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7720 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7721 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7723 if (!sibcall && !TARGET_PA_20)
7725 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7726 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7727 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7728 else
7729 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7733 if (TARGET_PA_20)
7735 if (sibcall)
7736 output_asm_insn ("bve (%%r1)", xoperands);
7737 else
7739 if (indirect_call)
7741 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7742 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7743 delay_slot_filled = 1;
7745 else
7746 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7749 else
7751 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7752 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7753 xoperands);
7755 if (sibcall)
7757 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7758 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7759 else
7760 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7762 else
7764 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7765 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7766 else
7767 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7769 if (indirect_call)
7770 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7771 else
7772 output_asm_insn ("copy %%r31,%%r2", xoperands);
7773 delay_slot_filled = 1;
7780 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7781 output_asm_insn ("nop", xoperands);
7783 /* We are done if there isn't a jump in the delay slot. */
7784 if (seq_length == 0
7785 || delay_insn_deleted
7786 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7787 return "";
7789 /* A sibcall should never have a branch in the delay slot. */
7790 gcc_assert (!sibcall);
7792 /* This call has an unconditional jump in its delay slot. */
7793 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7795 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7797 /* See if the return address can be adjusted. Use the containing
7798 sequence insn's address. This would break the regular call/return
7799 relationship assumed by the table-based EH unwinder, so only do that
7800 if the call is not possibly throwing. */
7801 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7802 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7803 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7805 if (VAL_14_BITS_P (distance)
7806 && !(can_throw_internal (insn) || can_throw_external (insn)))
7808 xoperands[1] = gen_label_rtx ();
7809 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7810 targetm.asm_out.internal_label (asm_out_file, "L",
7811 CODE_LABEL_NUMBER (xoperands[1]));
7813 else
7814 output_asm_insn ("nop\n\tb,n %0", xoperands);
7816 else
7817 output_asm_insn ("b,n %0", xoperands);
7819 /* Delete the jump. */
7820 SET_INSN_DELETED (NEXT_INSN (insn));
7822 return "";
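/* For reference, a sketch of the TARGET_LONG_PIC_SDIFF_CALL sequence
   emitted above, assembled by hand from the template strings ("foo"
   and "L$0" are placeholder names):

	b,l .+8,%r1		; put the pc in %r1
	addil L'foo-L$0,%r1	; add the high part of the offset
   L$0:	ldo R'foo-L$0(%r1),%r1	; add the low part
				; ... then branch through %r1

   As noted above, the HP assembler recognizes this as a pc-relative
   call and the linker provides stubs when needed.  */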
7825 /* Return the attribute length of the indirect call instruction INSN.
7826 The length must match the code generated by output_indirect_call.
7827 The returned length includes the delay slot. Currently, the delay
7828 slot of an indirect call sequence is not exposed and it is used by
7829 the sequence itself. */
7832 attr_length_indirect_call (rtx insn)
7834 unsigned long distance = -1;
7835 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7837 if (INSN_ADDRESSES_SET_P ())
7839 distance = (total + insn_current_reference_address (insn));
7840 if (distance < total)
7841 distance = -1;
7844 if (TARGET_64BIT)
7845 return 12;
7847 if (TARGET_FAST_INDIRECT_CALLS
7848 || (!TARGET_PORTABLE_RUNTIME
7849 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7850 || distance < 240000)))
7851 return 8;
7853 if (flag_pic)
7854 return 24;
7856 if (TARGET_PORTABLE_RUNTIME)
7857 return 20;
7859 /* Out of reach, can use ble. */
7860 return 12;
7863 const char *
7864 output_indirect_call (rtx insn, rtx call_dest)
7866 rtx xoperands[1];
7868 if (TARGET_64BIT)
7870 xoperands[0] = call_dest;
7871 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7872 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7873 return "";
7876 /* First the special case for kernels, level 0 systems, etc. */
7877 if (TARGET_FAST_INDIRECT_CALLS)
7878 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7880 /* Now the normal case -- we can reach $$dyncall directly or
7881 we're sure that we can get there via a long-branch stub.
7883 No need to check target flags as the length uniquely identifies
7884 the remaining cases. */
7885 if (attr_length_indirect_call (insn) == 8)
7887 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7888 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7889 variant of the B,L instruction can't be used on the SOM target. */
7890 if (TARGET_PA_20 && !TARGET_SOM)
7891 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7892 else
7893 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7896 /* Long millicode call, but we are not generating PIC or portable runtime
7897 code. */
7898 if (attr_length_indirect_call (insn) == 12)
7899 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7901 /* Long millicode call for portable runtime. */
7902 if (attr_length_indirect_call (insn) == 20)
7903 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7905 /* We need a long PIC call to $$dyncall. */
7906 xoperands[0] = NULL_RTX;
7907 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7908 if (TARGET_SOM || !TARGET_GAS)
7910 xoperands[0] = gen_label_rtx ();
7911 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7912 targetm.asm_out.internal_label (asm_out_file, "L",
7913 CODE_LABEL_NUMBER (xoperands[0]));
7914 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7916 else
7918 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7919 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7920 xoperands);
7922 output_asm_insn ("blr %%r0,%%r2", xoperands);
7923 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7924 return "";
7927 /* Return the total length of the save and restore instructions needed for
7928 the data linkage table pointer (i.e., the PIC register) across the call
7929 instruction INSN. No-return calls do not require a save and restore.
7930 In addition, we may be able to avoid the save and restore for calls
7931 within the same translation unit. */
7934 attr_length_save_restore_dltp (rtx insn)
7936 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7937 return 0;
7939 return 8;
7942 /* In HP-UX 8.0's shared library scheme, special relocations are needed
7943 for function labels if they might be passed to a function
7944 in a shared library (because shared libraries don't live in code
7945 space), and special magic is needed to construct their address. */
7947 void
7948 hppa_encode_label (rtx sym)
7950 const char *str = XSTR (sym, 0);
7951 int len = strlen (str) + 1;
7952 char *newstr, *p;
7954 p = newstr = XALLOCAVEC (char, len + 1);
7955 *p++ = '@';
7956 strcpy (p, str);
7958 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
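/* For example (illustrative): a SYMBOL_REF named "foo" leaves
   hppa_encode_label named "@foo".  The '@' prefix is what
   FUNCTION_NAME_P and pa_strip_name_encoding below key off.  */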
7961 static void
7962 pa_encode_section_info (tree decl, rtx rtl, int first)
7964 int old_referenced = 0;
7966 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
7967 old_referenced
7968 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
7970 default_encode_section_info (decl, rtl, first);
7972 if (first && TEXT_SPACE_P (decl))
7974 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7975 if (TREE_CODE (decl) == FUNCTION_DECL)
7976 hppa_encode_label (XEXP (rtl, 0));
7978 else if (old_referenced)
7979 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
7982 /* This is sort of inverse to pa_encode_section_info. */
7984 static const char *
7985 pa_strip_name_encoding (const char *str)
7987 str += (*str == '@');
7988 str += (*str == '*');
7989 return str;
7993 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7995 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7998 /* Returns 1 if OP is a function label involved in a simple addition
7999 with a constant. Used to keep certain patterns from matching
8000 during instruction combination. */
8002 is_function_label_plus_const (rtx op)
8004 /* Strip off any CONST. */
8005 if (GET_CODE (op) == CONST)
8006 op = XEXP (op, 0);
8008 return (GET_CODE (op) == PLUS
8009 && function_label_operand (XEXP (op, 0), Pmode)
8010 && GET_CODE (XEXP (op, 1)) == CONST_INT);
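/* A hypothetical RTL form matched by is_function_label_plus_const,
   using the '@' encoding described above:

     (const (plus (symbol_ref "@foo") (const_int 4)))

   The outer CONST is stripped, leaving a PLUS of a function label
   and a CONST_INT.  */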
8013 /* Output assembly code for a thunk to FUNCTION. */
8015 static void
8016 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8017 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8018 tree function)
8020 static unsigned int current_thunk_number;
8021 int val_14 = VAL_14_BITS_P (delta);
8022 unsigned int old_last_address = last_address, nbytes = 0;
8023 char label[16];
8024 rtx xoperands[4];
8026 xoperands[0] = XEXP (DECL_RTL (function), 0);
8027 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8028 xoperands[2] = GEN_INT (delta);
8030 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8031 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8033 /* Output the thunk. We know that the function is in the same
8034 translation unit (i.e., the same space) as the thunk, and that
8035 thunks are output after their method. Thus, we don't need an
8036 external branch to reach the function. With SOM and GAS,
8037 functions and thunks are effectively in different sections.
8038 Thus, we can always use an IA-relative branch and the linker
8039 will add a long branch stub if necessary.
8041 However, we have to be careful when generating PIC code on the
8042 SOM port to ensure that the sequence does not transfer to an
8043 import stub for the target function as this could clobber the
8044 return value saved at SP-24. This would also apply to the
8045 32-bit linux port if the multi-space model is implemented. */
8046 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8047 && !(flag_pic && TREE_PUBLIC (function))
8048 && (TARGET_GAS || last_address < 262132))
8049 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8050 && ((targetm.have_named_sections
8051 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8052 /* The GNU 64-bit linker has rather poor stub management.
8053 So, we use a long branch from thunks that aren't in
8054 the same section as the target function. */
8055 && ((!TARGET_64BIT
8056 && (DECL_SECTION_NAME (thunk_fndecl)
8057 != DECL_SECTION_NAME (function)))
8058 || ((DECL_SECTION_NAME (thunk_fndecl)
8059 == DECL_SECTION_NAME (function))
8060 && last_address < 262132)))
8061 || (targetm.have_named_sections
8062 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8063 && DECL_SECTION_NAME (function) == NULL
8064 && last_address < 262132)
8065 || (!targetm.have_named_sections && last_address < 262132))))
8067 if (!val_14)
8068 output_asm_insn ("addil L'%2,%%r26", xoperands);
8070 output_asm_insn ("b %0", xoperands);
8072 if (val_14)
8074 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8075 nbytes += 8;
8077 else
8079 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8080 nbytes += 12;
8083 else if (TARGET_64BIT)
8085 /* We only have one call-clobbered scratch register, so we can't
8086 make use of the delay slot if delta doesn't fit in 14 bits. */
8087 if (!val_14)
8089 output_asm_insn ("addil L'%2,%%r26", xoperands);
8090 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8093 output_asm_insn ("b,l .+8,%%r1", xoperands);
8095 if (TARGET_GAS)
8097 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8098 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8100 else
8102 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8103 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8106 if (val_14)
8108 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8109 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8110 nbytes += 20;
8112 else
8114 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8115 nbytes += 24;
8118 else if (TARGET_PORTABLE_RUNTIME)
8120 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8121 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8123 if (!val_14)
8124 output_asm_insn ("addil L'%2,%%r26", xoperands);
8126 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8128 if (val_14)
8130 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8131 nbytes += 16;
8133 else
8135 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8136 nbytes += 20;
8139 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8141 /* The function is accessible from outside this module. The only
8142 way to avoid an import stub between the thunk and function is to
8143 call the function directly with an indirect sequence similar to
8144 that used by $$dyncall. This is possible because $$dyncall acts
8145 as the import stub in an indirect call. */
8146 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8147 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8148 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8149 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8150 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8151 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8152 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8153 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8154 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8156 if (!val_14)
8158 output_asm_insn ("addil L'%2,%%r26", xoperands);
8159 nbytes += 4;
8162 if (TARGET_PA_20)
8164 output_asm_insn ("bve (%%r22)", xoperands);
8165 nbytes += 36;
8167 else if (TARGET_NO_SPACE_REGS)
8169 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8170 nbytes += 36;
8172 else
8174 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8175 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8176 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8177 nbytes += 44;
8180 if (val_14)
8181 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8182 else
8183 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8185 else if (flag_pic)
8187 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8189 if (TARGET_SOM || !TARGET_GAS)
8191 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8192 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8194 else
8196 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8197 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8200 if (!val_14)
8201 output_asm_insn ("addil L'%2,%%r26", xoperands);
8203 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8205 if (val_14)
8207 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8208 nbytes += 20;
8210 else
8212 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8213 nbytes += 24;
8216 else
8218 if (!val_14)
8219 output_asm_insn ("addil L'%2,%%r26", xoperands);
8221 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8222 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8224 if (val_14)
8226 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8227 nbytes += 12;
8229 else
8231 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8232 nbytes += 16;
8236 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8238 if (TARGET_SOM && TARGET_GAS)
8240 /* We're done with this subspace except possibly for some additional
8241 debug information. Forget that we are in this subspace to ensure
8242 that the next function is output in its own subspace. */
8243 in_section = NULL;
8244 cfun->machine->in_nsubspa = 2;
8247 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8249 switch_to_section (data_section);
8250 output_asm_insn (".align 4", xoperands);
8251 ASM_OUTPUT_LABEL (file, label);
8252 output_asm_insn (".word P'%0", xoperands);
8255 current_thunk_number++;
8256 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8257 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8258 last_address += nbytes;
8259 if (old_last_address > last_address)
8260 last_address = UINT_MAX;
8261 update_total_code_bytes (nbytes);
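/* A sketch of the shortest thunk body produced above, assuming the
   short-branch case with a 14-bit DELTA of, say, 8 ("foo" is a
   placeholder target):

	.PROC
	.CALLINFO FRAME=0,NO_CALLS
	.ENTRY
	b foo			; tail branch to the real function
	ldo 8(%r26),%r26	; adjust the this pointer in the
				; delay slot
	.EXIT
	.PROCEND
*/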
8264 /* Only direct calls to static functions are allowed to be sibling (tail)
8265 call optimized.
8267 This restriction is necessary because some linker-generated stubs
8268 will, in some cases, store return pointers into rp', which might
8269 clobber a live value already in rp'.
8271 In a sibcall the current function and the target function share stack
8272 space. Thus if the path to the current function and the path to the
8273 target function save a value in rp', they save the value into the
8274 same stack slot, which has undesirable consequences.
8276 Because of the deferred binding nature of shared libraries any function
8277 with external scope could be in a different load module and thus require
8278 rp' to be saved when calling that function. So sibcall optimizations
8279 can only be safe for static functions.
8281 Note that GCC never needs return value relocations, so we don't have to
8282 worry about static calls with return value relocations (which require
8283 saving rp').
8285 It is safe to perform a sibcall optimization when the target function
8286 will never return. */
8287 static bool
8288 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8290 if (TARGET_PORTABLE_RUNTIME)
8291 return false;
8293 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8294 single subspace mode and the call is not indirect. As far as I know,
8295 there is no operating system support for the multiple subspace mode.
8296 It might be possible to support indirect calls if we didn't use
8297 $$dyncall (see the indirect sequence generated in output_call). */
8298 if (TARGET_ELF32)
8299 return (decl != NULL_TREE);
8301 /* Sibcalls are not ok because the arg pointer register is not a fixed
8302 register. This prevents the sibcall optimization from occurring. In
8303 addition, there are problems with stub placement using GNU ld. This
8304 is because a normal sibcall branch uses a 17-bit relocation while
8305 a regular call branch uses a 22-bit relocation. As a result, more
8306 care needs to be taken in the placement of long-branch stubs. */
8307 if (TARGET_64BIT)
8308 return false;
8310 /* Sibcalls are only ok within a translation unit. */
8311 return (decl && !TREE_PUBLIC (decl));
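/* For example (illustrative): a call to a file-local

     static int f (int);

   can be sibcall optimized here, while a call to an extern function
   is rejected because deferred binding could resolve it to a
   different load module that requires rp' to be saved.  */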
8314 /* ??? Addition is not commutative on the PA due to the weird implicit
8315 space register selection rules for memory addresses. Therefore, we
8316 don't consider a + b == b + a, as this might be inside a MEM. */
8317 static bool
8318 pa_commutative_p (const_rtx x, int outer_code)
8320 return (COMMUTATIVE_P (x)
8321 && (TARGET_NO_SPACE_REGS
8322 || (outer_code != UNKNOWN && outer_code != MEM)
8323 || GET_CODE (x) != PLUS));
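/* A hypothetical example of the problem: with space registers in
   use, the implicit space register of a memory reference is derived
   from the base operand, so

     (mem (plus (reg R1) (reg R2)))

   and

     (mem (plus (reg R2) (reg R1)))

   may select different spaces and cannot be interchanged.  */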
8326 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8327 use in fmpyadd instructions. */
8329 fmpyaddoperands (rtx *operands)
8331 enum machine_mode mode = GET_MODE (operands[0]);
8333 /* Must be a floating point mode. */
8334 if (mode != SFmode && mode != DFmode)
8335 return 0;
8337 /* All modes must be the same. */
8338 if (! (mode == GET_MODE (operands[1])
8339 && mode == GET_MODE (operands[2])
8340 && mode == GET_MODE (operands[3])
8341 && mode == GET_MODE (operands[4])
8342 && mode == GET_MODE (operands[5])))
8343 return 0;
8345 /* All operands must be registers. */
8346 if (! (GET_CODE (operands[1]) == REG
8347 && GET_CODE (operands[2]) == REG
8348 && GET_CODE (operands[3]) == REG
8349 && GET_CODE (operands[4]) == REG
8350 && GET_CODE (operands[5]) == REG))
8351 return 0;
8353 /* Only 2 real operands to the addition. One of the input operands must
8354 be the same as the output operand. */
8355 if (! rtx_equal_p (operands[3], operands[4])
8356 && ! rtx_equal_p (operands[3], operands[5]))
8357 return 0;
8359 /* Inout operand of add cannot conflict with any operands from multiply. */
8360 if (rtx_equal_p (operands[3], operands[0])
8361 || rtx_equal_p (operands[3], operands[1])
8362 || rtx_equal_p (operands[3], operands[2]))
8363 return 0;
8365 /* The multiply output cannot feed the addition inputs. */
8366 if (rtx_equal_p (operands[4], operands[0])
8367 || rtx_equal_p (operands[5], operands[0]))
8368 return 0;
8370 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8371 if (mode == SFmode
8372 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8373 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8374 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8375 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8376 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8377 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8378 return 0;
8380 /* Passed. Operands are suitable for fmpyadd. */
8381 return 1;
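/* The operand layout validated above, as a sketch:

     operands[0] = operands[1] * operands[2]   ; the fmpy half
     operands[3] = operands[4] + operands[5]   ; the fadd half, where
                                               ; operands[3] matches
                                               ; operands[4] or [5]

   with operands[3] acting as the add's input/output and neither half
   feeding the other, so the pair can issue as a single fmpyadd.  */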
8384 #if !defined(USE_COLLECT2)
8385 static void
8386 pa_asm_out_constructor (rtx symbol, int priority)
8388 if (!function_label_operand (symbol, VOIDmode))
8389 hppa_encode_label (symbol);
8391 #ifdef CTORS_SECTION_ASM_OP
8392 default_ctor_section_asm_out_constructor (symbol, priority);
8393 #else
8394 # ifdef TARGET_ASM_NAMED_SECTION
8395 default_named_section_asm_out_constructor (symbol, priority);
8396 # else
8397 default_stabs_asm_out_constructor (symbol, priority);
8398 # endif
8399 #endif
8402 static void
8403 pa_asm_out_destructor (rtx symbol, int priority)
8405 if (!function_label_operand (symbol, VOIDmode))
8406 hppa_encode_label (symbol);
8408 #ifdef DTORS_SECTION_ASM_OP
8409 default_dtor_section_asm_out_destructor (symbol, priority);
8410 #else
8411 # ifdef TARGET_ASM_NAMED_SECTION
8412 default_named_section_asm_out_destructor (symbol, priority);
8413 # else
8414 default_stabs_asm_out_destructor (symbol, priority);
8415 # endif
8416 #endif
8418 #endif
8420 /* This function places uninitialized global data in the bss section.
8421 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8422 function on the SOM port to prevent uninitialized global data from
8423 being placed in the data section. */
8425 void
8426 pa_asm_output_aligned_bss (FILE *stream,
8427 const char *name,
8428 unsigned HOST_WIDE_INT size,
8429 unsigned int align)
8431 switch_to_section (bss_section);
8432 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8434 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8435 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8436 #endif
8438 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8439 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8440 #endif
8442 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8443 ASM_OUTPUT_LABEL (stream, name);
8444 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8447 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8448 that doesn't allow the alignment of global common storage to be directly
8449 specified. The SOM linker aligns common storage based on the rounded
8450 value of the NUM_BYTES parameter in the .comm directive. It's not
8451 possible to use the .align directive as it doesn't affect the alignment
8452 of the label associated with a .comm directive. */
8454 void
8455 pa_asm_output_aligned_common (FILE *stream,
8456 const char *name,
8457 unsigned HOST_WIDE_INT size,
8458 unsigned int align)
8460 unsigned int max_common_align;
8462 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8463 if (align > max_common_align)
8465 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8466 "for global common data. Using %u",
8467 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8468 align = max_common_align;
8471 switch_to_section (bss_section);
8473 assemble_name (stream, name);
8474 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8475 MAX (size, align / BITS_PER_UNIT));
8478 /* We can't use .comm for local common storage as the SOM linker effectively
8479 treats the symbol as universal and uses the same storage for local symbols
8480 with the same name in different object files. The .block directive
8481 reserves an uninitialized block of storage. However, it's not common
8482 storage. Fortunately, GCC never requests common storage with the same
8483 name in any given translation unit. */
8485 void
8486 pa_asm_output_aligned_local (FILE *stream,
8487 const char *name,
8488 unsigned HOST_WIDE_INT size,
8489 unsigned int align)
8491 switch_to_section (bss_section);
8492 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8494 #ifdef LOCAL_ASM_OP
8495 fprintf (stream, "%s", LOCAL_ASM_OP);
8496 assemble_name (stream, name);
8497 fprintf (stream, "\n");
8498 #endif
8500 ASM_OUTPUT_LABEL (stream, name);
8501 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8504 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8505 use in fmpysub instructions. */
8507 fmpysuboperands (rtx *operands)
8509 enum machine_mode mode = GET_MODE (operands[0]);
8511 /* Must be a floating point mode. */
8512 if (mode != SFmode && mode != DFmode)
8513 return 0;
8515 /* All modes must be the same. */
8516 if (! (mode == GET_MODE (operands[1])
8517 && mode == GET_MODE (operands[2])
8518 && mode == GET_MODE (operands[3])
8519 && mode == GET_MODE (operands[4])
8520 && mode == GET_MODE (operands[5])))
8521 return 0;
8523 /* All operands must be registers. */
8524 if (! (GET_CODE (operands[1]) == REG
8525 && GET_CODE (operands[2]) == REG
8526 && GET_CODE (operands[3]) == REG
8527 && GET_CODE (operands[4]) == REG
8528 && GET_CODE (operands[5]) == REG))
8529 return 0;
8531 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8532 operation, so operands[4] must be the same as operands[3]. */
8533 if (! rtx_equal_p (operands[3], operands[4]))
8534 return 0;
8536 /* The multiply output cannot feed the subtraction. */
8537 if (rtx_equal_p (operands[5], operands[0]))
8538 return 0;
8540 /* Inout operand of sub cannot conflict with any operands from multiply. */
8541 if (rtx_equal_p (operands[3], operands[0])
8542 || rtx_equal_p (operands[3], operands[1])
8543 || rtx_equal_p (operands[3], operands[2]))
8544 return 0;
8546 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8547 if (mode == SFmode
8548 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8549 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8550 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8551 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8552 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8553 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8554 return 0;
8556 /* Passed. Operands are suitable for fmpysub. */
8557 return 1;
8560 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8561 constants for shadd instructions. */
8563 shadd_constant_p (int val)
8565 if (val == 2 || val == 4 || val == 8)
8566 return 1;
8567 else
8568 return 0;
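/* The valid constants are the scale factors of the PA shift-and-add
   instructions: an index scaled by 2, 4 or 8 folds into sh1add,
   sh2add or sh3add respectively, i.e. the constant is 1 << shift for
   a shift of 1, 2 or 3.  For example (illustrative), "x * 8 + y" can
   become "sh3add x,y,t".  */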
8571 /* Return 1 if OP is valid as a base or index register in a
8572 REG+REG address. */
8575 borx_reg_operand (rtx op, enum machine_mode mode)
8577 if (GET_CODE (op) != REG)
8578 return 0;
8580 /* We must reject virtual registers as the only expressions that
8581 can be instantiated are REG and REG+CONST. */
8582 if (op == virtual_incoming_args_rtx
8583 || op == virtual_stack_vars_rtx
8584 || op == virtual_stack_dynamic_rtx
8585 || op == virtual_outgoing_args_rtx
8586 || op == virtual_cfa_rtx)
8587 return 0;
8589 /* While it's always safe to index off the frame pointer, it's not
8590 profitable to do so when the frame pointer is being eliminated. */
8591 if (!reload_completed
8592 && flag_omit_frame_pointer
8593 && !cfun->calls_alloca
8594 && op == frame_pointer_rtx)
8595 return 0;
8597 return register_operand (op, mode);
8600 /* Return 1 if this operand is anything other than a hard register. */
8603 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8605 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8608 /* Return TRUE if INSN branches forward. */
8610 static bool
8611 forward_branch_p (rtx insn)
8613 rtx lab = JUMP_LABEL (insn);
8615 /* The INSN must have a jump label. */
8616 gcc_assert (lab != NULL_RTX);
8618 if (INSN_ADDRESSES_SET_P ())
8619 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8621 while (insn)
8623 if (insn == lab)
8624 return true;
8625 else
8626 insn = NEXT_INSN (insn);
8629 return false;
8632 /* Return 1 if OP is an equality comparison, else return 0. */
8634 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8636 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8639 /* Return 1 if INSN is in the delay slot of a call instruction. */
8641 jump_in_call_delay (rtx insn)
8644 if (GET_CODE (insn) != JUMP_INSN)
8645 return 0;
8647 if (PREV_INSN (insn)
8648 && PREV_INSN (PREV_INSN (insn))
8649 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8651 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8653 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8654 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8657 else
8658 return 0;
8661 /* Output an unconditional move and branch insn. */
8663 const char *
8664 output_parallel_movb (rtx *operands, rtx insn)
8666 int length = get_attr_length (insn);
8668 /* These are the cases in which we win. */
8669 if (length == 4)
8670 return "mov%I1b,tr %1,%0,%2";
8672 /* None of the following cases win, but they don't lose either. */
8673 if (length == 8)
8675 if (dbr_sequence_length () == 0)
8677 /* Nothing in the delay slot, fake it by putting the combined
8678 insn (the copy or add) in the delay slot of a bl. */
8679 if (GET_CODE (operands[1]) == CONST_INT)
8680 return "b %2\n\tldi %1,%0";
8681 else
8682 return "b %2\n\tcopy %1,%0";
8684 else
8686 /* Something in the delay slot, but we've got a long branch. */
8687 if (GET_CODE (operands[1]) == CONST_INT)
8688 return "ldi %1,%0\n\tb %2";
8689 else
8690 return "copy %1,%0\n\tb %2";
8694 if (GET_CODE (operands[1]) == CONST_INT)
8695 output_asm_insn ("ldi %1,%0", operands);
8696 else
8697 output_asm_insn ("copy %1,%0", operands);
8698 return output_lbranch (operands[2], insn, 1);
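/* Example outputs for the cases above (hypothetical operands): in
   the 4-byte case the copy and the branch fuse into one insn,

	movb,tr %r4,%r5,L$7	; copy %r4 to %r5 and branch to L$7

   while the 8-byte cases fall back to separate "b" and "copy" (or
   "ldi") instructions in one order or the other, as returned above.  */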
8701 /* Output an unconditional add and branch insn. */
8703 const char *
8704 output_parallel_addb (rtx *operands, rtx insn)
8706 int length = get_attr_length (insn);
8708 /* To make life easy we want operand0 to be the shared input/output
8709 operand and operand1 to be the readonly operand. */
8710 if (operands[0] == operands[1])
8711 operands[1] = operands[2];
8713 /* These are the cases in which we win. */
8714 if (length == 4)
8715 return "add%I1b,tr %1,%0,%3";
8717 /* None of the following cases win, but they don't lose either. */
8718 if (length == 8)
8720 if (dbr_sequence_length () == 0)
8721 /* Nothing in the delay slot, fake it by putting the combined
8722 insn (the copy or add) in the delay slot of a bl. */
8723 return "b %3\n\tadd%I1 %1,%0,%0";
8724 else
8725 /* Something in the delay slot, but we've got a long branch. */
8726 return "add%I1 %1,%0,%0\n\tb %3";
8729 output_asm_insn ("add%I1 %1,%0,%0", operands);
8730 return output_lbranch (operands[3], insn, 1);
8733 /* Return nonzero if INSN (a jump insn) immediately follows a call
8734 to a named function. This is used to avoid filling the delay slot
8735 of the jump since it can usually be eliminated by modifying RP in
8736 the delay slot of the call. */
8739 following_call (rtx insn)
8741 if (! TARGET_JUMP_IN_DELAY)
8742 return 0;
8744 /* Find the previous real insn, skipping NOTEs. */
8745 insn = PREV_INSN (insn);
8746 while (insn && GET_CODE (insn) == NOTE)
8747 insn = PREV_INSN (insn);
8749 /* Check for CALL_INSNs and millicode calls. */
8750 if (insn
8751 && ((GET_CODE (insn) == CALL_INSN
8752 && get_attr_type (insn) != TYPE_DYNCALL)
8753 || (GET_CODE (insn) == INSN
8754 && GET_CODE (PATTERN (insn)) != SEQUENCE
8755 && GET_CODE (PATTERN (insn)) != USE
8756 && GET_CODE (PATTERN (insn)) != CLOBBER
8757 && get_attr_type (insn) == TYPE_MILLI)))
8758 return 1;
8760 return 0;
8763 /* We use this hook to perform a PA-specific optimization which is difficult
8764 to do in earlier passes.
8766 We want the delay slots of branches within jump tables to be filled.
8767 None of the compiler passes at the moment even has the notion that a
8768 PA jump table doesn't contain addresses, but instead contains actual
8769 instructions!
8771 Because we actually jump into the table, the addresses of each entry
8772 must stay constant in relation to the beginning of the table (which
8773 itself must stay constant relative to the instruction to jump into
8774 it). I don't believe we can guarantee earlier passes of the compiler
8775 will adhere to those rules.
8777 So, late in the compilation process we find all the jump tables, and
8778 expand them into real code -- e.g. each entry in the jump table vector
8779 will get an appropriate label followed by a jump to the final target.
8781 Reorg and the final jump pass can then optimize these branches and
8782 fill their delay slots. We end up with smaller, more efficient code.
8784 The jump instructions within the table are special; we must be able
8785 to identify them during assembly output (if the jumps don't get filled
8786 we need to emit a nop rather than nullifying the delay slot). We
8787 identify jumps in switch tables by using insns with the attribute
8788 type TYPE_BTABLE_BRANCH.
8790 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8791 insns. This serves two purposes, first it prevents jump.c from
8792 noticing that the last N entries in the table jump to the instruction
8793 immediately after the table and deleting the jumps. Second, those
8794 insns mark where we should emit .begin_brtab and .end_brtab directives
8795 when using GAS (allows for better link time optimizations). */
8797 static void
8798 pa_reorg (void)
8800 rtx insn;
8802 remove_useless_addtr_insns (1);
8804 if (pa_cpu < PROCESSOR_8000)
8805 pa_combine_instructions ();
8808 /* This is fairly cheap, so always run it if optimizing. */
8809 if (optimize > 0 && !TARGET_BIG_SWITCH)
8811 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8812 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8814 rtx pattern, tmp, location, label;
8815 unsigned int length, i;
8817 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8818 if (GET_CODE (insn) != JUMP_INSN
8819 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8820 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8821 continue;
8823 /* Emit marker for the beginning of the branch table. */
8824 emit_insn_before (gen_begin_brtab (), insn);
8826 pattern = PATTERN (insn);
8827 location = PREV_INSN (insn);
8828 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8830 for (i = 0; i < length; i++)
8832 /* Emit a label before each jump to keep jump.c from
8833 removing this code. */
8834 tmp = gen_label_rtx ();
8835 LABEL_NUSES (tmp) = 1;
8836 emit_label_after (tmp, location);
8837 location = NEXT_INSN (location);
8839 if (GET_CODE (pattern) == ADDR_VEC)
8840 label = XEXP (XVECEXP (pattern, 0, i), 0);
8841 else
8842 label = XEXP (XVECEXP (pattern, 1, i), 0);
8844 tmp = gen_short_jump (label);
8846 /* Emit the jump itself. */
8847 tmp = emit_jump_insn_after (tmp, location);
8848 JUMP_LABEL (tmp) = label;
8849 LABEL_NUSES (label)++;
8850 location = NEXT_INSN (location);
8852 /* Emit a BARRIER after the jump. */
8853 emit_barrier_after (location);
8854 location = NEXT_INSN (location);
8857 /* Emit marker for the end of the branch table. */
8858 emit_insn_before (gen_end_brtab (), location);
8859 location = NEXT_INSN (location);
8860 emit_barrier_after (location);
8862 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8863 delete_insn (insn);
8866 else
8868 /* Still need brtab marker insns. FIXME: the presence of these
8869 markers disables output of the branch table to readonly memory,
8870 and any alignment directives that might be needed. Possibly,
8871 the begin_brtab insn should be output before the label for the
8872 table. This doesn't matter at the moment since the tables are
8873 always output in the text section. */
8874 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8876 /* Find an ADDR_VEC insn. */
8877 if (GET_CODE (insn) != JUMP_INSN
8878 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8879 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8880 continue;
8882 /* Now generate markers for the beginning and end of the
8883 branch table. */
8884 emit_insn_before (gen_begin_brtab (), insn);
8885 emit_insn_after (gen_end_brtab (), insn);
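/* An illustration of the expansion performed above (labels are made
   up).  A three-entry branch table such as

	.word L$4
	.word L$5
	.word L$6

   is rewritten so that each slot holds a real jump whose delay slot
   reorg can later fill:

   L$10:	b L$4
		nop		; delay slot, filled when possible
   L$11:	b L$5
		nop
   L$12:	b L$6
		nop

   bracketed by the begin_brtab and end_brtab markers.  */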
8890 /* The PA has a number of odd instructions which can perform multiple
8891 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8892 it may be profitable to combine two instructions into one instruction
8893 with two outputs. It's not profitable on PA2.0 machines because the
8894 two outputs would take two slots in the reorder buffers.
8896 This routine finds instructions which can be combined and combines
8897 them. We only support some of the potential combinations, and we
8898 only try common ways to find suitable instructions.
8900 * addb can add two registers or a register and a small integer
8901 and jump to a nearby (+-8k) location. Normally the jump to the
8902 nearby location is conditional on the result of the add, but by
8903 using the "true" condition we can make the jump unconditional.
8904 Thus addb can perform two independent operations in one insn.
8906 * movb is similar to addb in that it can perform a reg->reg
8907 or small immediate->reg copy and jump to a nearby (+-8k) location.
8909 * fmpyadd and fmpysub can perform a FP multiply and either an
8910 FP add or FP sub if the operands of the multiply and add/sub are
8911 independent (there are other minor restrictions). Note both
8912 the fmpy and fadd/fsub can in theory move to better spots according
8913 to data dependencies, but for now we require the fmpy stay at a
8914 fixed location.
8916 * Many of the memory operations can perform pre & post updates
8917 of index registers. GCC's pre/post increment/decrement addressing
8918 is far too simple to take advantage of all the possibilities. This
8919 pass may not be suitable since those insns may not be independent.
8921 * comclr can compare two ints or an int and a register, nullify
8922 the following instruction and zero some other register. This
8923 is more difficult to use as it's harder to find an insn which
8924 will generate a comclr than finding something like an unconditional
8925 branch. (conditional moves & long branches create comclr insns).
8927 * Most arithmetic operations can conditionally skip the next
8928 instruction. They can be viewed as "perform this operation
8929 and conditionally jump to this nearby location" (where nearby
8930 is an insn away). These are difficult to use due to the
8931 branch length restrictions. */
8933 static void
8934 pa_combine_instructions (void)
8936 rtx anchor, new_rtx;
8938 /* This can get expensive since the basic algorithm is on the
8939 order of O(n^2) (or worse). Only do it for -O2 or higher
8940 levels of optimization. */
8941 if (optimize < 2)
8942 return;
8944 /* Walk down the list of insns looking for "anchor" insns which
8945 may be combined with "floating" insns. As the name implies,
8946 "anchor" instructions don't move, while "floating" insns may
8947 move around. */
8948 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8949 new_rtx = make_insn_raw (new_rtx);
8951 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8953 enum attr_pa_combine_type anchor_attr;
8954 enum attr_pa_combine_type floater_attr;
8956 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8957 Also ignore any special USE insns. */
8958 if ((GET_CODE (anchor) != INSN
8959 && GET_CODE (anchor) != JUMP_INSN
8960 && GET_CODE (anchor) != CALL_INSN)
8961 || GET_CODE (PATTERN (anchor)) == USE
8962 || GET_CODE (PATTERN (anchor)) == CLOBBER
8963 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8964 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8965 continue;
8967 anchor_attr = get_attr_pa_combine_type (anchor);
8968 /* See if anchor is an insn suitable for combination. */
8969 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8970 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8971 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8972 && ! forward_branch_p (anchor)))
8974 rtx floater;
8976 for (floater = PREV_INSN (anchor);
8977 floater;
8978 floater = PREV_INSN (floater))
8980 if (GET_CODE (floater) == NOTE
8981 || (GET_CODE (floater) == INSN
8982 && (GET_CODE (PATTERN (floater)) == USE
8983 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8984 continue;
8986 /* Anything except a regular INSN will stop our search. */
8987 if (GET_CODE (floater) != INSN
8988 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8989 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8991 floater = NULL_RTX;
8992 break;
8995 /* See if FLOATER is suitable for combination with the
8996 anchor. */
8997 floater_attr = get_attr_pa_combine_type (floater);
8998 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8999 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9000 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9001 && floater_attr == PA_COMBINE_TYPE_FMPY))
9003 /* If ANCHOR and FLOATER can be combined, then we're
9004 done with this pass. */
9005 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9006 SET_DEST (PATTERN (floater)),
9007 XEXP (SET_SRC (PATTERN (floater)), 0),
9008 XEXP (SET_SRC (PATTERN (floater)), 1)))
9009 break;
9012 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9013 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9015 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9017 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9018 SET_DEST (PATTERN (floater)),
9019 XEXP (SET_SRC (PATTERN (floater)), 0),
9020 XEXP (SET_SRC (PATTERN (floater)), 1)))
9021 break;
9023 else
9025 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9026 SET_DEST (PATTERN (floater)),
9027 SET_SRC (PATTERN (floater)),
9028 SET_SRC (PATTERN (floater))))
9029 break;
9034 /* If we didn't find anything on the backwards scan try forwards. */
9035 if (!floater
9036 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9037 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9039 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9041 if (GET_CODE (floater) == NOTE
9042 || (GET_CODE (floater) == INSN
9043 && (GET_CODE (PATTERN (floater)) == USE
9044 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9046 continue;
9048 /* Anything except a regular INSN will stop our search. */
9049 if (GET_CODE (floater) != INSN
9050 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9051 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9053 floater = NULL_RTX;
9054 break;
9057 /* See if FLOATER is suitable for combination with the
9058 anchor. */
9059 floater_attr = get_attr_pa_combine_type (floater);
9060 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9061 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9062 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9063 && floater_attr == PA_COMBINE_TYPE_FMPY))
9065 /* If ANCHOR and FLOATER can be combined, then we're
9066 done with this pass. */
9067 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9068 SET_DEST (PATTERN (floater)),
9069 XEXP (SET_SRC (PATTERN (floater)), 0),
9071 XEXP (SET_SRC (PATTERN (floater)),
9072 1)))
9073 break;
9078 /* FLOATER will be nonzero if we found a suitable floating
9079 insn for combination with ANCHOR. */
9080 if (floater
9081 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9082 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9084 /* Emit the new instruction and delete the old anchor. */
9085 emit_insn_before (gen_rtx_PARALLEL
9086 (VOIDmode,
9087 gen_rtvec (2, PATTERN (anchor),
9088 PATTERN (floater))),
9089 anchor);
9091 SET_INSN_DELETED (anchor);
9093 /* Emit a special USE insn for FLOATER, then delete
9094 the floating insn. */
9095 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9096 delete_insn (floater);
9098 continue;
9100 else if (floater
9101 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9103 rtx temp;
9104 /* Emit the new_jump instruction and delete the old anchor. */
9105 temp
9106 = emit_jump_insn_before (gen_rtx_PARALLEL
9107 (VOIDmode,
9108 gen_rtvec (2, PATTERN (anchor),
9109 PATTERN (floater))),
9110 anchor);
9112 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9113 SET_INSN_DELETED (anchor);
9115 /* Emit a special USE insn for FLOATER, then delete
9116 the floating insn. */
9117 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9118 delete_insn (floater);
9119 continue;
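/* A sketch of the unconditional-branch combination done above
   (hypothetical registers and label):

     copy %r4,%r5	; "floating" move found by the backward scan
     ...
     b L$9		; "anchor" backward branch

   becomes the single insn

     movb,tr %r4,%r5,L$9

   provided pa_can_combine_p below verifies that %r5 is not used and
   %r4 is not set between the two insns.  */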
9125 static int
9126 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9127 rtx src1, rtx src2)
9129 int insn_code_number;
9130 rtx start, end;
9132 /* Create a PARALLEL with the patterns of ANCHOR and
9133 FLOATER, try to recognize it, then test constraints
9134 for the resulting pattern.
9136 If the pattern doesn't match or the constraints
9137 aren't met keep searching for a suitable floater
9138 insn. */
9139 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9140 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9141 INSN_CODE (new_rtx) = -1;
9142 insn_code_number = recog_memoized (new_rtx);
9143 if (insn_code_number < 0
9144 || (extract_insn (new_rtx), ! constrain_operands (1)))
9145 return 0;
9147 if (reversed)
9149 start = anchor;
9150 end = floater;
9152 else
9154 start = floater;
9155 end = anchor;
9158 /* There are up to three operands to consider: one
9159 output and two inputs.
9161 The output must not be used between FLOATER & ANCHOR
9162 exclusive. The inputs must not be set between
9163 FLOATER and ANCHOR exclusive. */
9165 if (reg_used_between_p (dest, start, end))
9166 return 0;
9168 if (reg_set_between_p (src1, start, end))
9169 return 0;
9171 if (reg_set_between_p (src2, start, end))
9172 return 0;
9174 /* If we get here, then everything is good. */
9175 return 1;
9178 /* Return nonzero if references for INSN are delayed.
9180 Millicode insns are actually function calls with some special
9181 constraints on arguments and register usage.
9183 Millicode calls always expect their arguments in the integer argument
9184 registers, and always return their result in %r29 (ret1). They
9185 are expected to clobber their arguments, %r1, %r29, and the return
9186 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9188 This function tells reorg that the argument references of
9189 millicode calls do not appear to happen until after the millicode call.
9190 This allows reorg to put insns which set the argument registers into the
9191 delay slot of the millicode call -- thus they act more like traditional
9192 CALL_INSNs.
9194 Note we cannot consider side effects of the insn to be delayed because
9195 the branch and link insn will clobber the return pointer. If we happened
9196 to use the return pointer in the delay slot of the call, then we lose.
9198 get_attr_type will try to recognize the given insn, so make sure to
9199 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9200 in particular. */
9202 insn_refs_are_delayed (rtx insn)
9204 return ((GET_CODE (insn) == INSN
9205 && GET_CODE (PATTERN (insn)) != SEQUENCE
9206 && GET_CODE (PATTERN (insn)) != USE
9207 && GET_CODE (PATTERN (insn)) != CLOBBER
9208 && get_attr_type (insn) == TYPE_MILLI));
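/* Example of the effect (a sketch; $$mulI is a millicode routine and
   the register choice is illustrative):

     ldi 10,%r25		; set up a millicode argument
     bl $$mulI,%r31		; millicode call
     nop			; unfilled delay slot

   can become

     bl $$mulI,%r31
     ldi 10,%r25		; argument setup moved into the slot

   because the call's argument references are treated as delayed.  */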
9211 /* Promote the return value, but not the arguments. */
9213 static enum machine_mode
9214 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9215 enum machine_mode mode,
9216 int *punsignedp ATTRIBUTE_UNUSED,
9217 const_tree fntype ATTRIBUTE_UNUSED,
9218 int for_return)
9220 if (for_return == 0)
9221 return mode;
9222 return promote_mode (type, mode, punsignedp);
9225 /* On the HP-PA the value is found in register(s) 28(-29), unless
9226 the mode is SF or DF. Then the value is returned in fr4 (32).
9228 This must perform the same promotions as PROMOTE_MODE, else promoting
9229 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9231 Small structures must be returned in a PARALLEL on PA64 in order
9232 to match the HP Compiler ABI. */
9235 pa_function_value (const_tree valtype,
9236 const_tree func ATTRIBUTE_UNUSED,
9237 bool outgoing ATTRIBUTE_UNUSED)
9239 enum machine_mode valmode;
9241 if (AGGREGATE_TYPE_P (valtype)
9242 || TREE_CODE (valtype) == COMPLEX_TYPE
9243 || TREE_CODE (valtype) == VECTOR_TYPE)
9245 if (TARGET_64BIT)
9247 /* Aggregates with a size less than or equal to 128 bits are
9248 returned in GR 28(-29). They are left justified. The pad
9249 bits are undefined. Larger aggregates are returned in
9250 memory. */
9251 rtx loc[2];
9252 int i, offset = 0;
9253 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9255 for (i = 0; i < ub; i++)
9257 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9258 gen_rtx_REG (DImode, 28 + i),
9259 GEN_INT (offset));
9260 offset += 8;
9263 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9265 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9267 /* Aggregates 5 to 8 bytes in size are returned in general
9268 registers r28-r29 in the same manner as other
9269 non-floating-point objects. The data is right-justified and
9270 zero-extended to 64 bits. This is opposite to the normal
9271 justification used on big endian targets and requires
9272 special treatment. */
9273 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9274 gen_rtx_REG (DImode, 28), const0_rtx);
9275 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9279 if ((INTEGRAL_TYPE_P (valtype)
9280 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9281 || POINTER_TYPE_P (valtype))
9282 valmode = word_mode;
9283 else
9284 valmode = TYPE_MODE (valtype);
9286 if (TREE_CODE (valtype) == REAL_TYPE
9287 && !AGGREGATE_TYPE_P (valtype)
9288 && TYPE_MODE (valtype) != TFmode
9289 && !TARGET_SOFT_FLOAT)
9290 return gen_rtx_REG (valmode, 32);
9292 return gen_rtx_REG (valmode, 28);
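/* Example (illustrative RTL): on TARGET_64BIT a 16-byte struct comes
   back as

     (parallel:BLK [(expr_list (reg:DI 28) (const_int 0))
                    (expr_list (reg:DI 29) (const_int 8))])

   per the loop above, while a plain "double" is returned in
   (reg:DF 32), i.e. fr4.  */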
9295 /* Return the location of a parameter that is passed in a register or NULL
9296 if the parameter has any component that is passed in memory.
9298 This is new code and will be pushed into the net sources after
9299 further testing.
9301 ??? We might want to restructure this so that it looks more like other
9302 ports. */
9304 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
9305 int named ATTRIBUTE_UNUSED)
9307 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9308 int alignment = 0;
9309 int arg_size;
9310 int fpr_reg_base;
9311 int gpr_reg_base;
9312 rtx retval;
9314 if (mode == VOIDmode)
9315 return NULL_RTX;
9317 arg_size = FUNCTION_ARG_SIZE (mode, type);
9319 /* If this arg would be passed partially or totally on the stack, then
9320 this routine should return zero. pa_arg_partial_bytes will
9321 handle arguments which are split between regs and stack slots if
9322 the ABI mandates split arguments. */
9323 if (!TARGET_64BIT)
9325 /* The 32-bit ABI does not split arguments. */
9326 if (cum->words + arg_size > max_arg_words)
9327 return NULL_RTX;
9329 else
9331 if (arg_size > 1)
9332 alignment = cum->words & 1;
9333 if (cum->words + alignment >= max_arg_words)
9334 return NULL_RTX;
9337 /* The 32-bit ABIs and the 64-bit ABIs are rather different,
9338 particularly in their handling of FP registers. We might
9339 be able to cleverly share code between them, but I'm not
9340 going to bother in the hope that splitting them up results
9341 in code that is more easily understood. */
9343 if (TARGET_64BIT)
9345 /* Advance the base registers to their current locations.
9347 Remember, gprs grow towards smaller register numbers while
9348 fprs grow to higher register numbers. Also remember that
9349 although FP regs are 32-bit addressable, we pretend that
9350 the registers are 64 bits wide.
9351 gpr_reg_base = 26 - cum->words;
9352 fpr_reg_base = 32 + cum->words;
9354 /* Arguments wider than one word and small aggregates need special
9355 treatment. */
9356 if (arg_size > 1
9357 || mode == BLKmode
9358 || (type && (AGGREGATE_TYPE_P (type)
9359 || TREE_CODE (type) == COMPLEX_TYPE
9360 || TREE_CODE (type) == VECTOR_TYPE)))
9362 /* Double-extended precision (80-bit), quad-precision (128-bit)
9363 and aggregates including complex numbers are aligned on
9364 128-bit boundaries. The first eight 64-bit argument slots
9365 are associated one-to-one, with general registers r26
9366 through r19, and also with floating-point registers fr4
9367 through fr11. Arguments larger than one word are always
9368 passed in general registers.
9370 Using a PARALLEL with a word mode register results in left
9371 justified data on a big-endian target. */
9373 rtx loc[8];
9374 int i, offset = 0, ub = arg_size;
9376 /* Align the base register. */
9377 gpr_reg_base -= alignment;
9379 ub = MIN (ub, max_arg_words - cum->words - alignment);
9380 for (i = 0; i < ub; i++)
9382 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9383 gen_rtx_REG (DImode, gpr_reg_base),
9384 GEN_INT (offset));
9385 gpr_reg_base -= 1;
9386 offset += 8;
9389 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
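  /* Worked example (editor's sketch, not part of the original source):
     a 16-byte struct arriving with cum->words == 2 has arg_size == 2 and
     alignment == 0, so gpr_reg_base starts at 26 - 2 = 24 and the loop
     builds

       (parallel [(expr_list (reg:DI 24) (const_int 0))
		  (expr_list (reg:DI 23) (const_int 8))])

     placing the first eight bytes in r24 and the next eight in r23.  */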
  else
    {
      /* If the argument is larger than a word, then we know precisely
	 which registers we must use.  */
      if (arg_size > 1)
	{
	  if (cum->words)
	    {
	      gpr_reg_base = 23;
	      fpr_reg_base = 38;
	    }
	  else
	    {
	      gpr_reg_base = 25;
	      fpr_reg_base = 34;
	    }

	  /* Structures 5 to 8 bytes in size are passed in the general
	     registers in the same manner as other non floating-point
	     objects.  The data is right-justified and zero-extended
	     to 64 bits.  This is opposite to the normal justification
	     used on big endian targets and requires special treatment.
	     We now define BLOCK_REG_PADDING to pad these objects.
	     Aggregates, complex and vector types are passed in the same
	     manner as structures.  */
	  if (mode == BLKmode
	      || (type && (AGGREGATE_TYPE_P (type)
			   || TREE_CODE (type) == COMPLEX_TYPE
			   || TREE_CODE (type) == VECTOR_TYPE)))
	    {
	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (DImode, gpr_reg_base),
					   const0_rtx);
	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	    }
	}
      else
	{
	  /* We have a single word (32 bits).  A simple computation
	     will get us the register #s we need.  */
	  gpr_reg_base = 26 - cum->words;
	  fpr_reg_base = 32 + 2 * cum->words;
	}
    }
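  /* Worked example (editor's sketch, not part of the original source):
     with the 32-bit register numbering, a single-word argument in the
     second slot (cum->words == 1) gets gpr_reg_base = 26 - 1 = 25 (%r25)
     and fpr_reg_base = 32 + 2 = 34, which names %fr5 since each FP
     register occupies two hard register numbers on the 32-bit port.  */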
  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
	  is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
	  pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
	  registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
	 calls with the 32 bit ABI and the HP assembler since there is no
	 way to specify the argument locations in static functions.  */
      || (!TARGET_64BIT
	  && !TARGET_GAS
	  && !cum->incoming
	  && cum->indirect
	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
	= gen_rtx_PARALLEL
	    (mode,
	     gen_rtvec (2,
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, fpr_reg_base),
					   const0_rtx),
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, gpr_reg_base),
					   const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
	  /* Indirect calls in the normal 32bit ABI require all arguments
	     to be passed in general registers.  */
	  || (!TARGET_PORTABLE_RUNTIME
	      && !TARGET_64BIT
	      && !TARGET_ELF32
	      && cum->indirect)
	  /* If the parameter is not a scalar floating-point parameter,
	     then it belongs in GPRs.  */
	  || GET_MODE_CLASS (mode) != MODE_FLOAT
	  /* Structure with single SFmode field belongs in GPR.  */
	  || (type && AGGREGATE_TYPE_P (type)))
	retval = gen_rtx_REG (mode, gpr_reg_base);
      else
	retval = gen_rtx_REG (mode, fpr_reg_base);
    }

  return retval;
}
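
/* Illustrative note (editor's sketch, not part of the original source):
   an unprototyped DFmode argument in the first slot, passed with the
   32-bit ABI and the HP assembler through an indirect call, takes the
   first branch above (gpr_reg_base == 25, fpr_reg_base == 34) and is
   described by

     (parallel:DF [(expr_list (reg:DF 34) (const_int 0))
		   (expr_list (reg:DF 25) (const_int 0))])

   so the caller materializes the value both in the FP register and in
   the general-register pair starting at %r25.  */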
/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, bool named ATTRIBUTE_UNUSED)
{
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
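
/* Worked example (editor's sketch, not part of the original source): on
   the 64-bit target, a three-word argument arriving with cum->words == 6
   needs no padding (offset == 0); it neither fits entirely in registers
   (6 + 3 > 8) nor lands entirely on the stack (6 < 8), so the split case
   returns (8 - 6 - 0) * UNITS_PER_WORD = 16 bytes passed in registers,
   with the remaining 8 bytes going to the stack.  */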
/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
	{
	  /* We only want to emit a .nsubspa directive once at the
	     start of the function.  */
	  cfun->machine->in_nsubspa = 1;

	  /* Create a new subspace for the text.  This provides
	     better stub placement and one-only functions.  */
	  if (cfun->decl
	      && DECL_ONE_ONLY (cfun->decl)
	      && !DECL_WEAK (cfun->decl))
	    {
	      output_section_asm_op ("\t.SPACE $TEXT$\n"
				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
				     "ACCESS=44,SORT=24,COMDAT");
	      return;
	    }
	}
      else
	{
	  /* There isn't a current function or the body of the current
	     function has been completed.  So, we are changing to the
	     text section to output debugging information.  Thus, we
	     need to forget that we are in the text section so that
	     varasm.c will call us when text_section is selected again.  */
	  gcc_assert (!cfun || !cfun->machine
		      || cfun->machine->in_nsubspa == 2);
	  in_section = NULL;
	}
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
			   "\t.SPACE $TEXT$\n"
			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
			   "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
			   som_output_comdat_data_section_asm_op,
			   "\t.SPACE $PRIVATE$\n"
			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
			   "ACCESS=31,SORT=24,COMDAT");

  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols, which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}

/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc
   cannot be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
	  || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !reloc)
    {
      if (TARGET_SOM
	  && DECL_ONE_ONLY (exp)
	  && !DECL_WEAK (exp))
	return som_one_only_readonly_data_section;
      else
	return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp) && !reloc)
    return readonly_data_section;
  else if (TARGET_SOM
	   && TREE_CODE (exp) == VAR_DECL
	   && DECL_ONE_ONLY (exp)
	   && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}
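
/* Illustrative note (editor's sketch, not part of the original source):
   a definition such as

     static const int table[4] = { 1, 2, 3, 4 };

   is a readonly VAR_DECL with a constant initializer and no relocs, so
   pa_select_section places it in readonly_data_section; if its initializer
   instead required a relocation (reloc != 0), it would fall through to
   data_section to keep the SOM/HP-UX linkers happy.  */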
static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here; functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}
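
/* Illustrative note (editor's sketch, not part of the original source):
   for a global data symbol foo, the function above emits

     .EXPORT foo,DATA

   into the assembly stream.  */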
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
	  || int_size_in_bytes (type) <= 0);
}
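
/* Worked example (editor's sketch, not part of the original source):
   a 12-byte struct is returned in memory on the 32-bit target (12 > 8)
   but in registers on the 64-bit target (12 <= 16); a variable-sized or
   empty type yields int_size_in_bytes <= 0 and is always returned in
   memory.  */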
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */
DEF_VEC_O(extern_symbol);
DEF_VEC_ALLOC_O(extern_symbol,gc);

/* Vector of extern_symbol pointers.  */
static GTY(()) VEC(extern_symbol,gc) *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  extern_symbol *p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);

  gcc_assert (file == asm_out_file);
  p->decl = decl;
  p->name = name;
}

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  VEC_free (extern_symbol, gc, extern_symbols);
}
#endif
/* Return true if a change from mode FROM to mode TO for a register
   in register class RCLASS is invalid.  */

bool
pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			     enum reg_class rclass)
{
  if (from == to)
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return true;

  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return false;

  /* There is no way to load QImode or HImode values directly from
     memory.  SImode loads to the FP registers are not zero extended.
     On the 64-bit target, this conflicts with the definition of
     LOAD_EXTEND_OP.  Thus, we can't allow changing between modes
     with different sizes in the floating-point registers.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return true;

  /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return true;

  return false;
}
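
/* Illustrative note (editor's sketch, not part of the original source):
   a subreg that reinterprets an SFmode value as DImode involves modes of
   different sizes, so for an FP register class (MAYBE_FP_REG_CLASS_P)
   the change is rejected above, while an SFmode/SImode pun (equal sizes)
   is allowed in any class.  */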
/* Returns TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tying these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
   CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

bool
pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}
/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))


/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from text segment to stack location
   and then patched with pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      fputs ("\tldw 36(%r22),%r21\n", f);
      fputs ("\tbb,>=,n %r21,30,.+16\n", f);
      if (ASSEMBLER_DIALECT == 0)
	fputs ("\tdepi 0,31,2,%r21\n", f);
      else
	fputs ("\tdepwi 0,31,2,%r21\n", f);
      fputs ("\tldw 4(%r21),%r19\n", f);
      fputs ("\tldw 0(%r21),%r21\n", f);
      if (TARGET_PA_20)
	{
	  fputs ("\tbve (%r21)\n", f);
	  fputs ("\tldw 40(%r22),%r29\n", f);
	  fputs ("\t.word 0\n", f);
	  fputs ("\t.word 0\n", f);
	}
      else
	{
	  fputs ("\tldsid (%r21),%r1\n", f);
	  fputs ("\tmtsp %r1,%sr0\n", f);
	  fputs ("\tbe 0(%sr0,%r21)\n", f);
	  fputs ("\tldw 40(%r22),%r29\n", f);
	}
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r1\n", f);
      fputs ("\tldd 24(%r1),%r27\n", f);
      fputs ("\tldd 16(%r1),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\t.dword 0 ; fptr\n", f);
      fputs ("\t.dword 0 ; static link\n", f);
    }
}
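
/* Illustrative note (editor's sketch, not part of the original source):
   after pa_trampoline_init patches the 32-bit template, the block on the
   stack looks roughly like

     offset  0..35   instruction code emitted above
     offset 36       address of the target function
     offset 40       static chain value
     offset 44       trampoline address  \ two-word plabel for the
     offset 48       global pointer r19  / indirect call

   which matches the ldw 36(%r22) / ldw 40(%r22) instructions in the
   template, %r22 holding the trampoline address on entry.  */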
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 36.
   Move the static chain value to trampoline template at offset 40.
   Move the trampoline address to trampoline template at offset 44.
   Move r19 to trampoline template at offset 48.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 36);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 40);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 44);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }
}
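
/* Worked example (editor's sketch, not part of the original source):
   assuming a minimum cache line of 32 bytes, a trampoline at stack
   address 0x7f0012d4 gets start_addr = 0x7f0012d4 & -32 = 0x7f0012c0,
   and end_addr is computed the same way from the last code byte, so the
   fdc/fic flush loops touch every cache line the copied code could
   occupy before the trampoline is entered.  */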
/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 44.
   The extra 2 added below sets the plabel bit in the resulting function
   pointer (the bit tested by the bb,>=,n %r21,30 instruction in the
   trampoline template).  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (addr, 46));
  return addr;
}

static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}
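
/* Illustrative note (editor's sketch, not part of the original source):
   pa_delegitimize_address unwraps PIC references of the shape

     (lo_sum (reg) (unspec [(symbol_ref "x")] UNSPEC_DLTIND14R))

   back into a constant memory reference to "x", so that consumers such
   as debug-info generation see the original symbol.  */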
#include "gt-pa.h"