/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "df.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_address_cost (rtx, bool);
static bool hppa_rtx_costs (rtx, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static struct machine_function * pa_init_machine_status (void);
static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
					   enum machine_mode,
					   secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
						   enum machine_mode, int *,
						   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;

/* The UNIX standard to use for predefines and linking.  */
int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION pa_handle_option

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

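/* For example, -mfixed-range=fr4-fr31 removes fr4 through fr31 from
   register allocation, and several ranges may be given separated by
   commas, as in -mfixed-range=fr4-fr15,fr20-fr31 (any names that
   decode_reg_name accepts should work here).  */
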
/* Implement TARGET_HANDLE_OPTION.  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mnosnake:
    case OPT_mpa_risc_1_0:
    case OPT_march_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    case OPT_msnake:
    case OPT_mpa_risc_1_1:
    case OPT_march_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    case OPT_mpa_risc_2_0:
    case OPT_march_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
	pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
	pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
	pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
	pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
	pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
	pa_cpu = PROCESSOR_7300;
      else
	return false;
      return true;

    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

#if TARGET_HPUX
    case OPT_munix_93:
      flag_pa_unix = 1993;
      return true;
#endif

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    default:
      return true;
    }
}

void
override_options (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
#if TARGET_HPUX_11
  if (built_in_decls [BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls [BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
#endif
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return GGC_CNEW (machine_function);
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi can be used.  */
  return (VAL_14_BITS_P (ival)
	  || ldil_cint_p (ival)
	  || zdepi_cint_p (ival));
}

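/* For example, 5000 fits in 14 bits and can be loaded with a single
   ldo; 0x2b3c800 has its low 11 bits clear and can be loaded with
   ldil; 0x1e0 is a short shifted bit field that zdepi can generate.
   A constant such as 0x12345 satisfies none of the three tests and
   is rejected.  */
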
/* Return truth value of whether OP can be used as an operand in an
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT
	      && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}

/* True iff the operand OP can be used as the destination operand of
   an integer store.  This also implies the operand could be used as
   the source operand of an integer load.  Symbolic, lo_sum and indexed
   memory operands are not allowed.  We accept reloading pseudos and
   other memory operands.  */
int
integer_store_memory_operand (rtx op, enum machine_mode mode)
{
  return ((reload_in_progress
	   && REG_P (op)
	   && REGNO (op) >= FIRST_PSEUDO_REGISTER
	   && reg_renumber [REGNO (op)] < 0)
	  || (GET_CODE (op) == MEM
	      && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
	      && !symbolic_memory_operand (op, VOIDmode)
	      && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
	      && !IS_INDEX_ADDR_P (XEXP (op, 0))));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}

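/* For example, 0x2b3c800 has its low 11 bits clear and bit 31 clear,
   so the masked value x is zero and ldil can load it.  0x80000000 as
   a positive 64-bit constant is rejected because it would change sign
   when truncated to 32 bits, while its sign-extended form
   0xffffffff80000000 is accepted.  */
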
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

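/* Worked example: x = 0x1e0 (binary 111100000) gives lsb_mask = 0x20
   and t = (0x1e + 0x20) & ~0x1f = 0x20, a power of two, so 0x1e0 is
   accepted (it is the 5-bit value 01111 deposited at bit 5).  For
   x = 0x101, t = 0x11, which is not a power of two, so the value is
   rejected.  */
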
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

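/* Equivalently, the zero bits of an acceptable mask must form one
   contiguous block.  Inverting the mask turns that block into a run
   of ones; adding the run's lowest set bit carries the run up to a
   single bit (or wraps to zero), and the final test accepts powers
   of two and zero.  For example, ~0xfffffffffffffc3f = 0x3c0, and
   0x3c0 + 0x40 = 0x400, a power of two, so that mask is accepted;
   0xf0f0 has two separate blocks of zeros and is rejected.  */
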
/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, mode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
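/* A worked example with the MODE_INT mask 0x3fff: for
   memory (X + 0x123456), the low bits 0x3456 are at least halfway
   (0x2000) to the next boundary, so Y rounds up to 0x124000.
   Z = X + 0x124000 is computed once, and the reference becomes
   memory (Z + -0xbaa), whose displacement fits in 14 bits.  A nearby
   memory (X + 0x123f00) rounds to the same Y and so reuses Z.  */
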
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     Alternatively, const_int may be big but divisible evenly by
	     shadd_const, in which case it is divided and added to (reg).
	     This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* For the HPPA, REG, REG+CONST and LO_SUM addresses are cost 1,
   HIGH expressions are cost 2, and all other addresses (including
   symbolic constants) are cost 4.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
		bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = COSTS_N_INSNS (8);
      else
	*total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else
	*total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
				 XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p ((GET_MODE_SIZE (mode) == 4
				      ? SFmode : DFmode),
				     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
	   && fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& !memory_address_p ((GET_MODE_SIZE (mode) == 4
				       ? SFmode : DFmode),
				      XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
					  ? SFmode : DFmode),
					 XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
								       0)),
						       Pmode,
						       XEXP (XEXP (operand0, 0),
							     0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
			      replace_equiv_address (operand0, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (scratch_reg
	   && CONSTANT_P (operand1)
	   && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (scratch_reg
	   && GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
								       0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0),
							     0),
						       scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg,
			  replace_equiv_address (operand1, scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Various sets are created during RTL generation which don't
	     have the REG_POINTER flag correctly set.  After the CSE pass,
	     instruction recognition can fail if we don't consistently
	     set this flag when performing register copies.  This should
	     also improve the opportunities for creating insns that use
	     unscaled indexing.  */
	  if (REG_P (operand0) && REG_P (operand1))
	    {
	      if (REG_POINTER (operand1)
		  && !REG_POINTER (operand0)
		  && !HARD_REGISTER_P (operand0))
		copy_reg_pointer (operand0, operand1);
	      else if (REG_POINTER (operand0)
		       && !REG_POINTER (operand1)
		       && !HARD_REGISTER_P (operand1))
		copy_reg_pointer (operand1, operand0);
	    }

	  /* When MEMs are broken out, the REG_POINTER flag doesn't
	     get set.  In some cases, we can set the REG_POINTER flag
	     from the declaration for the MEM.  */
	  if (REG_P (operand0)
	      && GET_CODE (operand1) == MEM
	      && !REG_POINTER (operand0))
	    {
	      tree decl = MEM_EXPR (operand1);

	      /* Set the register pointer flag and register alignment
		 if the declaration for this memory reference is a
		 pointer type.  Fortran indirect argument references
		 are ignored.  */
	      if (decl
		  && !(flag_argument_noalias > 1
		       && TREE_CODE (decl) == INDIRECT_REF
		       && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
		{
		  tree type;

		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
		     tree operand 1.  */
		  if (TREE_CODE (decl) == COMPONENT_REF)
		    decl = TREE_OPERAND (decl, 1);

		  type = TREE_TYPE (decl);
		  type = strip_array_types (type);

		  if (POINTER_TYPE_P (type))
		    {
		      int align;

		      type = TREE_TYPE (type);
		      /* Using TYPE_ALIGN_OK is rather conservative as
			 only the ada frontend actually sets it.  */
		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
			       : BITS_PER_UNIT);
		      mark_reg_pointer (operand0, align);
		    }
		}
	    }

	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
	  && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
	{
	  ishighonly = 1;
	  operand1 = XEXP (operand1, 0);
	}
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
	      || function_label_operand (operand1, mode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = force_mode (word_mode, scratch_reg);
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  gcc_assert (GET_CODE (const_part) == CONST_INT);

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}

	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }

	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  rtx const_mem = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
							mode, temp);
		  operands[1] = replace_equiv_address (const_mem, operands[1]);
		  emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  if (REG_P (operand0) && REG_P (operands[1]))
		    copy_reg_pointer (operand0, operands[1]);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
	  /* On the HPPA, references to data space are supposed to use dp,
	     register 27, but showing it in the RTL inhibits various cse
	     and loop optimizations.  */
	  else
	    {
	      rtx temp, set;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (mode);

	      /* Loading a SYMBOL_REF into a register makes that register
		 safe to be used as the base in an indexed address.

		 Don't mark hard registers though.  That loses.  */
	      if (GET_CODE (operand0) == REG
		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (operand0, BITS_PER_UNIT);
	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (temp, BITS_PER_UNIT);

	      if (ishighonly)
		set = gen_rtx_SET (mode, operand0, temp);
1931 else
1932 set = gen_rtx_SET (VOIDmode,
1933 operand0,
1934 gen_rtx_LO_SUM (mode, temp, operand1));
1936 emit_insn (gen_rtx_SET (VOIDmode,
1937 temp,
1938 gen_rtx_HIGH (mode, operand1)));
1939 emit_insn (set);
1942 return 1;
1944 else if (pa_tls_referenced_p (operand1))
1946 rtx tmp = operand1;
1947 rtx addend = NULL;
1949 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1951 addend = XEXP (XEXP (tmp, 0), 1);
1952 tmp = XEXP (XEXP (tmp, 0), 0);
1955 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1956 tmp = legitimize_tls_address (tmp);
1957 if (addend)
1959 tmp = gen_rtx_PLUS (mode, tmp, addend);
1960 tmp = force_operand (tmp, operands[0]);
1962 operands[1] = tmp;
1964 else if (GET_CODE (operand1) != CONST_INT
1965 || !cint_ok_for_move (INTVAL (operand1)))
1967 rtx insn, temp;
1968 rtx op1 = operand1;
1969 HOST_WIDE_INT value = 0;
1970 HOST_WIDE_INT insv = 0;
1971 int insert = 0;
1973 if (GET_CODE (operand1) == CONST_INT)
1974 value = INTVAL (operand1);
1976 if (TARGET_64BIT
1977 && GET_CODE (operand1) == CONST_INT
1978 && HOST_BITS_PER_WIDE_INT > 32
1979 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1981 HOST_WIDE_INT nval;
1983 /* Extract the low order 32 bits of the value and sign extend.
1984 If the new value is the same as the original value, we
1985 can use the original value as-is. If the new value is
1986 different, we use it and insert the most-significant 32-bits
1987 of the original value into the final result. */
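/* Worked example: for VALUE 0x123456789, the low-order 32 bits are
   0x23456789 with a clear sign bit, so NVAL is 0x23456789; since
   NVAL != VALUE, INSV becomes 0x1 (VALUE >> 32) and the insertion
   path below is used. For VALUE 0xffffffff87654321 the low-order
   32 bits sign extend back to the original value, so no insertion
   is needed and the constant is split normally. */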
1988 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1989 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1990 if (value != nval)
1992 #if HOST_BITS_PER_WIDE_INT > 32
1993 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1994 #endif
1995 insert = 1;
1996 value = nval;
1997 operand1 = GEN_INT (nval);
2001 if (reload_in_progress || reload_completed)
2002 temp = scratch_reg ? scratch_reg : operand0;
2003 else
2004 temp = gen_reg_rtx (mode);
2006 /* We don't directly split DImode constants on 32-bit targets
2007 because PLUS uses an 11-bit immediate and the insn sequence
2008 generated is not as efficient as the one using HIGH/LO_SUM. */
2009 if (GET_CODE (operand1) == CONST_INT
2010 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2011 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2012 && !insert)
2014 /* Directly break constant into high and low parts. This
2015 provides better optimization opportunities because various
2016 passes recognize constants split with PLUS but not LO_SUM.
2017 We use a 14-bit signed low part except when the addition
2018 of 0x4000 to the high part might change the sign of the
2019 high part. */
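/* Worked example: for VALUE 0x12343fff the initial split gives
   LOW 0x3fff and HIGH 0x12340000. Since LOW >= 0x2000 it will not
   fit in a 14-bit signed immediate, so HIGH is bumped to 0x12344000
   and LOW is recomputed as -1; the sum still equals the original
   value, and -1 fits in 14 bits. */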
2020 HOST_WIDE_INT low = value & 0x3fff;
2021 HOST_WIDE_INT high = value & ~ 0x3fff;
2023 if (low >= 0x2000)
2025 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2026 high += 0x2000;
2027 else
2028 high += 0x4000;
2031 low = value - high;
2033 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2034 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2036 else
2038 emit_insn (gen_rtx_SET (VOIDmode, temp,
2039 gen_rtx_HIGH (mode, operand1)));
2040 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2043 insn = emit_move_insn (operands[0], operands[1]);
2045 /* Now insert the most significant 32 bits of the value
2046 into the register. When we don't have a second register
2047 available, it could take up to nine instructions to load
2048 a 64-bit integer constant. Prior to reload, we force
2049 constants that would take more than three instructions
2050 to load to the constant pool. During and after reload,
2051 we have to handle all possible values. */
2052 if (insert)
2054 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2055 register and the value to be inserted is outside the
2056 range that can be loaded with three depdi instructions. */
2057 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2059 operand1 = GEN_INT (insv);
2061 emit_insn (gen_rtx_SET (VOIDmode, temp,
2062 gen_rtx_HIGH (mode, operand1)));
2063 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2064 emit_insn (gen_insv (operand0, GEN_INT (32),
2065 const0_rtx, temp));
2067 else
2069 int len = 5, pos = 27;
2071 /* Insert the bits using the depdi instruction. */
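/* Each depdi deposits a field whose contents are the sign extension
   of a 5-bit immediate, so the loop below peels off (at least) five
   explicit bits at a time, then widens the field while the adjacent
   higher-order bits merely repeat the sign of those five bits. Long
   runs of zeros or ones in the upper half therefore add no extra
   instructions. Bit positions are counted from the most significant
   bit on this target. */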
2072 while (pos >= 0)
2074 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2075 HOST_WIDE_INT sign = v5 < 0;
2077 /* Left extend the insertion. */
2078 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2079 while (pos > 0 && (insv & 1) == sign)
2081 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2082 len += 1;
2083 pos -= 1;
2086 emit_insn (gen_insv (operand0, GEN_INT (len),
2087 GEN_INT (pos), GEN_INT (v5)));
2089 len = pos > 0 && pos < 5 ? pos : 5;
2090 pos -= len;
2095 set_unique_reg_note (insn, REG_EQUAL, op1);
2097 return 1;
2100 /* Now have insn-emit do whatever it normally does. */
2101 return 0;
2104 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2105 it will need a link/runtime reloc). */
2107 int
2108 reloc_needed (tree exp)
2110 int reloc = 0;
2112 switch (TREE_CODE (exp))
2114 case ADDR_EXPR:
2115 return 1;
2117 case POINTER_PLUS_EXPR:
2118 case PLUS_EXPR:
2119 case MINUS_EXPR:
2120 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2121 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2122 break;
2124 CASE_CONVERT:
2125 case NON_LVALUE_EXPR:
2126 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2127 break;
2129 case CONSTRUCTOR:
2131 tree value;
2132 unsigned HOST_WIDE_INT ix;
2134 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2135 if (value)
2136 reloc |= reloc_needed (value);
2138 break;
2140 case ERROR_MARK:
2141 break;
2143 default:
2144 break;
2146 return reloc;
2149 /* Does operand (which is a symbolic_operand) live in text space?
2150 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2151 will be true. */
2153 int
2154 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2156 if (GET_CODE (operand) == CONST)
2157 operand = XEXP (XEXP (operand, 0), 0);
2158 if (flag_pic)
2160 if (GET_CODE (operand) == SYMBOL_REF)
2161 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2163 else
2165 if (GET_CODE (operand) == SYMBOL_REF)
2166 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2168 return 1;
2172 /* Return the best assembler insn template
2173 for moving operands[1] into operands[0] as a fullword. */
2174 const char *
2175 singlemove_string (rtx *operands)
2177 HOST_WIDE_INT intval;
2179 if (GET_CODE (operands[0]) == MEM)
2180 return "stw %r1,%0";
2181 if (GET_CODE (operands[1]) == MEM)
2182 return "ldw %1,%0";
2183 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2185 long i;
2186 REAL_VALUE_TYPE d;
2188 gcc_assert (GET_MODE (operands[1]) == SFmode);
2190 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2191 bit pattern. */
2192 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2193 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2195 operands[1] = GEN_INT (i);
2196 /* Fall through to CONST_INT case. */
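/* A few illustrative cases for the constant handling below: 42 fits
   in 14 bits and yields "ldi 42,%0"; 0x2b000 has its low 11 bits
   clear and yields "ldil L'0x2b000,%0"; 0xff0000 is a contiguous
   bitstring and is handled by a single zdepi; a constant such as
   0x12345 needs the two-insn ldil/ldo sequence. */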
2198 if (GET_CODE (operands[1]) == CONST_INT)
2200 intval = INTVAL (operands[1]);
2202 if (VAL_14_BITS_P (intval))
2203 return "ldi %1,%0";
2204 else if ((intval & 0x7ff) == 0)
2205 return "ldil L'%1,%0";
2206 else if (zdepi_cint_p (intval))
2207 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2208 else
2209 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2211 return "copy %1,%0";
2215 /* Compute position (in OP[1]) and width (in OP[2])
2216 useful for copying IMM to a register using the zdepi
2217 instruction. Store the immediate value to insert in OP[0]. */
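/* Worked example: IMM 0x00ff0000 has its least significant set bit
   at 16, so after the shift the low bits are 0xff; bit 4 of the
   field is set, giving LEN 8 and a 5-bit value that sign extends
   to -1. The result is OP[0] = -1, OP[1] = 15, OP[2] = 8, i.e.
   deposit eight ones ending at bit position 15 (counting from the
   most significant bit), which recreates 0x00ff0000. */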
2218 static void
2219 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2221 int lsb, len;
2223 /* Find the least significant set bit in IMM. */
2224 for (lsb = 0; lsb < 32; lsb++)
2226 if ((imm & 1) != 0)
2227 break;
2228 imm >>= 1;
2231 /* Choose variants based on *sign* of the 5-bit field. */
2232 if ((imm & 0x10) == 0)
2233 len = (lsb <= 28) ? 4 : 32 - lsb;
2234 else
2236 /* Find the width of the bitstring in IMM. */
2237 for (len = 5; len < 32 - lsb; len++)
2239 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2240 break;
2243 /* Sign extend IMM as a 5-bit value. */
2244 imm = (imm & 0xf) - 0x10;
2247 op[0] = imm;
2248 op[1] = 31 - lsb;
2249 op[2] = len;
2252 /* Compute position (in OP[1]) and width (in OP[2])
2253 useful for copying IMM to a register using the depdi,z
2254 instruction. Store the immediate value to insert in OP[0]. */
2255 void
2256 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2258 int lsb, len, maxlen;
2260 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2262 /* Find the least significant set bit in IMM. */
2263 for (lsb = 0; lsb < maxlen; lsb++)
2265 if ((imm & 1) != 0)
2266 break;
2267 imm >>= 1;
2270 /* Choose variants based on *sign* of the 5-bit field. */
2271 if ((imm & 0x10) == 0)
2272 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2273 else
2275 /* Find the width of the bitstring in IMM. */
2276 for (len = 5; len < maxlen - lsb; len++)
2278 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2279 break;
2282 /* Extend length if host is narrow and IMM is negative. */
2283 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2284 len += 32;
2286 /* Sign extend IMM as a 5-bit value. */
2287 imm = (imm & 0xf) - 0x10;
2290 op[0] = imm;
2291 op[1] = 63 - lsb;
2292 op[2] = len;
2295 /* Output assembler code to perform a doubleword move insn
2296 with operands OPERANDS. */
2298 const char *
2299 output_move_double (rtx *operands)
2301 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2302 rtx latehalf[2];
2303 rtx addreg0 = 0, addreg1 = 0;
2305 /* First classify both operands. */
2307 if (REG_P (operands[0]))
2308 optype0 = REGOP;
2309 else if (offsettable_memref_p (operands[0]))
2310 optype0 = OFFSOP;
2311 else if (GET_CODE (operands[0]) == MEM)
2312 optype0 = MEMOP;
2313 else
2314 optype0 = RNDOP;
2316 if (REG_P (operands[1]))
2317 optype1 = REGOP;
2318 else if (CONSTANT_P (operands[1]))
2319 optype1 = CNSTOP;
2320 else if (offsettable_memref_p (operands[1]))
2321 optype1 = OFFSOP;
2322 else if (GET_CODE (operands[1]) == MEM)
2323 optype1 = MEMOP;
2324 else
2325 optype1 = RNDOP;
2327 /* Check for the cases that the operand constraints are not
2328 supposed to allow to happen. */
2329 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2331 /* Handle copies between general and floating registers. */
2333 if (optype0 == REGOP && optype1 == REGOP
2334 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2336 if (FP_REG_P (operands[0]))
2338 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2339 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2340 return "{fldds|fldd} -16(%%sp),%0";
2342 else
2344 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2345 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2346 return "{ldws|ldw} -12(%%sp),%R0";
2350 /* Handle auto decrementing and incrementing loads and stores
2351 specifically, since the structure of the function doesn't work
2352 for them without major modification. Do it better when we learn
2353 this port about the general inc/dec addressing of PA.
2354 (This was written by tege. Chide him if it doesn't work.) */
2356 if (optype0 == MEMOP)
2358 /* We have to output the address syntax ourselves, since print_operand
2359 doesn't deal with the addresses we want to use. Fix this later. */
2361 rtx addr = XEXP (operands[0], 0);
2362 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2364 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2366 operands[0] = XEXP (addr, 0);
2367 gcc_assert (GET_CODE (operands[1]) == REG
2368 && GET_CODE (operands[0]) == REG);
2370 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2372 /* No overlap between high target register and address
2373 register. (We do this in a non-obvious way to
2374 save a register file writeback) */
2375 if (GET_CODE (addr) == POST_INC)
2376 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2377 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2379 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2381 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2383 operands[0] = XEXP (addr, 0);
2384 gcc_assert (GET_CODE (operands[1]) == REG
2385 && GET_CODE (operands[0]) == REG);
2387 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2388 /* No overlap between high target register and address
2389 register. (We do this in a non-obvious way to save a
2390 register file writeback) */
2391 if (GET_CODE (addr) == PRE_INC)
2392 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2393 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2396 if (optype1 == MEMOP)
2398 /* We have to output the address syntax ourselves, since print_operand
2399 doesn't deal with the addresses we want to use. Fix this later. */
2401 rtx addr = XEXP (operands[1], 0);
2402 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2404 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2406 operands[1] = XEXP (addr, 0);
2407 gcc_assert (GET_CODE (operands[0]) == REG
2408 && GET_CODE (operands[1]) == REG);
2410 if (!reg_overlap_mentioned_p (high_reg, addr))
2412 /* No overlap between high target register and address
2413 register. (We do this in a non-obvious way to
2414 save a register file writeback) */
2415 if (GET_CODE (addr) == POST_INC)
2416 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2417 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2419 else
2421 /* This is an undefined situation. We should load into the
2422 address register *and* update that register. Probably
2423 we don't need to handle this at all. */
2424 if (GET_CODE (addr) == POST_INC)
2425 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2426 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2429 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2431 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2433 operands[1] = XEXP (addr, 0);
2434 gcc_assert (GET_CODE (operands[0]) == REG
2435 && GET_CODE (operands[1]) == REG);
2437 if (!reg_overlap_mentioned_p (high_reg, addr))
2439 /* No overlap between high target register and address
2440 register. (We do this in a non-obvious way to
2441 save a register file writeback) */
2442 if (GET_CODE (addr) == PRE_INC)
2443 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2444 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2446 else
2448 /* This is an undefined situation. We should load into the
2449 address register *and* update that register. Probably
2450 we don't need to handle this at all. */
2451 if (GET_CODE (addr) == PRE_INC)
2452 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2453 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2456 else if (GET_CODE (addr) == PLUS
2457 && GET_CODE (XEXP (addr, 0)) == MULT)
2459 rtx xoperands[4];
2460 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2462 if (!reg_overlap_mentioned_p (high_reg, addr))
2464 xoperands[0] = high_reg;
2465 xoperands[1] = XEXP (addr, 1);
2466 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2467 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2468 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2469 xoperands);
2470 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2472 else
2474 xoperands[0] = high_reg;
2475 xoperands[1] = XEXP (addr, 1);
2476 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2477 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2478 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2479 xoperands);
2480 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2485 /* If an operand is an unoffsettable memory ref, find a register
2486 we can increment temporarily to make it refer to the second word. */
2488 if (optype0 == MEMOP)
2489 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2491 if (optype1 == MEMOP)
2492 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2494 /* Ok, we can do one word at a time.
2495 Normally we do the low-numbered word first.
2497 In either case, set up in LATEHALF the operands to use
2498 for the high-numbered word and in some cases alter the
2499 operands in OPERANDS to be suitable for the low-numbered word. */
2501 if (optype0 == REGOP)
2502 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2503 else if (optype0 == OFFSOP)
2504 latehalf[0] = adjust_address (operands[0], SImode, 4);
2505 else
2506 latehalf[0] = operands[0];
2508 if (optype1 == REGOP)
2509 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2510 else if (optype1 == OFFSOP)
2511 latehalf[1] = adjust_address (operands[1], SImode, 4);
2512 else if (optype1 == CNSTOP)
2513 split_double (operands[1], &operands[1], &latehalf[1]);
2514 else
2515 latehalf[1] = operands[1];
2517 /* If the first move would clobber the source of the second one,
2518 do them in the other order.
2520 This can happen in two cases:
2522 mem -> register where the first half of the destination register
2523 is the same register used in the memory's address. Reload
2524 can create such insns.
2526 mem in this case will be either register indirect or register
2527 indirect plus a valid offset.
2529 register -> register move where REGNO(dst) == REGNO(src + 1)
2530 someone (Tim/Tege?) claimed this can happen for parameter loads.
2532 Handle mem -> register case first. */
2533 if (optype0 == REGOP
2534 && (optype1 == MEMOP || optype1 == OFFSOP)
2535 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2536 operands[1], 0))
2538 /* Do the late half first. */
2539 if (addreg1)
2540 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2541 output_asm_insn (singlemove_string (latehalf), latehalf);
2543 /* Then clobber. */
2544 if (addreg1)
2545 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2546 return singlemove_string (operands);
2549 /* Now handle register -> register case. */
2550 if (optype0 == REGOP && optype1 == REGOP
2551 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2553 output_asm_insn (singlemove_string (latehalf), latehalf);
2554 return singlemove_string (operands);
2557 /* Normal case: do the two words, low-numbered first. */
2559 output_asm_insn (singlemove_string (operands), operands);
2561 /* Make any unoffsettable addresses point at high-numbered word. */
2562 if (addreg0)
2563 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2564 if (addreg1)
2565 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2567 /* Do that word. */
2568 output_asm_insn (singlemove_string (latehalf), latehalf);
2570 /* Undo the adds we just did. */
2571 if (addreg0)
2572 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2573 if (addreg1)
2574 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2576 return "";
2579 const char *
2580 output_fp_move_double (rtx *operands)
2582 if (FP_REG_P (operands[0]))
2584 if (FP_REG_P (operands[1])
2585 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2586 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2587 else
2588 output_asm_insn ("fldd%F1 %1,%0", operands);
2590 else if (FP_REG_P (operands[1]))
2592 output_asm_insn ("fstd%F0 %1,%0", operands);
2594 else
2596 rtx xoperands[2];
2598 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2600 /* This is a pain. You have to be prepared to deal with an
2601 arbitrary address here including pre/post increment/decrement.
2603 So avoid this in the MD. */
2604 gcc_assert (GET_CODE (operands[0]) == REG);
2606 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2607 xoperands[0] = operands[0];
2608 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2610 return "";
2613 /* Return a REG that occurs in ADDR with coefficient 1.
2614 ADDR can be effectively incremented by incrementing REG. */
2616 static rtx
2617 find_addr_reg (rtx addr)
2619 while (GET_CODE (addr) == PLUS)
2621 if (GET_CODE (XEXP (addr, 0)) == REG)
2622 addr = XEXP (addr, 0);
2623 else if (GET_CODE (XEXP (addr, 1)) == REG)
2624 addr = XEXP (addr, 1);
2625 else if (CONSTANT_P (XEXP (addr, 0)))
2626 addr = XEXP (addr, 1);
2627 else if (CONSTANT_P (XEXP (addr, 1)))
2628 addr = XEXP (addr, 0);
2629 else
2630 gcc_unreachable ();
2632 gcc_assert (GET_CODE (addr) == REG);
2633 return addr;
2636 /* Emit code to perform a block move.
2638 OPERANDS[0] is the destination pointer as a REG, clobbered.
2639 OPERANDS[1] is the source pointer as a REG, clobbered.
2640 OPERANDS[2] is a register for temporary storage.
2641 OPERANDS[3] is a register for temporary storage.
2642 OPERANDS[4] is the size as a CONST_INT
2643 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2644 OPERANDS[6] is another temporary register. */
2646 const char *
2647 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2649 int align = INTVAL (operands[5]);
2650 unsigned long n_bytes = INTVAL (operands[4]);
2652 /* We can't move more than a word at a time because the PA
2653 has no integer move insns wider than a word. (Could use fp mem ops?) */
2654 if (align > (TARGET_64BIT ? 8 : 4))
2655 align = (TARGET_64BIT ? 8 : 4);
2657 /* Note that we know each loop below will execute at least twice
2658 (else we would have open-coded the copy). */
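/* In each loop below, the "addib,>= -N,%2,.-12" decrements the byte
   counter and branches back three instructions when the result is
   still nonnegative, while the store that follows it executes in
   the branch delay slot; each iteration therefore moves two words
   (or doublewords, halfwords, bytes) in five instructions. */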
2659 switch (align)
2661 case 8:
2662 /* Pre-adjust the loop counter. */
2663 operands[4] = GEN_INT (n_bytes - 16);
2664 output_asm_insn ("ldi %4,%2", operands);
2666 /* Copying loop. */
2667 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2668 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2669 output_asm_insn ("std,ma %3,8(%0)", operands);
2670 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2671 output_asm_insn ("std,ma %6,8(%0)", operands);
2673 /* Handle the residual. There could be up to 7 bytes of
2674 residual to copy! */
2675 if (n_bytes % 16 != 0)
2677 operands[4] = GEN_INT (n_bytes % 8);
2678 if (n_bytes % 16 >= 8)
2679 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2680 if (n_bytes % 8 != 0)
2681 output_asm_insn ("ldd 0(%1),%6", operands);
2682 if (n_bytes % 16 >= 8)
2683 output_asm_insn ("std,ma %3,8(%0)", operands);
2684 if (n_bytes % 8 != 0)
2685 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2687 return "";
2689 case 4:
2690 /* Pre-adjust the loop counter. */
2691 operands[4] = GEN_INT (n_bytes - 8);
2692 output_asm_insn ("ldi %4,%2", operands);
2694 /* Copying loop. */
2695 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2696 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2697 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2698 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2699 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2701 /* Handle the residual. There could be up to 7 bytes of
2702 residual to copy! */
2703 if (n_bytes % 8 != 0)
2705 operands[4] = GEN_INT (n_bytes % 4);
2706 if (n_bytes % 8 >= 4)
2707 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2708 if (n_bytes % 4 != 0)
2709 output_asm_insn ("ldw 0(%1),%6", operands);
2710 if (n_bytes % 8 >= 4)
2711 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2712 if (n_bytes % 4 != 0)
2713 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2715 return "";
2717 case 2:
2718 /* Pre-adjust the loop counter. */
2719 operands[4] = GEN_INT (n_bytes - 4);
2720 output_asm_insn ("ldi %4,%2", operands);
2722 /* Copying loop. */
2723 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2724 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2725 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2726 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2727 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2729 /* Handle the residual. */
2730 if (n_bytes % 4 != 0)
2732 if (n_bytes % 4 >= 2)
2733 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2734 if (n_bytes % 2 != 0)
2735 output_asm_insn ("ldb 0(%1),%6", operands);
2736 if (n_bytes % 4 >= 2)
2737 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2738 if (n_bytes % 2 != 0)
2739 output_asm_insn ("stb %6,0(%0)", operands);
2741 return "";
2743 case 1:
2744 /* Pre-adjust the loop counter. */
2745 operands[4] = GEN_INT (n_bytes - 2);
2746 output_asm_insn ("ldi %4,%2", operands);
2748 /* Copying loop. */
2749 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2750 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2751 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2752 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2753 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2755 /* Handle the residual. */
2756 if (n_bytes % 2 != 0)
2758 output_asm_insn ("ldb 0(%1),%3", operands);
2759 output_asm_insn ("stb %3,0(%0)", operands);
2761 return "";
2763 default:
2764 gcc_unreachable ();
2768 /* Count the number of insns necessary to handle this block move.
2770 Basic structure is the same as output_block_move, except that we
2771 count insns rather than emit them. */
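/* For example, an 11-byte copy with 4-byte alignment costs the
   6-insn copying loop plus 2 insns for the sub-word residual
   (11 % 4 != 0) and none for the word residual (11 % 8 < 4),
   i.e. 8 insns or 32 bytes. */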
2773 static int
2774 compute_movmem_length (rtx insn)
2776 rtx pat = PATTERN (insn);
2777 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2778 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2779 unsigned int n_insns = 0;
2781 /* We can't move more than a word at a time because the PA
2782 has no integer move insns wider than a word. (Could use fp mem ops?) */
2783 if (align > (TARGET_64BIT ? 8 : 4))
2784 align = (TARGET_64BIT ? 8 : 4);
2786 /* The basic copying loop. */
2787 n_insns = 6;
2789 /* Residuals. */
2790 if (n_bytes % (2 * align) != 0)
2792 if ((n_bytes % (2 * align)) >= align)
2793 n_insns += 2;
2795 if ((n_bytes % align) != 0)
2796 n_insns += 2;
2799 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2800 return n_insns * 4;
2803 /* Emit code to perform a block clear.
2805 OPERANDS[0] is the destination pointer as a REG, clobbered.
2806 OPERANDS[1] is a register for temporary storage.
2807 OPERANDS[2] is the size as a CONST_INT
2808 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2810 const char *
2811 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2813 int align = INTVAL (operands[3]);
2814 unsigned long n_bytes = INTVAL (operands[2]);
2816 /* We can't clear more than a word at a time because the PA
2817 has no integer move insns wider than a word. */
2818 if (align > (TARGET_64BIT ? 8 : 4))
2819 align = (TARGET_64BIT ? 8 : 4);
2821 /* Note that we know each loop below will execute at least twice
2822 (else we would have open-coded the copy). */
2823 switch (align)
2825 case 8:
2826 /* Pre-adjust the loop counter. */
2827 operands[2] = GEN_INT (n_bytes - 16);
2828 output_asm_insn ("ldi %2,%1", operands);
2830 /* Loop. */
2831 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2832 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2833 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2835 /* Handle the residual. There could be up to 7 bytes of
2836 residual to copy! */
2837 if (n_bytes % 16 != 0)
2839 operands[2] = GEN_INT (n_bytes % 8);
2840 if (n_bytes % 16 >= 8)
2841 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2842 if (n_bytes % 8 != 0)
2843 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2845 return "";
2847 case 4:
2848 /* Pre-adjust the loop counter. */
2849 operands[2] = GEN_INT (n_bytes - 8);
2850 output_asm_insn ("ldi %2,%1", operands);
2852 /* Loop. */
2853 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2854 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2855 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2857 /* Handle the residual. There could be up to 7 bytes of
2858 residual to copy! */
2859 if (n_bytes % 8 != 0)
2861 operands[2] = GEN_INT (n_bytes % 4);
2862 if (n_bytes % 8 >= 4)
2863 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2864 if (n_bytes % 4 != 0)
2865 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2867 return "";
2869 case 2:
2870 /* Pre-adjust the loop counter. */
2871 operands[2] = GEN_INT (n_bytes - 4);
2872 output_asm_insn ("ldi %2,%1", operands);
2874 /* Loop. */
2875 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2876 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2877 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2879 /* Handle the residual. */
2880 if (n_bytes % 4 != 0)
2882 if (n_bytes % 4 >= 2)
2883 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2884 if (n_bytes % 2 != 0)
2885 output_asm_insn ("stb %%r0,0(%0)", operands);
2887 return "";
2889 case 1:
2890 /* Pre-adjust the loop counter. */
2891 operands[2] = GEN_INT (n_bytes - 2);
2892 output_asm_insn ("ldi %2,%1", operands);
2894 /* Loop. */
2895 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2896 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2897 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2899 /* Handle the residual. */
2900 if (n_bytes % 2 != 0)
2901 output_asm_insn ("stb %%r0,0(%0)", operands);
2903 return "";
2905 default:
2906 gcc_unreachable ();
2910 /* Count the number of insns necessary to handle this block clear.
2912 Basic structure is the same as output_block_clear, except that we
2913 count insns rather than emit them. */
2915 static int
2916 compute_clrmem_length (rtx insn)
2918 rtx pat = PATTERN (insn);
2919 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2920 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2921 unsigned int n_insns = 0;
2923 /* We can't clear more than a word at a time because the PA
2924 has no integer move insns wider than a word. */
2925 if (align > (TARGET_64BIT ? 8 : 4))
2926 align = (TARGET_64BIT ? 8 : 4);
2928 /* The basic loop. */
2929 n_insns = 4;
2931 /* Residuals. */
2932 if (n_bytes % (2 * align) != 0)
2934 if ((n_bytes % (2 * align)) >= align)
2935 n_insns++;
2937 if ((n_bytes % align) != 0)
2938 n_insns++;
2941 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2942 return n_insns * 4;
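/* Return a string to perform a bitwise-and of operands[1] with
   operands[2], storing the result in operands[0]. For example,
   the mask 0x3f becomes "{extru|extrw,u} %1,31,6,%0" (extract the
   low six bits), while 0xffff00ff, whose clear bits form a single
   contiguous field, becomes "{depi|depwi} 0,23,8,%0" (deposit
   eight zeros into that field). */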
2946 const char *
2947 output_and (rtx *operands)
2949 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2951 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2952 int ls0, ls1, ms0, p, len;
2954 for (ls0 = 0; ls0 < 32; ls0++)
2955 if ((mask & (1 << ls0)) == 0)
2956 break;
2958 for (ls1 = ls0; ls1 < 32; ls1++)
2959 if ((mask & (1 << ls1)) != 0)
2960 break;
2962 for (ms0 = ls1; ms0 < 32; ms0++)
2963 if ((mask & (1 << ms0)) == 0)
2964 break;
2966 gcc_assert (ms0 == 32);
2968 if (ls1 == 32)
2970 len = ls0;
2972 gcc_assert (len);
2974 operands[2] = GEN_INT (len);
2975 return "{extru|extrw,u} %1,31,%2,%0";
2977 else
2979 /* We could use this `depi' for the case above as well, but `depi'
2980 requires one more register file access than an `extru'. */
2982 p = 31 - ls0;
2983 len = ls1 - ls0;
2985 operands[2] = GEN_INT (p);
2986 operands[3] = GEN_INT (len);
2987 return "{depi|depwi} 0,%2,%3,%0";
2990 else
2991 return "and %1,%2,%0";
2994 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2995 storing the result in operands[0]. */
2996 const char *
2997 output_64bit_and (rtx *operands)
2999 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3001 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3002 int ls0, ls1, ms0, p, len;
3004 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3005 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3006 break;
3008 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3009 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3010 break;
3012 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3013 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3014 break;
3016 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3018 if (ls1 == HOST_BITS_PER_WIDE_INT)
3020 len = ls0;
3022 gcc_assert (len);
3024 operands[2] = GEN_INT (len);
3025 return "extrd,u %1,63,%2,%0";
3027 else
3029 /* We could use this `depdi' for the case above as well, but `depdi'
3030 requires one more register file access than an `extrd,u'. */
3032 p = 63 - ls0;
3033 len = ls1 - ls0;
3035 operands[2] = GEN_INT (p);
3036 operands[3] = GEN_INT (len);
3037 return "depdi 0,%2,%3,%0";
3040 else
3041 return "and %1,%2,%0";
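/* Return a string to perform a bitwise inclusive-or of operands[1]
   with operands[2], storing the result in operands[0]. The nonzero
   mask must be a single contiguous field of ones; e.g. 0x00ff0000
   becomes "{depi|depwi} -1,15,8,%0", depositing eight ones (the
   sign-extended 5-bit immediate -1) over that field. */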
3044 const char *
3045 output_ior (rtx *operands)
3047 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3048 int bs0, bs1, p, len;
3050 if (INTVAL (operands[2]) == 0)
3051 return "copy %1,%0";
3053 for (bs0 = 0; bs0 < 32; bs0++)
3054 if ((mask & (1 << bs0)) != 0)
3055 break;
3057 for (bs1 = bs0; bs1 < 32; bs1++)
3058 if ((mask & (1 << bs1)) == 0)
3059 break;
3061 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3063 p = 31 - bs0;
3064 len = bs1 - bs0;
3066 operands[2] = GEN_INT (p);
3067 operands[3] = GEN_INT (len);
3068 return "{depi|depwi} -1,%2,%3,%0";
3071 /* Return a string to perform a bitwise inclusive-or of operands[1]
3072 with operands[2], storing the result in operands[0]. */
3073 const char *
3074 output_64bit_ior (rtx *operands)
3076 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3077 int bs0, bs1, p, len;
3079 if (INTVAL (operands[2]) == 0)
3080 return "copy %1,%0";
3082 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3083 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3084 break;
3086 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3087 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3088 break;
3090 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3091 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3093 p = 63 - bs0;
3094 len = bs1 - bs0;
3096 operands[2] = GEN_INT (p);
3097 operands[3] = GEN_INT (len);
3098 return "depdi -1,%2,%3,%0";
3101 /* Target hook for assembling integer objects. This code handles
3102 aligned SI and DI integers specially since function references
3103 must be preceded by P%. */
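/* For example, an aligned word-size reference to a function FOO is
   emitted as ".word P%foo" (".dword P%foo" on 64-bit targets); the
   P% prefix asks the linker for a plabel rather than the raw code
   address. */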
3105 static bool
3106 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3108 if (size == UNITS_PER_WORD
3109 && aligned_p
3110 && function_label_operand (x, VOIDmode))
3112 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3113 output_addr_const (asm_out_file, x);
3114 fputc ('\n', asm_out_file);
3115 return true;
3117 return default_assemble_integer (x, size, aligned_p);
3120 /* Output an ascii string. */
3121 void
3122 output_ascii (FILE *file, const char *p, int size)
3124 int i;
3125 int chars_output;
3126 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3128 /* The HP assembler can only take strings of 256 characters at one
3129 time. This is a limitation on input line length, *not* the
3130 length of the string. Sigh. Even worse, it seems that the
3131 restriction is in number of input characters (see \xnn &
3132 \whatever). So we have to do this very carefully. */
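/* For example, the single byte 0x1b is written as the four input
   characters "\x1b". The 243-character threshold below keeps each
   emitted line, including the ".STRING" directive and quotes, under
   the assembler's 256-character input-line limit even when every
   byte needs a four-character escape. */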
3134 fputs ("\t.STRING \"", file);
3136 chars_output = 0;
3137 for (i = 0; i < size; i += 4)
3139 int co = 0;
3140 int io = 0;
3141 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3143 register unsigned int c = (unsigned char) p[i + io];
3145 if (c == '\"' || c == '\\')
3146 partial_output[co++] = '\\';
3147 if (c >= ' ' && c < 0177)
3148 partial_output[co++] = c;
3149 else
3151 unsigned int hexd;
3152 partial_output[co++] = '\\';
3153 partial_output[co++] = 'x';
3154 hexd = c / 16 - 0 + '0';
3155 if (hexd > '9')
3156 hexd -= '9' - 'a' + 1;
3157 partial_output[co++] = hexd;
3158 hexd = c % 16 - 0 + '0';
3159 if (hexd > '9')
3160 hexd -= '9' - 'a' + 1;
3161 partial_output[co++] = hexd;
3164 if (chars_output + co > 243)
3166 fputs ("\"\n\t.STRING \"", file);
3167 chars_output = 0;
3169 fwrite (partial_output, 1, (size_t) co, file);
3170 chars_output += co;
3171 co = 0;
3173 fputs ("\"\n", file);
3176 /* Try to rewrite floating point comparisons & branches to avoid
3177 useless add,tr insns.
3179 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3180 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3181 first attempt to remove useless add,tr insns. It is zero
3182 for the second pass as reorg sometimes leaves bogus REG_DEAD
3183 notes lying around.
3185 When CHECK_NOTES is zero we can only eliminate add,tr insns
3186 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3187 instructions. */
3188 static void
3189 remove_useless_addtr_insns (int check_notes)
3191 rtx insn;
3192 static int pass = 0;
3194 /* This is fairly cheap, so always run it when optimizing. */
3195 if (optimize > 0)
3197 int fcmp_count = 0;
3198 int fbranch_count = 0;
3200 /* Walk all the insns in this function looking for fcmp & fbranch
3201 instructions. Keep track of how many of each we find. */
3202 for (insn = get_insns (); insn; insn = next_insn (insn))
3204 rtx tmp;
3206 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3207 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3208 continue;
3210 tmp = PATTERN (insn);
3212 /* It must be a set. */
3213 if (GET_CODE (tmp) != SET)
3214 continue;
3216 /* If the destination is CCFP, then we've found an fcmp insn. */
3217 tmp = SET_DEST (tmp);
3218 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3220 fcmp_count++;
3221 continue;
3224 tmp = PATTERN (insn);
3225 /* If this is an fbranch instruction, bump the fbranch counter. */
3226 if (GET_CODE (tmp) == SET
3227 && SET_DEST (tmp) == pc_rtx
3228 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3229 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3230 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3231 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3233 fbranch_count++;
3234 continue;
3239 /* Find all floating point compare + branch insns. If possible,
3240 reverse the comparison & the branch to avoid add,tr insns. */
3241 for (insn = get_insns (); insn; insn = next_insn (insn))
3243 rtx tmp, next;
3245 /* Ignore anything that isn't an INSN. */
3246 if (GET_CODE (insn) != INSN)
3247 continue;
3249 tmp = PATTERN (insn);
3251 /* It must be a set. */
3252 if (GET_CODE (tmp) != SET)
3253 continue;
3255 /* The destination must be CCFP, which is register zero. */
3256 tmp = SET_DEST (tmp);
3257 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3258 continue;
3260 /* INSN should be a set of CCFP.
3262 See if the result of this insn is used in a reversed FP
3263 conditional branch. If so, reverse our condition and
3264 the branch. Doing so avoids useless add,tr insns. */
3265 next = next_insn (insn);
3266 while (next)
3268 /* Jumps, calls and labels stop our search. */
3269 if (GET_CODE (next) == JUMP_INSN
3270 || GET_CODE (next) == CALL_INSN
3271 || GET_CODE (next) == CODE_LABEL)
3272 break;
3274 /* As does another fcmp insn. */
3275 if (GET_CODE (next) == INSN
3276 && GET_CODE (PATTERN (next)) == SET
3277 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3278 && REGNO (SET_DEST (PATTERN (next))) == 0)
3279 break;
3281 next = next_insn (next);
3284 /* Is NEXT a branch? */
3285 if (next
3286 && GET_CODE (next) == JUMP_INSN)
3288 rtx pattern = PATTERN (next);
3290 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3291 and CCFP dies, then reverse our conditional and the branch
3292 to avoid the add,tr. */
3293 if (GET_CODE (pattern) == SET
3294 && SET_DEST (pattern) == pc_rtx
3295 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3296 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3297 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3298 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3299 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3300 && (fcmp_count == fbranch_count
3301 || (check_notes
3302 && find_regno_note (next, REG_DEAD, 0))))
3304 /* Reverse the branch. */
3305 tmp = XEXP (SET_SRC (pattern), 1);
3306 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3307 XEXP (SET_SRC (pattern), 2) = tmp;
3308 INSN_CODE (next) = -1;
3310 /* Reverse our condition. */
3311 tmp = PATTERN (insn);
3312 PUT_CODE (XEXP (tmp, 1),
3313 (reverse_condition_maybe_unordered
3314 (GET_CODE (XEXP (tmp, 1)))));
3320 pass = !pass;
3324 /* You may have trouble believing this, but this is the 32 bit HP-PA
3325 stack layout. Wow.
3327 Offset Contents
3329 Variable arguments (optional; any number may be allocated)
3331 SP-(4*(N+9)) arg word N
3333 SP-56 arg word 5
3334 SP-52 arg word 4
3336 Fixed arguments (must be allocated; may remain unused)
3338 SP-48 arg word 3
3339 SP-44 arg word 2
3340 SP-40 arg word 1
3341 SP-36 arg word 0
3343 Frame Marker
3345 SP-32 External Data Pointer (DP)
3346 SP-28 External sr4
3347 SP-24 External/stub RP (RP')
3348 SP-20 Current RP
3349 SP-16 Static Link
3350 SP-12 Clean up
3351 SP-8 Calling Stub RP (RP'')
3352 SP-4 Previous SP
3354 Top of Frame
3356 SP-0 Stack Pointer (points to next available address)
3360 /* This function saves registers as follows. Registers marked with ' are
3361 this function's registers (as opposed to the previous function's).
3362 If a frame_pointer isn't needed, r4 is saved as a general register;
3363 the space for the frame pointer is still allocated, though, to keep
3364 things simple.
3367 Top of Frame
3369 SP (FP') Previous FP
3370 SP + 4 Alignment filler (sigh)
3371 SP + 8 Space for locals reserved here.
3375 SP + n All call-saved registers used.
3379 SP + o All call-saved fp registers used.
3383 SP + p (SP') points to next available address.
3387 /* Global variables set by output_function_prologue(). */
3388 /* Size of frame. Need to know this to emit return insns from
3389 leaf procedures. */
3390 static HOST_WIDE_INT actual_fsize, local_fsize;
3391 static int save_fregs;
3393 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3394 Handle case where DISP > 8k by using the add_high_const patterns.
3396 Note in DISP > 8k case, we will leave the high part of the address
3397 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
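/* For example, with DISP 0x3000 (too big for 14 bits) and a base of
   %r3, the code below emits roughly "addil L'0x3000,%r3" (leaving
   the high part of the address in %r1) followed by
   "stw %src,R'0x3000(%r1)", where %src stands for the register
   being saved. */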
3399 static void
3400 store_reg (int reg, HOST_WIDE_INT disp, int base)
3402 rtx insn, dest, src, basereg;
3404 src = gen_rtx_REG (word_mode, reg);
3405 basereg = gen_rtx_REG (Pmode, base);
3406 if (VAL_14_BITS_P (disp))
3408 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3409 insn = emit_move_insn (dest, src);
3411 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3413 rtx delta = GEN_INT (disp);
3414 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3416 emit_move_insn (tmpreg, delta);
3417 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3418 if (DO_FRAME_NOTES)
3420 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3421 gen_rtx_SET (VOIDmode, tmpreg,
3422 gen_rtx_PLUS (Pmode, basereg, delta)));
3423 RTX_FRAME_RELATED_P (insn) = 1;
3425 dest = gen_rtx_MEM (word_mode, tmpreg);
3426 insn = emit_move_insn (dest, src);
3428 else
3430 rtx delta = GEN_INT (disp);
3431 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3432 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3434 emit_move_insn (tmpreg, high);
3435 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3436 insn = emit_move_insn (dest, src);
3437 if (DO_FRAME_NOTES)
3438 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3439 gen_rtx_SET (VOIDmode,
3440 gen_rtx_MEM (word_mode,
3441 gen_rtx_PLUS (word_mode,
3442 basereg,
3443 delta)),
3444 src));
3447 if (DO_FRAME_NOTES)
3448 RTX_FRAME_RELATED_P (insn) = 1;
3451 /* Emit RTL to store REG at the memory location specified by BASE and then
3452 add MOD to BASE. MOD must be <= 8k. */
3454 static void
3455 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3457 rtx insn, basereg, srcreg, delta;
3459 gcc_assert (VAL_14_BITS_P (mod));
3461 basereg = gen_rtx_REG (Pmode, base);
3462 srcreg = gen_rtx_REG (word_mode, reg);
3463 delta = GEN_INT (mod);
3465 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3466 if (DO_FRAME_NOTES)
3468 RTX_FRAME_RELATED_P (insn) = 1;
3470 /* RTX_FRAME_RELATED_P must be set on each frame related set
3471 in a parallel with more than one element. */
3472 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3473 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3477 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3478 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3479 whether to add a frame note or not.
3481 In the DISP > 8k case, we leave the high part of the address in %r1.
3482 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3484 static void
3485 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3487 rtx insn;
3489 if (VAL_14_BITS_P (disp))
3491 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3492 plus_constant (gen_rtx_REG (Pmode, base), disp));
3494 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3496 rtx basereg = gen_rtx_REG (Pmode, base);
3497 rtx delta = GEN_INT (disp);
3498 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3500 emit_move_insn (tmpreg, delta);
3501 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3502 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3503 if (DO_FRAME_NOTES)
3504 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3505 gen_rtx_SET (VOIDmode, tmpreg,
3506 gen_rtx_PLUS (Pmode, basereg, delta)));
3508 else
3510 rtx basereg = gen_rtx_REG (Pmode, base);
3511 rtx delta = GEN_INT (disp);
3512 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3514 emit_move_insn (tmpreg,
3515 gen_rtx_PLUS (Pmode, basereg,
3516 gen_rtx_HIGH (Pmode, delta)));
3517 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3518 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3521 if (DO_FRAME_NOTES && note)
3522 RTX_FRAME_RELATED_P (insn) = 1;
3525 HOST_WIDE_INT
3526 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3528 int freg_saved = 0;
3529 int i, j;
3531 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3532 be consistent with the rounding and size calculation done here.
3533 Change them at the same time. */
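/* Worked example (32-bit ABI, assuming STARTING_FRAME_OFFSET of 8
   and the 64-byte preferred stack boundary): 10 bytes of locals
   round up to 12, plus the 8-byte frame-pointer slot gives 20;
   saving %r3-%r5 adds 12 for 32; with no outgoing argument space,
   a non-leaf function then adds the 32-byte frame marker for 64,
   which is already a multiple of 64, so the frame is 64 bytes. */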
3535 /* We do our own stack alignment. First, round the size of the
3536 stack locals up to a word boundary. */
3537 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3539 /* Space for previous frame pointer + filler. If any frame is
3540 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3541 waste some space here for the sake of HP compatibility. The
3542 first slot is only used when the frame pointer is needed. */
3543 if (size || frame_pointer_needed)
3544 size += STARTING_FRAME_OFFSET;
3546 /* If the current function calls __builtin_eh_return, then we need
3547 to allocate stack space for registers that will hold data for
3548 the exception handler. */
3549 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3551 unsigned int i;
3553 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3554 continue;
3555 size += i * UNITS_PER_WORD;
3558 /* Account for space used by the callee general register saves. */
3559 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3560 if (df_regs_ever_live_p (i))
3561 size += UNITS_PER_WORD;
3563 /* Account for space used by the callee floating point register saves. */
3564 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3565 if (df_regs_ever_live_p (i)
3566 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3568 freg_saved = 1;
3570 /* We always save both halves of the FP register, so always
3571 increment the frame size by 8 bytes. */
3572 size += 8;
3575 /* If any of the floating registers are saved, account for the
3576 alignment needed for the floating point register save block. */
3577 if (freg_saved)
3579 size = (size + 7) & ~7;
3580 if (fregs_live)
3581 *fregs_live = 1;
3584 /* The various ABIs include space for the outgoing parameters in the
3585 size of the current function's stack frame. We don't need to align
3586 for the outgoing arguments as their alignment is set by the final
3587 rounding for the frame as a whole. */
3588 size += crtl->outgoing_args_size;
3590 /* Allocate space for the fixed frame marker. This space must be
3591 allocated for any function that makes calls or allocates
3592 stack space. */
3593 if (!current_function_is_leaf || size)
3594 size += TARGET_64BIT ? 48 : 32;
3596 /* Finally, round to the preferred stack boundary. */
3597 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3598 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3601 /* Generate the assembly code for function entry. FILE is a stdio
3602 stream to output the code to. SIZE is an int: how many units of
3603 temporary storage to allocate.
3605 Refer to the array `regs_ever_live' to determine which registers to
3606 save; `regs_ever_live[I]' is nonzero if register number I is ever
3607 used in the function. This function is responsible for knowing
3608 which registers should not be saved even if used. */
3610 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3611 of memory. If any fpu reg is used in the function, we allocate
3612 such a block here, at the bottom of the frame, just in case it's needed.
3614 If this function is a leaf procedure, then we may choose not
3615 to do a "save" insn. The decision about whether or not
3616 to do this is made in regclass.c. */
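/* For a hypothetical small non-leaf function FOO with a 128-byte
   frame that saves RP and two callee general registers, the
   directives emitted below would look something like:

   foo:
   .PROC
   .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=4
   .ENTRY */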
3618 static void
3619 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3621 /* The function's label and associated .PROC must never be
3622 separated and must be output *after* any profiling declarations
3623 to avoid changing spaces/subspaces within a procedure. */
3624 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3625 fputs ("\t.PROC\n", file);
3627 /* hppa_expand_prologue does the dirty work now. We just need
3628 to output the assembler directives which denote the start
3629 of a function. */
3630 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3631 if (current_function_is_leaf)
3632 fputs (",NO_CALLS", file);
3633 else
3634 fputs (",CALLS", file);
3635 if (rp_saved)
3636 fputs (",SAVE_RP", file);
3638 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3639 at the beginning of the frame and that it is used as the frame
3640 pointer for the frame. We do this because our current frame
3641 layout doesn't conform to that specified in the HP runtime
3642 documentation and we need a way to indicate to programs such as
3643 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3644 isn't used by HP compilers but is supported by the assembler.
3645 However, SAVE_SP is supposed to indicate that the previous stack
3646 pointer has been saved in the frame marker. */
3647 if (frame_pointer_needed)
3648 fputs (",SAVE_SP", file);
3650 /* Pass on information about the number of callee register saves
3651 performed in the prologue.
3653 The compiler is supposed to pass the highest register number
3654 saved, the assembler then has to adjust that number before
3655 entering it into the unwind descriptor (to account for any
3656 caller saved registers with lower register numbers than the
3657 first callee saved register). */
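/* E.g., if %r3-%r5 were saved, gr_saved is 3 and ENTRY_GR=5 is
   emitted, %r5 being the highest general register saved. */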
3658 if (gr_saved)
3659 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3661 if (fr_saved)
3662 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3664 fputs ("\n\t.ENTRY\n", file);
3666 remove_useless_addtr_insns (0);
3669 void
3670 hppa_expand_prologue (void)
3672 int merge_sp_adjust_with_store = 0;
3673 HOST_WIDE_INT size = get_frame_size ();
3674 HOST_WIDE_INT offset;
3675 int i;
3676 rtx insn, tmpreg;
3678 gr_saved = 0;
3679 fr_saved = 0;
3680 save_fregs = 0;
3682 /* Compute total size for frame pointer, filler, locals and rounding to
3683 the next word boundary. Similar code appears in compute_frame_size
3684 and must be changed in tandem with this code. */
3685 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3686 if (local_fsize || frame_pointer_needed)
3687 local_fsize += STARTING_FRAME_OFFSET;
3689 actual_fsize = compute_frame_size (size, &save_fregs);
3691 /* Compute a few things we will use often. */
3692 tmpreg = gen_rtx_REG (word_mode, 1);
3694 /* Save RP first. The calling conventions manual states RP will
3695 always be stored into the caller's frame at sp - 20 or sp - 16
3696 depending on which ABI is in use. */
3697 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3699 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3700 rp_saved = true;
3702 else
3703 rp_saved = false;
3705 /* Allocate the local frame and set up the frame pointer if needed. */
3706 if (actual_fsize != 0)
3708 if (frame_pointer_needed)
3710 /* Copy the old frame pointer temporarily into %r1. Set up the
3711 new stack pointer, then store away the saved old frame pointer
3712 into the stack at sp and at the same time update the stack
3713 pointer by actual_fsize bytes. Two versions, first
3714 handles small (<8k) frames. The second handles large (>=8k)
3715 frames. */
3716 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3717 if (DO_FRAME_NOTES)
3718 RTX_FRAME_RELATED_P (insn) = 1;
3720 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3721 if (DO_FRAME_NOTES)
3722 RTX_FRAME_RELATED_P (insn) = 1;
3724 if (VAL_14_BITS_P (actual_fsize))
3725 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3726 else
3728 /* It is incorrect to store the saved frame pointer at *sp,
3729 then increment sp (writes beyond the current stack boundary).
3731 So instead use stwm to store at *sp and post-increment the
3732 stack pointer as an atomic operation. Then increment sp to
3733 finish allocating the new frame. */
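/* For example, with ACTUAL_FSIZE of 20000 the old frame pointer is
   stored with something like "stwm %r1,8128(%sp)" and the stack
   pointer is then advanced by the remaining 11872 bytes. */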
3734 HOST_WIDE_INT adjust1 = 8192 - 64;
3735 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3737 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3738 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3739 adjust2, 1);
3742 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3743 we need to store the previous stack pointer (frame pointer)
3744 into the frame marker on targets that use the HP unwind
3745 library. This allows the HP unwind library to be used to
3746 unwind GCC frames. However, we are not fully compatible
3747 with the HP library because our frame layout differs from
3748 that specified in the HP runtime specification.
3750 We don't want a frame note on this instruction as the frame
3751 marker moves during dynamic stack allocation.
3753 This instruction also serves as a blockage to prevent
3754 register spills from being scheduled before the stack
3755 pointer is raised. This is necessary as we store
3756 registers using the frame pointer as a base register,
3757 and the frame pointer is set before sp is raised. */
3758 if (TARGET_HPUX_UNWIND_LIBRARY)
3760 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3761 GEN_INT (TARGET_64BIT ? -8 : -4));
3763 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3764 frame_pointer_rtx);
3766 else
3767 emit_insn (gen_blockage ());
3769 /* No frame pointer needed. */
3770 else
3772 /* In some cases we can perform the first callee register save
3773 and allocate the stack frame at the same time. If so, just
3774 make a note of it and defer allocating the frame until saving
3775 the callee registers. */
3776 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3777 merge_sp_adjust_with_store = 1;
3778 /* Cannot optimize. Adjust the stack frame by actual_fsize
3779 bytes. */
3780 else
3781 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3782 actual_fsize, 1);
3786 /* Normal register save.
3788 Do not save the frame pointer in the frame_pointer_needed case. It
3789 was done earlier. */
3790 if (frame_pointer_needed)
3792 offset = local_fsize;
3794 /* Saving the EH return data registers in the frame is the simplest
3795 way to get the frame unwind information emitted. We put them
3796 just before the general registers. */
3797 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3799 unsigned int i, regno;
3801 for (i = 0; ; ++i)
3803 regno = EH_RETURN_DATA_REGNO (i);
3804 if (regno == INVALID_REGNUM)
3805 break;
3807 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3808 offset += UNITS_PER_WORD;
3812 for (i = 18; i >= 4; i--)
3813 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3815 store_reg (i, offset, FRAME_POINTER_REGNUM);
3816 offset += UNITS_PER_WORD;
3817 gr_saved++;
3819 /* Account for %r3 which is saved in a special place. */
3820 gr_saved++;
3822 /* No frame pointer needed. */
3823 else
3825 offset = local_fsize - actual_fsize;
3827 /* Saving the EH return data registers in the frame is the simplest
3828 way to get the frame unwind information emitted. */
3829 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3831 unsigned int i, regno;
3833 for (i = 0; ; ++i)
3835 regno = EH_RETURN_DATA_REGNO (i);
3836 if (regno == INVALID_REGNUM)
3837 break;
3839 /* If merge_sp_adjust_with_store is nonzero, then we can
3840 optimize the first save. */
3841 if (merge_sp_adjust_with_store)
3843 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3844 merge_sp_adjust_with_store = 0;
3846 else
3847 store_reg (regno, offset, STACK_POINTER_REGNUM);
3848 offset += UNITS_PER_WORD;
3852 for (i = 18; i >= 3; i--)
3853 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3855 /* If merge_sp_adjust_with_store is nonzero, then we can
3856 optimize the first GR save. */
3857 if (merge_sp_adjust_with_store)
3859 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3860 merge_sp_adjust_with_store = 0;
3862 else
3863 store_reg (i, offset, STACK_POINTER_REGNUM);
3864 offset += UNITS_PER_WORD;
3865 gr_saved++;
3868 /* If we wanted to merge the SP adjustment with a GR save, but we never
3869 did any GR saves, then just emit the adjustment here. */
3870 if (merge_sp_adjust_with_store)
3871 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3872 actual_fsize, 1);
3875 /* The hppa calling conventions say that %r19, the pic offset
3876 register, is saved at sp - 32 (in this function's frame)
3877 when generating PIC code. FIXME: What is the correct thing
3878 to do for functions which make no calls and allocate no
3879 frame? Do we need to allocate a frame, or can we just omit
3880 the save? For now we'll just omit the save.
3882 We don't want a note on this insn as the frame marker can
3883 move if there is a dynamic stack allocation. */
3884 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3886 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3888 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3892 /* Align pointer properly (doubleword boundary). */
3893 offset = (offset + 7) & ~7;
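/* E.g., offset == 36 becomes (36 + 7) & ~7 == 40, the next multiple
of 8, so the FP register save area starts on a doubleword boundary. */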
3895 /* Floating point register store. */
3896 if (save_fregs)
3898 rtx base;
3900 /* First get the frame or stack pointer to the start of the FP register
3901 save area. */
3902 if (frame_pointer_needed)
3904 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3905 base = frame_pointer_rtx;
3907 else
3909 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3910 base = stack_pointer_rtx;
3913 /* Now actually save the FP registers. */
3914 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3916 if (df_regs_ever_live_p (i)
3917 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3919 rtx addr, insn, reg;
3920 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3921 reg = gen_rtx_REG (DFmode, i);
3922 insn = emit_move_insn (addr, reg);
3923 if (DO_FRAME_NOTES)
3925 RTX_FRAME_RELATED_P (insn) = 1;
3926 if (TARGET_64BIT)
3928 rtx mem = gen_rtx_MEM (DFmode,
3929 plus_constant (base, offset));
3930 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3931 gen_rtx_SET (VOIDmode, mem, reg));
3933 else
3935 rtx meml = gen_rtx_MEM (SFmode,
3936 plus_constant (base, offset));
3937 rtx memr = gen_rtx_MEM (SFmode,
3938 plus_constant (base, offset + 4));
3939 rtx regl = gen_rtx_REG (SFmode, i);
3940 rtx regr = gen_rtx_REG (SFmode, i + 1);
3941 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3942 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3943 rtvec vec;
3945 RTX_FRAME_RELATED_P (setl) = 1;
3946 RTX_FRAME_RELATED_P (setr) = 1;
3947 vec = gen_rtvec (2, setl, setr);
3948 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3949 gen_rtx_SEQUENCE (VOIDmode, vec));
3952 offset += GET_MODE_SIZE (DFmode);
3953 fr_saved++;
3959 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3960 Handle case where DISP > 8k by using the add_high_const patterns. */
3962 static void
3963 load_reg (int reg, HOST_WIDE_INT disp, int base)
3965 rtx dest = gen_rtx_REG (word_mode, reg);
3966 rtx basereg = gen_rtx_REG (Pmode, base);
3967 rtx src;
3969 if (VAL_14_BITS_P (disp))
3970 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3971 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3973 rtx delta = GEN_INT (disp);
3974 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3976 emit_move_insn (tmpreg, delta);
3977 if (TARGET_DISABLE_INDEXING)
3979 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3980 src = gen_rtx_MEM (word_mode, tmpreg);
3982 else
3983 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3985 else
3987 rtx delta = GEN_INT (disp);
3988 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3989 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3991 emit_move_insn (tmpreg, high);
3992 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3995 emit_move_insn (dest, src);
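/* Roughly, on the 32-bit port this expands to one of the following
sequences (an illustrative sketch, not the literal emitted text):

ldw disp(base),reg                        ; 14-bit displacement
addil L'disp,base ; ldw R'disp(%r1),reg   ; wider displacement

with the 64-bit-only >32-bit case building the full displacement in
%r1 with an explicit add before the load. */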
3998 /* Update the total code bytes output to the text section. */
4000 static void
4001 update_total_code_bytes (unsigned int nbytes)
4003 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4004 && !IN_NAMED_SECTION_P (cfun->decl))
4006 unsigned int old_total = total_code_bytes;
4008 total_code_bytes += nbytes;
4010 /* Be prepared to handle overflows. */
4011 if (old_total > total_code_bytes)
4012 total_code_bytes = UINT_MAX;
4016 /* This function generates the assembly code for function exit.
4017 Args are as for output_function_prologue ().
4019 The function epilogue should not depend on the current stack
4020 pointer! It should use the frame pointer only. This is mandatory
4021 because of alloca; we also take advantage of it to omit stack
4022 adjustments before returning. */
4024 static void
4025 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4027 rtx insn = get_last_insn ();
4029 last_address = 0;
4031 /* hppa_expand_epilogue does the dirty work now. We just need
4032 to output the assembler directives which denote the end
4033 of a function.
4035 To make debuggers happy, emit a nop if the epilogue was completely
4036 eliminated due to a volatile call as the last insn in the
4037 current function. That way the return address (in %r2) will
4038 always point to a valid instruction in the current function. */
4040 /* Get the last real insn. */
4041 if (GET_CODE (insn) == NOTE)
4042 insn = prev_real_insn (insn);
4044 /* If it is a sequence, then look inside. */
4045 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4046 insn = XVECEXP (PATTERN (insn), 0, 0);
4048 /* If insn is a CALL_INSN, then it must be a call to a volatile
4049 function (otherwise there would be epilogue insns). */
4050 if (insn && GET_CODE (insn) == CALL_INSN)
4052 fputs ("\tnop\n", file);
4053 last_address += 4;
4056 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4058 if (TARGET_SOM && TARGET_GAS)
4060 /* We're done with this subspace except possibly for some additional
4061 debug information. Forget that we are in this subspace to ensure
4062 that the next function is output in its own subspace. */
4063 in_section = NULL;
4064 cfun->machine->in_nsubspa = 2;
4067 if (INSN_ADDRESSES_SET_P ())
4069 insn = get_last_nonnote_insn ();
4070 last_address += INSN_ADDRESSES (INSN_UID (insn));
4071 if (INSN_P (insn))
4072 last_address += insn_default_length (insn);
4073 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4074 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4076 else
4077 last_address = UINT_MAX;
4079 /* Finally, update the total number of code bytes output so far. */
4080 update_total_code_bytes (last_address);
4083 void
4084 hppa_expand_epilogue (void)
4086 rtx tmpreg;
4087 HOST_WIDE_INT offset;
4088 HOST_WIDE_INT ret_off = 0;
4089 int i;
4090 int merge_sp_adjust_with_load = 0;
4092 /* We will use this often. */
4093 tmpreg = gen_rtx_REG (word_mode, 1);
4095 /* Try to restore RP early to avoid load/use interlocks when
4096 RP gets used in the return (bv) instruction. This appears to still
4097 be necessary even when we schedule the prologue and epilogue. */
4098 if (rp_saved)
4100 ret_off = TARGET_64BIT ? -16 : -20;
4101 if (frame_pointer_needed)
4103 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4104 ret_off = 0;
4106 else
4108 /* No frame pointer, and stack is smaller than 8k. */
4109 if (VAL_14_BITS_P (ret_off - actual_fsize))
4111 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4112 ret_off = 0;
4117 /* General register restores. */
4118 if (frame_pointer_needed)
4120 offset = local_fsize;
4122 /* If the current function calls __builtin_eh_return, then we need
4123 to restore the saved EH data registers. */
4124 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4126 unsigned int i, regno;
4128 for (i = 0; ; ++i)
4130 regno = EH_RETURN_DATA_REGNO (i);
4131 if (regno == INVALID_REGNUM)
4132 break;
4134 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4135 offset += UNITS_PER_WORD;
4139 for (i = 18; i >= 4; i--)
4140 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4142 load_reg (i, offset, FRAME_POINTER_REGNUM);
4143 offset += UNITS_PER_WORD;
4146 else
4148 offset = local_fsize - actual_fsize;
4150 /* If the current function calls __builtin_eh_return, then we need
4151 to restore the saved EH data registers. */
4152 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4154 unsigned int i, regno;
4156 for (i = 0; ; ++i)
4158 regno = EH_RETURN_DATA_REGNO (i);
4159 if (regno == INVALID_REGNUM)
4160 break;
4162 /* Only for the first load.
4163 merge_sp_adjust_with_load holds the register load
4164 with which we will merge the sp adjustment. */
4165 if (merge_sp_adjust_with_load == 0
4166 && local_fsize == 0
4167 && VAL_14_BITS_P (-actual_fsize))
4168 merge_sp_adjust_with_load = regno;
4169 else
4170 load_reg (regno, offset, STACK_POINTER_REGNUM);
4171 offset += UNITS_PER_WORD;
4175 for (i = 18; i >= 3; i--)
4177 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4179 /* Only for the first load.
4180 merge_sp_adjust_with_load holds the register load
4181 with which we will merge the sp adjustment. */
4182 if (merge_sp_adjust_with_load == 0
4183 && local_fsize == 0
4184 && VAL_14_BITS_P (-actual_fsize))
4185 merge_sp_adjust_with_load = i;
4186 else
4187 load_reg (i, offset, STACK_POINTER_REGNUM);
4188 offset += UNITS_PER_WORD;
4193 /* Align pointer properly (doubleword boundary). */
4194 offset = (offset + 7) & ~7;
4196 /* FP register restores. */
4197 if (save_fregs)
4199 /* Adjust the register to index off of. */
4200 if (frame_pointer_needed)
4201 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4202 else
4203 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4205 /* Actually do the restores now. */
4206 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4207 if (df_regs_ever_live_p (i)
4208 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4210 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4211 rtx dest = gen_rtx_REG (DFmode, i);
4212 emit_move_insn (dest, src);
4216 /* Emit a blockage insn here to keep these insns from being moved to
4217 an earlier spot in the epilogue, or into the main instruction stream.
4219 This is necessary as we must not cut the stack back before all the
4220 restores are finished. */
4221 emit_insn (gen_blockage ());
4223 /* Reset stack pointer (and possibly frame pointer). The stack
4224 pointer is initially set to fp + 64 to avoid a race condition. */
4225 if (frame_pointer_needed)
4227 rtx delta = GEN_INT (-64);
4229 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4230 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4232 /* If we were deferring a callee register restore, do it now. */
4233 else if (merge_sp_adjust_with_load)
4235 rtx delta = GEN_INT (-actual_fsize);
4236 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4238 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4240 else if (actual_fsize != 0)
4241 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4242 - actual_fsize, 0);
4244 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4245 frame greater than 8k), do so now. */
4246 if (ret_off != 0)
4247 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4249 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4251 rtx sa = EH_RETURN_STACKADJ_RTX;
4253 emit_insn (gen_blockage ());
4254 emit_insn (TARGET_64BIT
4255 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4256 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4260 rtx
4261 hppa_pic_save_rtx (void)
4263 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4266 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4267 #define NO_DEFERRED_PROFILE_COUNTERS 0
4268 #endif
4271 /* Vector of funcdef numbers. */
4272 static VEC(int,heap) *funcdef_nos;
4274 /* Output deferred profile counters. */
4275 static void
4276 output_deferred_profile_counters (void)
4278 unsigned int i;
4279 int align, n;
4281 if (VEC_empty (int, funcdef_nos))
4282 return;
4284 switch_to_section (data_section);
4285 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4286 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4288 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4290 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4291 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4294 VEC_free (int, heap, funcdef_nos);
4297 void
4298 hppa_profile_hook (int label_no)
4300 /* We use SImode for the address of the function in both 32 and
4301 64-bit code to avoid having to provide DImode versions of the
4302 lcla2 and load_offset_label_address insn patterns. */
4303 rtx reg = gen_reg_rtx (SImode);
4304 rtx label_rtx = gen_label_rtx ();
4305 rtx begin_label_rtx, call_insn;
4306 char begin_label_name[16];
4308 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4309 label_no);
4310 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4312 if (TARGET_64BIT)
4313 emit_move_insn (arg_pointer_rtx,
4314 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4315 GEN_INT (64)));
4317 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4319 /* The address of the function is loaded into %r25 with an instruction-
4320 relative sequence that avoids the use of relocations. The sequence
4321 is split so that the load_offset_label_address instruction can
4322 occupy the delay slot of the call to _mcount. */
4323 if (TARGET_PA_20)
4324 emit_insn (gen_lcla2 (reg, label_rtx));
4325 else
4326 emit_insn (gen_lcla1 (reg, label_rtx));
4328 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4329 reg, begin_label_rtx, label_rtx));
4331 #if !NO_DEFERRED_PROFILE_COUNTERS
4333 rtx count_label_rtx, addr, r24;
4334 char count_label_name[16];
4336 VEC_safe_push (int, heap, funcdef_nos, label_no);
4337 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4338 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4340 addr = force_reg (Pmode, count_label_rtx);
4341 r24 = gen_rtx_REG (Pmode, 24);
4342 emit_move_insn (r24, addr);
4344 call_insn =
4345 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4346 gen_rtx_SYMBOL_REF (Pmode,
4347 "_mcount")),
4348 GEN_INT (TARGET_64BIT ? 24 : 12)));
4350 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4352 #else
4354 call_insn =
4355 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4356 gen_rtx_SYMBOL_REF (Pmode,
4357 "_mcount")),
4358 GEN_INT (TARGET_64BIT ? 16 : 8)));
4360 #endif
4362 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4363 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4365 /* Indicate the _mcount call cannot throw, nor will it execute a
4366 non-local goto. */
4367 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4370 /* Fetch the return address for the frame COUNT steps up from
4371 the current frame, after the prologue. FRAMEADDR is the
4372 frame pointer of the COUNT frame.
4374 We want to ignore any export stub remnants here. To handle this,
4375 we examine the code at the return address, and if it is an export
4376 stub, we return a memory rtx for the stub return address stored
4377 at frame-24.
4379 The value returned is used in two different ways:
4381 1. To find a function's caller.
4383 2. To change the return address for a function.
4385 This function handles most instances of case 1; however, it will
4386 fail if there are two levels of stubs to execute on the return
4387 path. The only way I believe that can happen is if the return value
4388 needs a parameter relocation, which never happens for C code.
4390 This function handles most instances of case 2; however, it will
4391 fail if we did not originally have stub code on the return path
4392 but will need stub code on the new return path. This can happen if
4393 the caller & callee are both in the main program, but the new
4394 return location is in a shared library. */
4396 rtx
4397 return_addr_rtx (int count, rtx frameaddr)
4399 rtx label;
4400 rtx rp;
4401 rtx saved_rp;
4402 rtx ins;
4404 /* Instruction stream at the normal return address for the export stub:
4406 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4407 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4408 0x00011820 | stub+16: mtsp r1,sr0
4409 0xe0400002 | stub+20: be,n 0(sr0,rp)
4411 0xe0400002 must be specified as -532676606 so that it won't be
4412 rejected as an invalid immediate operand on 64-bit hosts. */
4414 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
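/* Checking the conversion: 0xe0400002 == 3762290690, and
3762290690 - 2^32 == -532676606, so the negative constant encodes
the same 32-bit instruction pattern. */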
4415 int i;
4417 if (count != 0)
4418 return NULL_RTX;
4420 rp = get_hard_reg_initial_val (Pmode, 2);
4422 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4423 return rp;
4425 /* If there is no export stub then just use the value saved from
4426 the return pointer register. */
4428 saved_rp = gen_reg_rtx (Pmode);
4429 emit_move_insn (saved_rp, rp);
4431 /* Get pointer to the instruction stream. We have to mask out the
4432 privilege level from the two low order bits of the return address
4433 pointer here so that ins will point to the start of the first
4434 instruction that would have been executed if we returned. */
4435 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4436 label = gen_label_rtx ();
4438 /* Check the instruction stream at the normal return address for the
4439 export stub. If it is an export stub, then our return address is
4440 really in -24[frameaddr]. */
4442 for (i = 0; i < 3; i++)
4444 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4445 rtx op1 = GEN_INT (insns[i]);
4446 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4449 /* Here we know that our return address points to an export
4450 stub. We don't want to return the address of the export stub,
4451 but rather the return address of the export stub. That return
4452 address is stored at -24[frameaddr]. */
4454 emit_move_insn (saved_rp,
4455 gen_rtx_MEM (Pmode,
4456 memory_address (Pmode,
4457 plus_constant (frameaddr,
4458 -24))));
4460 emit_label (label);
4462 return saved_rp;
4465 void
4466 emit_bcond_fp (rtx operands[])
4468 enum rtx_code code = GET_CODE (operands[0]);
4469 rtx operand0 = operands[1];
4470 rtx operand1 = operands[2];
4471 rtx label = operands[3];
4473 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4474 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4476 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4477 gen_rtx_IF_THEN_ELSE (VOIDmode,
4478 gen_rtx_fmt_ee (NE,
4479 VOIDmode,
4480 gen_rtx_REG (CCFPmode, 0),
4481 const0_rtx),
4482 gen_rtx_LABEL_REF (VOIDmode, label),
4483 pc_rtx)));
4487 /* Adjust the cost of a scheduling dependency. Return the new cost of
4488 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4490 static int
4491 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4493 enum attr_type attr_type;
4495 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4496 true dependencies as they are described with bypasses now. */
4497 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4498 return cost;
4500 if (! recog_memoized (insn))
4501 return 0;
4503 attr_type = get_attr_type (insn);
4505 switch (REG_NOTE_KIND (link))
4507 case REG_DEP_ANTI:
4508 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4509 cycles later. */
4511 if (attr_type == TYPE_FPLOAD)
4513 rtx pat = PATTERN (insn);
4514 rtx dep_pat = PATTERN (dep_insn);
4515 if (GET_CODE (pat) == PARALLEL)
4517 /* This happens for the fldXs,mb patterns. */
4518 pat = XVECEXP (pat, 0, 0);
4520 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4521 /* If this happens, we have to extend this to schedule
4522 optimally. Return 0 for now. */
4523 return 0;
4525 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4527 if (! recog_memoized (dep_insn))
4528 return 0;
4529 switch (get_attr_type (dep_insn))
4531 case TYPE_FPALU:
4532 case TYPE_FPMULSGL:
4533 case TYPE_FPMULDBL:
4534 case TYPE_FPDIVSGL:
4535 case TYPE_FPDIVDBL:
4536 case TYPE_FPSQRTSGL:
4537 case TYPE_FPSQRTDBL:
4538 /* A fpload can't be issued until one cycle before a
4539 preceding arithmetic operation has finished if
4540 the target of the fpload is any of the sources
4541 (or destination) of the arithmetic operation. */
4542 return insn_default_latency (dep_insn) - 1;
4544 default:
4545 return 0;
4549 else if (attr_type == TYPE_FPALU)
4551 rtx pat = PATTERN (insn);
4552 rtx dep_pat = PATTERN (dep_insn);
4553 if (GET_CODE (pat) == PARALLEL)
4555 /* This happens for the fldXs,mb patterns. */
4556 pat = XVECEXP (pat, 0, 0);
4558 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4559 /* If this happens, we have to extend this to schedule
4560 optimally. Return 0 for now. */
4561 return 0;
4563 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4565 if (! recog_memoized (dep_insn))
4566 return 0;
4567 switch (get_attr_type (dep_insn))
4569 case TYPE_FPDIVSGL:
4570 case TYPE_FPDIVDBL:
4571 case TYPE_FPSQRTSGL:
4572 case TYPE_FPSQRTDBL:
4573 /* An ALU flop can't be issued until two cycles before a
4574 preceding divide or sqrt operation has finished if
4575 the target of the ALU flop is any of the sources
4576 (or destination) of the divide or sqrt operation. */
4577 return insn_default_latency (dep_insn) - 2;
4579 default:
4580 return 0;
4585 /* For other anti dependencies, the cost is 0. */
4586 return 0;
4588 case REG_DEP_OUTPUT:
4589 /* Output dependency; DEP_INSN writes a register that INSN writes some
4590 cycles later. */
4591 if (attr_type == TYPE_FPLOAD)
4593 rtx pat = PATTERN (insn);
4594 rtx dep_pat = PATTERN (dep_insn);
4595 if (GET_CODE (pat) == PARALLEL)
4597 /* This happens for the fldXs,mb patterns. */
4598 pat = XVECEXP (pat, 0, 0);
4600 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4601 /* If this happens, we have to extend this to schedule
4602 optimally. Return 0 for now. */
4603 return 0;
4605 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4607 if (! recog_memoized (dep_insn))
4608 return 0;
4609 switch (get_attr_type (dep_insn))
4611 case TYPE_FPALU:
4612 case TYPE_FPMULSGL:
4613 case TYPE_FPMULDBL:
4614 case TYPE_FPDIVSGL:
4615 case TYPE_FPDIVDBL:
4616 case TYPE_FPSQRTSGL:
4617 case TYPE_FPSQRTDBL:
4618 /* A fpload can't be issued until one cycle before a
4619 preceding arithmetic operation has finished if
4620 the target of the fpload is the destination of the
4621 arithmetic operation.
4623 Exception: For PA7100LC, PA7200 and PA7300, the cost
4624 is 3 cycles, unless they bundle together. We also
4625 pay the penalty if the second insn is a fpload. */
4626 return insn_default_latency (dep_insn) - 1;
4628 default:
4629 return 0;
4633 else if (attr_type == TYPE_FPALU)
4635 rtx pat = PATTERN (insn);
4636 rtx dep_pat = PATTERN (dep_insn);
4637 if (GET_CODE (pat) == PARALLEL)
4639 /* This happens for the fldXs,mb patterns. */
4640 pat = XVECEXP (pat, 0, 0);
4642 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4643 /* If this happens, we have to extend this to schedule
4644 optimally. Return 0 for now. */
4645 return 0;
4647 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4649 if (! recog_memoized (dep_insn))
4650 return 0;
4651 switch (get_attr_type (dep_insn))
4653 case TYPE_FPDIVSGL:
4654 case TYPE_FPDIVDBL:
4655 case TYPE_FPSQRTSGL:
4656 case TYPE_FPSQRTDBL:
4657 /* An ALU flop can't be issued until two cycles before a
4658 preceding divide or sqrt operation has finished if
4659 the target of the ALU flop is also the target of
4660 the divide or sqrt operation. */
4661 return insn_default_latency (dep_insn) - 2;
4663 default:
4664 return 0;
4669 /* For other output dependencies, the cost is 0. */
4670 return 0;
4672 default:
4673 gcc_unreachable ();
4677 /* Adjust scheduling priorities. We use this to try and keep addil
4678 and the next use of %r1 close together. */
4679 static int
4680 pa_adjust_priority (rtx insn, int priority)
4682 rtx set = single_set (insn);
4683 rtx src, dest;
4684 if (set)
4686 src = SET_SRC (set);
4687 dest = SET_DEST (set);
4688 if (GET_CODE (src) == LO_SUM
4689 && symbolic_operand (XEXP (src, 1), VOIDmode)
4690 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4691 priority >>= 3;
4693 else if (GET_CODE (src) == MEM
4694 && GET_CODE (XEXP (src, 0)) == LO_SUM
4695 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4696 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4697 priority >>= 1;
4699 else if (GET_CODE (dest) == MEM
4700 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4701 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4702 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4703 priority >>= 3;
4705 return priority;
4708 /* The 700 can only issue a single insn at a time.
4709 The 7XXX processors can issue two insns at a time.
4710 The 8000 can issue 4 insns at a time. */
4711 static int
4712 pa_issue_rate (void)
4714 switch (pa_cpu)
4716 case PROCESSOR_700: return 1;
4717 case PROCESSOR_7100: return 2;
4718 case PROCESSOR_7100LC: return 2;
4719 case PROCESSOR_7200: return 2;
4720 case PROCESSOR_7300: return 2;
4721 case PROCESSOR_8000: return 4;
4723 default:
4724 gcc_unreachable ();
4730 /* Return any length adjustment needed by INSN which already has its length
4731 computed as LENGTH. Return zero if no adjustment is necessary.
4733 For the PA: function calls, millicode calls, and backwards short
4734 conditional branches with unfilled delay slots need an adjustment by +1
4735 (to account for the NOP which will be inserted into the instruction stream).
4737 Also compute the length of an inline block move here as it is too
4738 complicated to express as a length attribute in pa.md. */
4739 int
4740 pa_adjust_insn_length (rtx insn, int length)
4742 rtx pat = PATTERN (insn);
4744 /* Jumps inside switch tables which have unfilled delay slots need
4745 adjustment. */
4746 if (GET_CODE (insn) == JUMP_INSN
4747 && GET_CODE (pat) == PARALLEL
4748 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4749 return 4;
4750 /* Millicode insn with an unfilled delay slot. */
4751 else if (GET_CODE (insn) == INSN
4752 && GET_CODE (pat) != SEQUENCE
4753 && GET_CODE (pat) != USE
4754 && GET_CODE (pat) != CLOBBER
4755 && get_attr_type (insn) == TYPE_MILLI)
4756 return 4;
4757 /* Block move pattern. */
4758 else if (GET_CODE (insn) == INSN
4759 && GET_CODE (pat) == PARALLEL
4760 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4761 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4762 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4763 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4764 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4765 return compute_movmem_length (insn) - 4;
4766 /* Block clear pattern. */
4767 else if (GET_CODE (insn) == INSN
4768 && GET_CODE (pat) == PARALLEL
4769 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4770 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4771 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4772 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4773 return compute_clrmem_length (insn) - 4;
4774 /* Conditional branch with an unfilled delay slot. */
4775 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4777 /* Adjust a short backwards conditional with an unfilled delay slot. */
4778 if (GET_CODE (pat) == SET
4779 && length == 4
4780 && JUMP_LABEL (insn) != NULL_RTX
4781 && ! forward_branch_p (insn))
4782 return 4;
4783 else if (GET_CODE (pat) == PARALLEL
4784 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4785 && length == 4)
4786 return 4;
4787 /* Adjust dbra insn with short backwards conditional branch with
4788 unfilled delay slot -- only for case where counter is in a
4789 general register. */
4790 else if (GET_CODE (pat) == PARALLEL
4791 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4792 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4793 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4794 && length == 4
4795 && ! forward_branch_p (insn))
4796 return 4;
4797 else
4798 return 0;
4800 return 0;
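/* For instance, a short backwards conditional branch whose delay slot
stays unfilled is followed by a nop, so the +4 above grows its 4-byte
length to 8. */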
4803 /* Print operand X (an rtx) in assembler syntax to file FILE.
4804 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4805 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4807 void
4808 print_operand (FILE *file, rtx x, int code)
4810 switch (code)
4812 case '#':
4813 /* Output a 'nop' if there's nothing for the delay slot. */
4814 if (dbr_sequence_length () == 0)
4815 fputs ("\n\tnop", file);
4816 return;
4817 case '*':
4818 /* Output a nullification completer if there's nothing for the
4819 delay slot or nullification is requested. */
4820 if (dbr_sequence_length () == 0 ||
4821 (final_sequence &&
4822 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4823 fputs (",n", file);
4824 return;
4825 case 'R':
4826 /* Print out the second register name of a register pair.
4827 I.e., R (6) => 7. */
4828 fputs (reg_names[REGNO (x) + 1], file);
4829 return;
4830 case 'r':
4831 /* A register or zero. */
4832 if (x == const0_rtx
4833 || (x == CONST0_RTX (DFmode))
4834 || (x == CONST0_RTX (SFmode)))
4836 fputs ("%r0", file);
4837 return;
4839 else
4840 break;
4841 case 'f':
4842 /* A register or zero (floating point). */
4843 if (x == const0_rtx
4844 || (x == CONST0_RTX (DFmode))
4845 || (x == CONST0_RTX (SFmode)))
4847 fputs ("%fr0", file);
4848 return;
4850 else
4851 break;
4852 case 'A':
4854 rtx xoperands[2];
4856 xoperands[0] = XEXP (XEXP (x, 0), 0);
4857 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4858 output_global_address (file, xoperands[1], 0);
4859 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4860 return;
4863 case 'C': /* Plain (C)ondition */
4864 case 'X':
4865 switch (GET_CODE (x))
4867 case EQ:
4868 fputs ("=", file); break;
4869 case NE:
4870 fputs ("<>", file); break;
4871 case GT:
4872 fputs (">", file); break;
4873 case GE:
4874 fputs (">=", file); break;
4875 case GEU:
4876 fputs (">>=", file); break;
4877 case GTU:
4878 fputs (">>", file); break;
4879 case LT:
4880 fputs ("<", file); break;
4881 case LE:
4882 fputs ("<=", file); break;
4883 case LEU:
4884 fputs ("<<=", file); break;
4885 case LTU:
4886 fputs ("<<", file); break;
4887 default:
4888 gcc_unreachable ();
4890 return;
4891 case 'N': /* Condition, (N)egated */
4892 switch (GET_CODE (x))
4894 case EQ:
4895 fputs ("<>", file); break;
4896 case NE:
4897 fputs ("=", file); break;
4898 case GT:
4899 fputs ("<=", file); break;
4900 case GE:
4901 fputs ("<", file); break;
4902 case GEU:
4903 fputs ("<<", file); break;
4904 case GTU:
4905 fputs ("<<=", file); break;
4906 case LT:
4907 fputs (">=", file); break;
4908 case LE:
4909 fputs (">", file); break;
4910 case LEU:
4911 fputs (">>", file); break;
4912 case LTU:
4913 fputs (">>=", file); break;
4914 default:
4915 gcc_unreachable ();
4917 return;
4918 /* For floating point comparisons. Note that the output
4919 predicates are the complement of the desired mode. The
4920 conditions for GT, GE, LT, LE and LTGT cause an invalid
4921 operation exception if the result is unordered and this
4922 exception is enabled in the floating-point status register. */
4923 case 'Y':
4924 switch (GET_CODE (x))
4926 case EQ:
4927 fputs ("!=", file); break;
4928 case NE:
4929 fputs ("=", file); break;
4930 case GT:
4931 fputs ("!>", file); break;
4932 case GE:
4933 fputs ("!>=", file); break;
4934 case LT:
4935 fputs ("!<", file); break;
4936 case LE:
4937 fputs ("!<=", file); break;
4938 case LTGT:
4939 fputs ("!<>", file); break;
4940 case UNLE:
4941 fputs ("!?<=", file); break;
4942 case UNLT:
4943 fputs ("!?<", file); break;
4944 case UNGE:
4945 fputs ("!?>=", file); break;
4946 case UNGT:
4947 fputs ("!?>", file); break;
4948 case UNEQ:
4949 fputs ("!?=", file); break;
4950 case UNORDERED:
4951 fputs ("!?", file); break;
4952 case ORDERED:
4953 fputs ("?", file); break;
4954 default:
4955 gcc_unreachable ();
4957 return;
4958 case 'S': /* Condition, operands are (S)wapped. */
4959 switch (GET_CODE (x))
4961 case EQ:
4962 fputs ("=", file); break;
4963 case NE:
4964 fputs ("<>", file); break;
4965 case GT:
4966 fputs ("<", file); break;
4967 case GE:
4968 fputs ("<=", file); break;
4969 case GEU:
4970 fputs ("<<=", file); break;
4971 case GTU:
4972 fputs ("<<", file); break;
4973 case LT:
4974 fputs (">", file); break;
4975 case LE:
4976 fputs (">=", file); break;
4977 case LEU:
4978 fputs (">>=", file); break;
4979 case LTU:
4980 fputs (">>", file); break;
4981 default:
4982 gcc_unreachable ();
4984 return;
4985 case 'B': /* Condition, (B)oth swapped and negate. */
4986 switch (GET_CODE (x))
4988 case EQ:
4989 fputs ("<>", file); break;
4990 case NE:
4991 fputs ("=", file); break;
4992 case GT:
4993 fputs (">=", file); break;
4994 case GE:
4995 fputs (">", file); break;
4996 case GEU:
4997 fputs (">>", file); break;
4998 case GTU:
4999 fputs (">>=", file); break;
5000 case LT:
5001 fputs ("<=", file); break;
5002 case LE:
5003 fputs ("<", file); break;
5004 case LEU:
5005 fputs ("<<", file); break;
5006 case LTU:
5007 fputs ("<<=", file); break;
5008 default:
5009 gcc_unreachable ();
5011 return;
5012 case 'k':
5013 gcc_assert (GET_CODE (x) == CONST_INT);
5014 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5015 return;
5016 case 'Q':
5017 gcc_assert (GET_CODE (x) == CONST_INT);
5018 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5019 return;
5020 case 'L':
5021 gcc_assert (GET_CODE (x) == CONST_INT);
5022 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5023 return;
5024 case 'O':
5025 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5026 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5027 return;
5028 case 'p':
5029 gcc_assert (GET_CODE (x) == CONST_INT);
5030 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5031 return;
5032 case 'P':
5033 gcc_assert (GET_CODE (x) == CONST_INT);
5034 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5035 return;
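/* Worked examples for the numeric codes above, taking x == 3:
%k prints ~3 == -4, %Q prints 64 - 3 == 61, %L prints 32 - 3 == 29,
%p prints 63 - 3 == 60 and %P prints 31 - 3 == 28; %O needs a power
of two, e.g. x == 8 prints 3. */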
5036 case 'I':
5037 if (GET_CODE (x) == CONST_INT)
5038 fputs ("i", file);
5039 return;
5040 case 'M':
5041 case 'F':
5042 switch (GET_CODE (XEXP (x, 0)))
5044 case PRE_DEC:
5045 case PRE_INC:
5046 if (ASSEMBLER_DIALECT == 0)
5047 fputs ("s,mb", file);
5048 else
5049 fputs (",mb", file);
5050 break;
5051 case POST_DEC:
5052 case POST_INC:
5053 if (ASSEMBLER_DIALECT == 0)
5054 fputs ("s,ma", file);
5055 else
5056 fputs (",ma", file);
5057 break;
5058 case PLUS:
5059 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5060 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5062 if (ASSEMBLER_DIALECT == 0)
5063 fputs ("x", file);
5065 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5066 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5068 if (ASSEMBLER_DIALECT == 0)
5069 fputs ("x,s", file);
5070 else
5071 fputs (",s", file);
5073 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5074 fputs ("s", file);
5075 break;
5076 default:
5077 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5078 fputs ("s", file);
5079 break;
5081 return;
5082 case 'G':
5083 output_global_address (file, x, 0);
5084 return;
5085 case 'H':
5086 output_global_address (file, x, 1);
5087 return;
5088 case 0: /* Don't do anything special */
5089 break;
5090 case 'Z':
5092 unsigned op[3];
5093 compute_zdepwi_operands (INTVAL (x), op);
5094 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5095 return;
5097 case 'z':
5099 unsigned op[3];
5100 compute_zdepdi_operands (INTVAL (x), op);
5101 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5102 return;
5104 case 'c':
5105 /* We can get here from a .vtable_inherit due to our
5106 CONSTANT_ADDRESS_P rejecting perfectly good constant
5107 addresses. */
5108 break;
5109 default:
5110 gcc_unreachable ();
5112 if (GET_CODE (x) == REG)
5114 fputs (reg_names [REGNO (x)], file);
5115 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5117 fputs ("R", file);
5118 return;
5120 if (FP_REG_P (x)
5121 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5122 && (REGNO (x) & 1) == 0)
5123 fputs ("L", file);
5125 else if (GET_CODE (x) == MEM)
5127 int size = GET_MODE_SIZE (GET_MODE (x));
5128 rtx base = NULL_RTX;
5129 switch (GET_CODE (XEXP (x, 0)))
5131 case PRE_DEC:
5132 case POST_DEC:
5133 base = XEXP (XEXP (x, 0), 0);
5134 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5135 break;
5136 case PRE_INC:
5137 case POST_INC:
5138 base = XEXP (XEXP (x, 0), 0);
5139 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5140 break;
5141 case PLUS:
5142 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5143 fprintf (file, "%s(%s)",
5144 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5145 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5146 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5147 fprintf (file, "%s(%s)",
5148 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5149 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5150 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5151 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5153 /* Because the REG_POINTER flag can get lost during reload,
5154 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5155 index and base registers in the combined move patterns. */
5156 rtx base = XEXP (XEXP (x, 0), 1);
5157 rtx index = XEXP (XEXP (x, 0), 0);
5159 fprintf (file, "%s(%s)",
5160 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5162 else
5163 output_address (XEXP (x, 0));
5164 break;
5165 default:
5166 output_address (XEXP (x, 0));
5167 break;
5170 else
5171 output_addr_const (file, x);
5174 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5176 void
5177 output_global_address (FILE *file, rtx x, int round_constant)
5180 /* Imagine (high (const (plus ...))). */
5181 if (GET_CODE (x) == HIGH)
5182 x = XEXP (x, 0);
5184 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5185 output_addr_const (file, x);
5186 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5188 output_addr_const (file, x);
5189 fputs ("-$global$", file);
5191 else if (GET_CODE (x) == CONST)
5193 const char *sep = "";
5194 int offset = 0; /* assembler wants -$global$ at end */
5195 rtx base = NULL_RTX;
5197 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5199 case SYMBOL_REF:
5200 base = XEXP (XEXP (x, 0), 0);
5201 output_addr_const (file, base);
5202 break;
5203 case CONST_INT:
5204 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5205 break;
5206 default:
5207 gcc_unreachable ();
5210 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5212 case SYMBOL_REF:
5213 base = XEXP (XEXP (x, 0), 1);
5214 output_addr_const (file, base);
5215 break;
5216 case CONST_INT:
5217 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5218 break;
5219 default:
5220 gcc_unreachable ();
5223 /* How bogus. The compiler is apparently responsible for
5224 rounding the constant if it uses an LR field selector.
5226 The linker and/or assembler seem a better place since
5227 they have to do this kind of thing already.
5229 If we fail to do this, HP's optimizing linker may eliminate
5230 an addil, but not update the ldw/stw/ldo instruction that
5231 uses the result of the addil. */
5232 if (round_constant)
5233 offset = ((offset + 0x1000) & ~0x1fff);
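/* E.g., offset == 0x2345 rounds to (0x2345 + 0x1000) & ~0x1fff
== 0x2000, the nearest multiple of 0x2000 (8k). */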
5235 switch (GET_CODE (XEXP (x, 0)))
5237 case PLUS:
5238 if (offset < 0)
5240 offset = -offset;
5241 sep = "-";
5243 else
5244 sep = "+";
5245 break;
5247 case MINUS:
5248 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5249 sep = "-";
5250 break;
5252 default:
5253 gcc_unreachable ();
5256 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5257 fputs ("-$global$", file);
5258 if (offset)
5259 fprintf (file, "%s%d", sep, offset);
5261 else
5262 output_addr_const (file, x);
5265 /* Output boilerplate text to appear at the beginning of the file.
5266 There are several possible versions. */
5267 #define aputs(x) fputs(x, asm_out_file)
5268 static inline void
5269 pa_file_start_level (void)
5271 if (TARGET_64BIT)
5272 aputs ("\t.LEVEL 2.0w\n");
5273 else if (TARGET_PA_20)
5274 aputs ("\t.LEVEL 2.0\n");
5275 else if (TARGET_PA_11)
5276 aputs ("\t.LEVEL 1.1\n");
5277 else
5278 aputs ("\t.LEVEL 1.0\n");
5281 static inline void
5282 pa_file_start_space (int sortspace)
5284 aputs ("\t.SPACE $PRIVATE$");
5285 if (sortspace)
5286 aputs (",SORT=16");
5287 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5288 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5289 "\n\t.SPACE $TEXT$");
5290 if (sortspace)
5291 aputs (",SORT=8");
5292 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5293 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5296 static inline void
5297 pa_file_start_file (int want_version)
5299 if (write_symbols != NO_DEBUG)
5301 output_file_directive (asm_out_file, main_input_filename);
5302 if (want_version)
5303 aputs ("\t.version\t\"01.01\"\n");
5307 static inline void
5308 pa_file_start_mcount (const char *aswhat)
5310 if (profile_flag)
5311 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5314 static void
5315 pa_elf_file_start (void)
5317 pa_file_start_level ();
5318 pa_file_start_mcount ("ENTRY");
5319 pa_file_start_file (0);
5322 static void
5323 pa_som_file_start (void)
5325 pa_file_start_level ();
5326 pa_file_start_space (0);
5327 aputs ("\t.IMPORT $global$,DATA\n"
5328 "\t.IMPORT $$dyncall,MILLICODE\n");
5329 pa_file_start_mcount ("CODE");
5330 pa_file_start_file (0);
5333 static void
5334 pa_linux_file_start (void)
5336 pa_file_start_file (1);
5337 pa_file_start_level ();
5338 pa_file_start_mcount ("CODE");
5341 static void
5342 pa_hpux64_gas_file_start (void)
5344 pa_file_start_level ();
5345 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5346 if (profile_flag)
5347 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5348 #endif
5349 pa_file_start_file (1);
5352 static void
5353 pa_hpux64_hpas_file_start (void)
5355 pa_file_start_level ();
5356 pa_file_start_space (1);
5357 pa_file_start_mcount ("CODE");
5358 pa_file_start_file (0);
5360 #undef aputs
5362 /* Search the deferred plabel list for SYMBOL and return its internal
5363 label. If an entry for SYMBOL is not found, a new entry is created. */
5365 static rtx
5366 get_deferred_plabel (rtx symbol)
5368 const char *fname = XSTR (symbol, 0);
5369 size_t i;
5371 /* See if we have already put this function on the list of deferred
5372 plabels. This list is generally small, so a linear search is not
5373 too ugly. If it proves too slow, replace it with something faster. */
5374 for (i = 0; i < n_deferred_plabels; i++)
5375 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5376 break;
5378 /* If the deferred plabel list is empty, or this entry was not found
5379 on the list, create a new entry on the list. */
5380 if (deferred_plabels == NULL || i == n_deferred_plabels)
5382 tree id;
5384 if (deferred_plabels == 0)
5385 deferred_plabels = (struct deferred_plabel *)
5386 ggc_alloc (sizeof (struct deferred_plabel));
5387 else
5388 deferred_plabels = (struct deferred_plabel *)
5389 ggc_realloc (deferred_plabels,
5390 ((n_deferred_plabels + 1)
5391 * sizeof (struct deferred_plabel)));
5393 i = n_deferred_plabels++;
5394 deferred_plabels[i].internal_label = gen_label_rtx ();
5395 deferred_plabels[i].symbol = symbol;
5397 /* Gross. We have just implicitly taken the address of this
5398 function. Mark it in the same manner as assemble_name. */
5399 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5400 if (id)
5401 mark_referenced (id);
5404 return deferred_plabels[i].internal_label;
5407 static void
5408 output_deferred_plabels (void)
5410 size_t i;
5412 /* If we have some deferred plabels, then we need to switch into the
5413 data or readonly data section, and align it to a 4-byte boundary
5414 (8-byte in the 64-bit runtime) before outputting the deferred plabels. */
5415 if (n_deferred_plabels)
5417 switch_to_section (flag_pic ? data_section : readonly_data_section);
5418 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5421 /* Now output the deferred plabels. */
5422 for (i = 0; i < n_deferred_plabels; i++)
5424 targetm.asm_out.internal_label (asm_out_file, "L",
5425 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5426 assemble_integer (deferred_plabels[i].symbol,
5427 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5431 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5432 /* Initialize optabs to point to HPUX long double emulation routines. */
5433 static void
5434 pa_hpux_init_libfuncs (void)
5436 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5437 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5438 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5439 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5440 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5441 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5442 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5443 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5444 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5446 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5447 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5448 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5449 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5450 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5451 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5452 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5454 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5455 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5456 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5457 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5459 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5460 ? "__U_Qfcnvfxt_quad_to_sgl"
5461 : "_U_Qfcnvfxt_quad_to_sgl");
5462 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5463 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5464 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5466 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5467 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5468 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5469 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
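/* With these registered, a TFmode (quad precision) operation is
emitted as a libcall; e.g., a long double addition becomes, roughly,
a call to _U_Qfadd with the two operands. */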
5471 #endif
5473 /* HP's millicode routines mean something special to the assembler.
5474 Keep track of which ones we have used. */
5476 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5477 static void import_milli (enum millicodes);
5478 static char imported[(int) end1000];
5479 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5480 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5481 #define MILLI_START 10
5483 static void
5484 import_milli (enum millicodes code)
5486 char str[sizeof (import_string)];
5488 if (!imported[(int) code])
5490 imported[(int) code] = 1;
5491 strcpy (str, import_string);
5492 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5493 output_asm_insn (str, 0);
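/* For example, import_milli (mulI) patches the template into
".IMPORT $$mulI,MILLICODE" on first use and emits nothing on later
uses, since the imported[] flag is then set. */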
5497 /* The register constraints have put the operands and return value in
5498 the proper registers. */
5500 const char *
5501 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5503 import_milli (mulI);
5504 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5507 /* Emit the rtl for doing a division by a constant. */
5509 /* Do magic division millicodes exist for this value? */
5510 const int magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5512 /* We'll use an array to keep track of the magic millicodes and
5513 whether or not we've used them already. [n][0] is signed, [n][1] is
5514 unsigned. */
5516 static int div_milli[16][2];
5518 int
5519 emit_hpdiv_const (rtx *operands, int unsignedp)
5521 if (GET_CODE (operands[2]) == CONST_INT
5522 && INTVAL (operands[2]) > 0
5523 && INTVAL (operands[2]) < 16
5524 && magic_milli[INTVAL (operands[2])])
5526 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5528 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5529 emit
5530 (gen_rtx_PARALLEL
5531 (VOIDmode,
5532 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5533 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5534 SImode,
5535 gen_rtx_REG (SImode, 26),
5536 operands[2])),
5537 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5538 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5539 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5540 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5541 gen_rtx_CLOBBER (VOIDmode, ret))));
5542 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5543 return 1;
5545 return 0;
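/* Per the magic_milli table above, constant divisors 3, 5, 6, 7, 9,
10, 12, 14 and 15 take this path; output_div_insn below handles the
rest, using $$divI_<n>/$$divU_<n> for special constants and the
generic $$divI/$$divU millicode routines otherwise. */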
5548 const char *
5549 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5551 int divisor;
5553 /* If the divisor is a constant, try to use one of the special
5554 opcodes. */
5555 if (GET_CODE (operands[0]) == CONST_INT)
5557 static char buf[100];
5558 divisor = INTVAL (operands[0]);
5559 if (!div_milli[divisor][unsignedp])
5561 div_milli[divisor][unsignedp] = 1;
5562 if (unsignedp)
5563 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5564 else
5565 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5567 if (unsignedp)
5569 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5570 INTVAL (operands[0]));
5571 return output_millicode_call (insn,
5572 gen_rtx_SYMBOL_REF (SImode, buf));
5574 else
5576 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5577 INTVAL (operands[0]));
5578 return output_millicode_call (insn,
5579 gen_rtx_SYMBOL_REF (SImode, buf));
5582 /* Divisor isn't a special constant. */
5583 else
5585 if (unsignedp)
5587 import_milli (divU);
5588 return output_millicode_call (insn,
5589 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5591 else
5593 import_milli (divI);
5594 return output_millicode_call (insn,
5595 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5600 /* Output a $$rem millicode to do mod. */
5602 const char *
5603 output_mod_insn (int unsignedp, rtx insn)
5605 if (unsignedp)
5607 import_milli (remU);
5608 return output_millicode_call (insn,
5609 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5611 else
5613 import_milli (remI);
5614 return output_millicode_call (insn,
5615 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5619 void
5620 output_arg_descriptor (rtx call_insn)
5622 const char *arg_regs[4];
5623 enum machine_mode arg_mode;
5624 rtx link;
5625 int i, output_flag = 0;
5626 int regno;
5628 /* We neither need nor want argument location descriptors for the
5629 64-bit runtime environment or the ELF32 environment. */
5630 if (TARGET_64BIT || TARGET_ELF32)
5631 return;
5633 for (i = 0; i < 4; i++)
5634 arg_regs[i] = 0;
5636 /* Specify explicitly that no argument relocations should take place
5637 if using the portable runtime calling conventions. */
5638 if (TARGET_PORTABLE_RUNTIME)
5640 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5641 asm_out_file);
5642 return;
5645 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5646 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5647 link; link = XEXP (link, 1))
5649 rtx use = XEXP (link, 0);
5651 if (! (GET_CODE (use) == USE
5652 && GET_CODE (XEXP (use, 0)) == REG
5653 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5654 continue;
5656 arg_mode = GET_MODE (XEXP (use, 0));
5657 regno = REGNO (XEXP (use, 0));
5658 if (regno >= 23 && regno <= 26)
5660 arg_regs[26 - regno] = "GR";
5661 if (arg_mode == DImode)
5662 arg_regs[25 - regno] = "GR";
5664 else if (regno >= 32 && regno <= 39)
5666 if (arg_mode == SFmode)
5667 arg_regs[(regno - 32) / 2] = "FR";
5668 else
5670 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5671 arg_regs[(regno - 34) / 2] = "FR";
5672 arg_regs[(regno - 34) / 2 + 1] = "FU";
5673 #else
5674 arg_regs[(regno - 34) / 2] = "FU";
5675 arg_regs[(regno - 34) / 2 + 1] = "FR";
5676 #endif
5680 fputs ("\t.CALL ", asm_out_file);
5681 for (i = 0; i < 4; i++)
5683 if (arg_regs[i])
5685 if (output_flag++)
5686 fputc (',', asm_out_file);
5687 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5690 fputc ('\n', asm_out_file);
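/* As an illustration (a sketch; the exact register-to-word mapping
depends on the target headers), a double passed in an FP argument
register pair would yield paired descriptor words along the lines of

.CALL ARGW0=FR,ARGW1=FU

with the FR/FU order flipped when HP_FP_ARG_DESCRIPTOR_REVERSED is
defined. */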
5693 static enum reg_class
5694 pa_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
5695 enum machine_mode mode, secondary_reload_info *sri)
5697 int is_symbolic, regno;
5699 /* Handle the easy stuff first. */
5700 if (rclass == R1_REGS)
5701 return NO_REGS;
5703 if (REG_P (x))
5705 regno = REGNO (x);
5706 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5707 return NO_REGS;
5709 else
5710 regno = -1;
5712 /* If we have something like (mem (mem (...))), we can safely assume the
5713 inner MEM will end up in a general register after reloading, so there's
5714 no need for a secondary reload. */
5715 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5716 return NO_REGS;
5718 /* Trying to load a constant into a FP register during PIC code
5719 generation requires %r1 as a scratch register. */
5720 if (flag_pic
5721 && (mode == SImode || mode == DImode)
5722 && FP_REG_CLASS_P (rclass)
5723 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5725 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5726 : CODE_FOR_reload_indi_r1);
5727 return NO_REGS;
5730 /* Profiling showed the PA port spends about 1.3% of its compilation
5731 time in true_regnum from calls inside pa_secondary_reload_class. */
5732 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5733 regno = true_regnum (x);
5735 /* In order to allow 14-bit displacements in integer loads and stores,
5736 we need to prevent reload from generating out of range integer mode
5737 loads and stores to the floating point registers. Previously, we
5738 used to call for a secondary reload and have emit_move_sequence()
5739 fix the instruction sequence. However, reload occasionally wouldn't
5740 generate the reload and we would end up with an invalid REG+D memory
5741 address. So, now we use an intermediate general register for most
5742 memory loads and stores. */
5743 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5744 && GET_MODE_CLASS (mode) == MODE_INT
5745 && FP_REG_CLASS_P (rclass))
5747 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5748 the secondary reload needed for a pseudo. It never passes a
5749 REG+D address. */
5750 if (GET_CODE (x) == MEM)
5752 x = XEXP (x, 0);
5754 /* We don't need an intermediate for indexed and LO_SUM DLT
5755 memory addresses. When INT14_OK_STRICT is true, it might
5756 appear that we could directly allow register indirect
5757 memory addresses. However, this doesn't work because we
5758 don't support SUBREGs in floating-point register copies
5759 and reload doesn't tell us when it's going to use a SUBREG. */
5760 if (IS_INDEX_ADDR_P (x)
5761 || IS_LO_SUM_DLT_ADDR_P (x))
5762 return NO_REGS;
5764 /* Otherwise, we need an intermediate general register. */
5765 return GENERAL_REGS;
5768 /* Request a secondary reload with a general scratch register
5769 for everything else. ??? Could symbolic operands be handled
5770 directly when generating non-pic PA 2.0 code? */
5771 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5772 return NO_REGS;
5775 /* We need a secondary register (GPR) for copies between the SAR
5776 and anything other than a general register. */
5777 if (rclass == SHIFT_REGS && (regno <= 0 || regno >= 32))
5779 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5780 return NO_REGS;
5783 /* A SAR<->FP register copy requires a secondary register (GPR) as
5784 well as secondary memory. */
5785 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5786 && (REGNO_REG_CLASS (regno) == SHIFT_REGS
5787 && FP_REG_CLASS_P (rclass)))
5789 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5790 return NO_REGS;
5793 /* Secondary reloads of symbolic operands require %r1 as a scratch
5794 register when we're generating PIC code and when the operand isn't
5795 readonly. */
5796 if (GET_CODE (x) == HIGH)
5797 x = XEXP (x, 0);
5799 /* Profiling has shown that GCC spends about 2.6% of its compilation
5800 time in symbolic_operand from calls inside pa_secondary_reload_class.
5801 So, we use an inline copy to avoid useless work. */
5802 switch (GET_CODE (x))
5804 rtx op;
5806 case SYMBOL_REF:
5807 is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5808 break;
5809 case LABEL_REF:
5810 is_symbolic = 1;
5811 break;
5812 case CONST:
5813 op = XEXP (x, 0);
5814 is_symbolic = (GET_CODE (op) == PLUS
5815 && ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5816 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5817 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5818 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5819 break;
5820 default:
5821 is_symbolic = 0;
5822 break;
5825 if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5827 gcc_assert (mode == SImode || mode == DImode);
5828 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5829 : CODE_FOR_reload_indi_r1);
5832 return NO_REGS;
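/* Annotation (editor's sketch, not in the original source): the
   GENERAL_REGS result above makes reload stage integer-mode FP loads
   and stores through a general register, conceptually

	ldw	disp(%r30),%r1
	stw	%r1,-16(%r30)
	fldw	-16(%r30),%fr22

   (register names and offsets are illustrative) instead of emitting an
   FP access with a possibly out-of-range REG+D address.  */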
5835 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5836 is only marked as live on entry by df-scan when it is a fixed
5837 register. It isn't a fixed register in the 64-bit runtime,
5838 so we need to mark it here. */
5840 static void
5841 pa_extra_live_on_entry (bitmap regs)
5843 if (TARGET_64BIT)
5844 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5847 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5848 to prevent it from being deleted. */
5850 rtx
5851 pa_eh_return_handler_rtx (void)
5853 rtx tmp;
5855 tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
5856 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5857 tmp = gen_rtx_MEM (word_mode, tmp);
5858 tmp->volatil = 1;
5859 return tmp;
5862 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5863 by invisible reference. As a GCC extension, we also pass anything
5864 with a zero or variable size by reference.
5866 The 64-bit runtime does not describe passing any types by invisible
5867 reference. The internals of GCC can't currently handle passing
5868 empty structures, and zero or variable length arrays when they are
5869 not passed entirely on the stack or by reference. Thus, as a GCC
5870 extension, we pass these types by reference. The HP compiler doesn't
5871 support these types, so hopefully there shouldn't be any compatibility
5872 issues. This may have to be revisited when HP releases a C99 compiler
5873 or updates the ABI. */
5875 static bool
5876 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5877 enum machine_mode mode, const_tree type,
5878 bool named ATTRIBUTE_UNUSED)
5880 HOST_WIDE_INT size;
5882 if (type)
5883 size = int_size_in_bytes (type);
5884 else
5885 size = GET_MODE_SIZE (mode);
5887 if (TARGET_64BIT)
5888 return size <= 0;
5889 else
5890 return size <= 0 || size > 8;
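/* Annotation (editor's example): with the 32-bit runtime, a
   "struct { double a, b; }" has int_size_in_bytes 16 and is passed by
   reference, while a plain double (size 8) is passed by value.  With
   the 64-bit runtime only zero and variable sized types (size <= 0)
   go by reference.  */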
5893 enum direction
5894 function_arg_padding (enum machine_mode mode, const_tree type)
5896 if (mode == BLKmode
5897 || (TARGET_64BIT
5898 && type
5899 && (AGGREGATE_TYPE_P (type)
5900 || TREE_CODE (type) == COMPLEX_TYPE
5901 || TREE_CODE (type) == VECTOR_TYPE)))
5903 /* Return none if justification is not required. */
5904 if (type
5905 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5906 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5907 return none;
5909 /* The directions set here are ignored when a BLKmode argument larger
5910 than a word is placed in a register. Different code is used for
5911 the stack and registers. This makes it difficult to have a
5912 consistent data representation for both the stack and registers.
5913 For both runtimes, the justification and padding for arguments on
5914 the stack and in registers should be identical. */
5915 if (TARGET_64BIT)
5916 /* The 64-bit runtime specifies left justification for aggregates. */
5917 return upward;
5918 else
5919 /* The 32-bit runtime architecture specifies right justification.
5920 When the argument is passed on the stack, the argument is padded
5921 with garbage on the left. The HP compiler pads with zeros. */
5922 return downward;
5925 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5926 return downward;
5927 else
5928 return none;
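/* Annotation (editor's example): a 3-byte BLKmode struct is not a
   multiple of PARM_BOUNDARY, so the 32-bit runtime right justifies it
   (downward) in its argument word while the 64-bit runtime left
   justifies it (upward).  */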
5932 /* Do what is necessary for `va_start'. We look at the current function
5933 to determine if stdargs or varargs is used and fill in an initial
5934 va_list. A pointer to this constructor is returned. */
5936 static rtx
5937 hppa_builtin_saveregs (void)
5939 rtx offset, dest;
5940 tree fntype = TREE_TYPE (current_function_decl);
5941 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5942 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5943 != void_type_node)))
5944 ? UNITS_PER_WORD : 0);
5946 if (argadj)
5947 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
5948 else
5949 offset = crtl->args.arg_offset_rtx;
5951 if (TARGET_64BIT)
5953 int i, off;
5955 /* Adjust for varargs/stdarg differences. */
5956 if (argadj)
5957 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
5958 else
5959 offset = crtl->args.arg_offset_rtx;
5961 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5962 from the incoming arg pointer and growing to larger addresses. */
5963 for (i = 26, off = -64; i >= 19; i--, off += 8)
5964 emit_move_insn (gen_rtx_MEM (word_mode,
5965 plus_constant (arg_pointer_rtx, off)),
5966 gen_rtx_REG (word_mode, i));
5968 /* The incoming args pointer points just beyond the flushback area;
5969 normally this is not a serious concern. However, when we are doing
5970 varargs/stdargs we want to make the arg pointer point to the start
5971 of the incoming argument area. */
5972 emit_move_insn (virtual_incoming_args_rtx,
5973 plus_constant (arg_pointer_rtx, -64));
5975 /* Now return a pointer to the first anonymous argument. */
5976 return copy_to_reg (expand_binop (Pmode, add_optab,
5977 virtual_incoming_args_rtx,
5978 offset, 0, 0, OPTAB_LIB_WIDEN));
5981 /* Store general registers on the stack. */
5982 dest = gen_rtx_MEM (BLKmode,
5983 plus_constant (crtl->args.internal_arg_pointer,
5984 -16));
5985 set_mem_alias_set (dest, get_varargs_alias_set ());
5986 set_mem_align (dest, BITS_PER_WORD);
5987 move_block_from_reg (23, dest, 4);
5989 /* move_block_from_reg will emit code to store the argument registers
5990 individually as scalar stores.
5992 However, other insns may later load from the same addresses for
5993 a structure load (passing a struct to a varargs routine).
5995 The alias code assumes that such aliasing can never happen, so we
5996 have to keep memory referencing insns from moving up beyond the
5997 last argument register store. So we emit a blockage insn here. */
5998 emit_insn (gen_blockage ());
6000 return copy_to_reg (expand_binop (Pmode, add_optab,
6001 crtl->args.internal_arg_pointer,
6002 offset, 0, 0, OPTAB_LIB_WIDEN));
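/* Annotation (editor's sketch): for TARGET_64BIT the loop near the
   top of this function emits the equivalent of

	std %r26,-64(ap)
	std %r25,-56(ap)
	...
	std %r19,-8(ap)

   where "ap" stands for the incoming argument pointer, flushing the
   eight GR argument registers to the incoming argument area.  */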
6005 static void
6006 hppa_va_start (tree valist, rtx nextarg)
6008 nextarg = expand_builtin_saveregs ();
6009 std_expand_builtin_va_start (valist, nextarg);
6012 static tree
6013 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6014 gimple_seq *post_p)
6016 if (TARGET_64BIT)
6018 /* Args grow upward. We can use the generic routines. */
6019 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6021 else /* !TARGET_64BIT */
6023 tree ptr = build_pointer_type (type);
6024 tree valist_type;
6025 tree t, u;
6026 unsigned int size, ofs;
6027 bool indirect;
6029 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6030 if (indirect)
6032 type = ptr;
6033 ptr = build_pointer_type (type);
6035 size = int_size_in_bytes (type);
6036 valist_type = TREE_TYPE (valist);
6038 /* Args grow down. Not handled by generic routines. */
6040 u = fold_convert (sizetype, size_in_bytes (type));
6041 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6042 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
6044 /* Copied from va-pa.h, but we probably don't need to align to
6045 word size, since we generate and preserve that invariant. */
6046 u = size_int (size > 4 ? -8 : -4);
6047 t = fold_convert (sizetype, t);
6048 t = build2 (BIT_AND_EXPR, sizetype, t, u);
6049 t = fold_convert (valist_type, t);
6051 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6053 ofs = (8 - size) % 4;
6054 if (ofs != 0)
6056 u = size_int (ofs);
6057 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6060 t = fold_convert (ptr, t);
6061 t = build_va_arg_indirect_ref (t);
6063 if (indirect)
6064 t = build_va_arg_indirect_ref (t);
6066 return t;
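/* Annotation (editor's sketch): the 32-bit computation above is
   roughly, in pseudo-C,

	valist = (valist - size) & (size > 4 ? -8 : -4);
	valist += (8 - size) % 4;	/* right justify small args */
	arg = *(type *) valist;

   so an 8-byte argument lands 8-byte aligned and a 2-byte argument is
   right justified within its word.  */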
6070 /* True if MODE is valid for the target. By "valid", we mean able to
6071 be manipulated in non-trivial ways. In particular, this means all
6072 the arithmetic is supported.
6074 Currently, TImode is not valid as the HP 64-bit runtime documentation
6075 doesn't document the alignment and calling conventions for this type.
6076 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6077 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6079 static bool
6080 pa_scalar_mode_supported_p (enum machine_mode mode)
6082 int precision = GET_MODE_PRECISION (mode);
6084 switch (GET_MODE_CLASS (mode))
6086 case MODE_PARTIAL_INT:
6087 case MODE_INT:
6088 if (precision == CHAR_TYPE_SIZE)
6089 return true;
6090 if (precision == SHORT_TYPE_SIZE)
6091 return true;
6092 if (precision == INT_TYPE_SIZE)
6093 return true;
6094 if (precision == LONG_TYPE_SIZE)
6095 return true;
6096 if (precision == LONG_LONG_TYPE_SIZE)
6097 return true;
6098 return false;
6100 case MODE_FLOAT:
6101 if (precision == FLOAT_TYPE_SIZE)
6102 return true;
6103 if (precision == DOUBLE_TYPE_SIZE)
6104 return true;
6105 if (precision == LONG_DOUBLE_TYPE_SIZE)
6106 return true;
6107 return false;
6109 case MODE_DECIMAL_FLOAT:
6110 return false;
6112 default:
6113 gcc_unreachable ();
6117 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6118 it branches to the next real instruction. Otherwise, return FALSE. */
6120 static bool
6121 branch_to_delay_slot_p (rtx insn)
6123 if (dbr_sequence_length ())
6124 return FALSE;
6126 return next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn);
6129 /* Return TRUE if INSN, a jump insn, needs a nop in its delay slot.
6131 This occurs when INSN has an unfilled delay slot and is followed
6132 by an ASM_INPUT. Disaster can occur if the ASM_INPUT is empty and
6133 the jump branches into the delay slot. So, we add a nop in the delay
6134 slot just to be safe. This messes up our instruction count, but we
6135 don't know how big the ASM_INPUT insn is anyway. */
6137 static bool
6138 branch_needs_nop_p (rtx insn)
6140 rtx next_insn;
6142 if (dbr_sequence_length ())
6143 return FALSE;
6145 next_insn = next_real_insn (insn);
6146 return GET_CODE (PATTERN (next_insn)) == ASM_INPUT;
6149 /* This routine handles all the normal conditional branch sequences we
6150 might need to generate. It handles compare immediate vs compare
6151 register, nullification of delay slots, varying length branches,
6152 negated branches, and all combinations of the above. It returns the
6153 output appropriate to emit the branch corresponding to all given
6154 parameters. */
6156 const char *
6157 output_cbranch (rtx *operands, int negated, rtx insn)
6159 static char buf[100];
6160 int useskip = 0;
6161 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6162 int length = get_attr_length (insn);
6163 int xdelay;
6165 /* A conditional branch to the following instruction (e.g. the delay slot)
6166 is asking for a disaster. This can happen when not optimizing and
6167 when jump optimization fails.
6169 While it is usually safe to emit nothing, this can fail if the
6170 preceding instruction is a nullified branch with an empty delay
6171 slot and the same branch target as this branch. We could check
6172 for this but jump optimization should eliminate nop jumps. It
6173 is always safe to emit a nop. */
6174 if (branch_to_delay_slot_p (insn))
6175 return "nop";
6177 /* The doubleword form of the cmpib instruction doesn't have the LEU
6178 and GTU conditions while the cmpb instruction does. Since we accept
6179 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6180 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6181 operands[2] = gen_rtx_REG (DImode, 0);
6182 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6183 operands[1] = gen_rtx_REG (DImode, 0);
6185 /* If this is a long branch with its delay slot unfilled, set `nullify'
6186 as it can nullify the delay slot and save a nop. */
6187 if (length == 8 && dbr_sequence_length () == 0)
6188 nullify = 1;
6190 /* If this is a short forward conditional branch which did not get
6191 its delay slot filled, the delay slot can still be nullified. */
6192 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6193 nullify = forward_branch_p (insn);
6195 /* A forward branch over a single nullified insn can be done with a
6196 comclr instruction. This avoids a single cycle penalty due to
6197 mis-predicted branch if we fall through (branch not taken). */
6198 if (length == 4
6199 && next_real_insn (insn) != 0
6200 && get_attr_length (next_real_insn (insn)) == 4
6201 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6202 && nullify)
6203 useskip = 1;
6205 switch (length)
6207 /* All short conditional branches except backwards with an unfilled
6208 delay slot. */
6209 case 4:
6210 if (useskip)
6211 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6212 else
6213 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6214 if (GET_MODE (operands[1]) == DImode)
6215 strcat (buf, "*");
6216 if (negated)
6217 strcat (buf, "%B3");
6218 else
6219 strcat (buf, "%S3");
6220 if (useskip)
6221 strcat (buf, " %2,%r1,%%r0");
6222 else if (nullify)
6224 if (branch_needs_nop_p (insn))
6225 strcat (buf, ",n %2,%r1,%0%#");
6226 else
6227 strcat (buf, ",n %2,%r1,%0");
6229 else
6230 strcat (buf, " %2,%r1,%0");
6231 break;
6233 /* All long conditionals. Note a short backward branch with an
6234 unfilled delay slot is treated just like a long backward branch
6235 with an unfilled delay slot. */
6236 case 8:
6237 /* Handle weird backwards branch with a filled delay slot
6238 which is nullified. */
6239 if (dbr_sequence_length () != 0
6240 && ! forward_branch_p (insn)
6241 && nullify)
6243 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6244 if (GET_MODE (operands[1]) == DImode)
6245 strcat (buf, "*");
6246 if (negated)
6247 strcat (buf, "%S3");
6248 else
6249 strcat (buf, "%B3");
6250 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6252 /* Handle short backwards branch with an unfilled delay slot.
6253 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6254 taken and untaken branches. */
6255 else if (dbr_sequence_length () == 0
6256 && ! forward_branch_p (insn)
6257 && INSN_ADDRESSES_SET_P ()
6258 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6259 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6261 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6262 if (GET_MODE (operands[1]) == DImode)
6263 strcat (buf, "*");
6264 if (negated)
6265 strcat (buf, "%B3 %2,%r1,%0%#");
6266 else
6267 strcat (buf, "%S3 %2,%r1,%0%#");
6269 else
6271 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6272 if (GET_MODE (operands[1]) == DImode)
6273 strcat (buf, "*");
6274 if (negated)
6275 strcat (buf, "%S3");
6276 else
6277 strcat (buf, "%B3");
6278 if (nullify)
6279 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6280 else
6281 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6283 break;
6285 default:
6286 /* The reversed conditional branch must branch over one additional
6287 instruction if the delay slot is filled and needs to be extracted
6288 by output_lbranch. If the delay slot is empty or this is a
6289 nullified forward branch, the instruction after the reversed
6290 condition branch must be nullified. */
6291 if (dbr_sequence_length () == 0
6292 || (nullify && forward_branch_p (insn)))
6294 nullify = 1;
6295 xdelay = 0;
6296 operands[4] = GEN_INT (length);
6298 else
6300 xdelay = 1;
6301 operands[4] = GEN_INT (length + 4);
6304 /* Create a reversed conditional branch which branches around
6305 the following insns. */
6306 if (GET_MODE (operands[1]) != DImode)
6308 if (nullify)
6310 if (negated)
6311 strcpy (buf,
6312 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6313 else
6314 strcpy (buf,
6315 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6317 else
6319 if (negated)
6320 strcpy (buf,
6321 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6322 else
6323 strcpy (buf,
6324 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6327 else
6329 if (nullify)
6331 if (negated)
6332 strcpy (buf,
6333 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6334 else
6335 strcpy (buf,
6336 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6338 else
6340 if (negated)
6341 strcpy (buf,
6342 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6343 else
6344 strcpy (buf,
6345 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6349 output_asm_insn (buf, operands);
6350 return output_lbranch (operands[0], insn, xdelay);
6352 return buf;
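/* Annotation (editor's example): for a short nullified forward branch
   the routine above produces something like

	cmpb,<,n %r25,%r26,L$0012

   (operands and label are illustrative), while the default
   out-of-range case emits the reversed comparison around the
   long-branch sequence from output_lbranch.  */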
6355 /* This routine handles output of long unconditional branches that
6356 exceed the maximum range of a simple branch instruction. Since
6357 we don't have a register available for the branch, we save register
6358 %r1 in the frame marker, load the branch destination DEST into %r1,
6359 execute the branch, and restore %r1 in the delay slot of the branch.
6361 Since long branches may have an insn in the delay slot and the
6362 delay slot is used to restore %r1, we in general need to extract
6363 this insn and execute it before the branch. However, to facilitate
6364 use of this function by conditional branches, we also provide an
6365 option to not extract the delay insn so that it will be emitted
6366 after the long branch. So, if there is an insn in the delay slot,
6367 it is extracted if XDELAY is nonzero.
6369 The lengths of the various long-branch sequences are 20, 16 and 24
6370 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6372 const char *
6373 output_lbranch (rtx dest, rtx insn, int xdelay)
6375 rtx xoperands[2];
6377 xoperands[0] = dest;
6379 /* First, free up the delay slot. */
6380 if (xdelay && dbr_sequence_length () != 0)
6382 /* We can't handle a jump in the delay slot. */
6383 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6385 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6386 optimize, 0, NULL);
6388 /* Now delete the delay insn. */
6389 SET_INSN_DELETED (NEXT_INSN (insn));
6392 /* Output an insn to save %r1. The runtime documentation doesn't
6393 specify whether the "Clean Up" slot in the caller's frame can
6394 be clobbered by the callee. It isn't copied by HP's builtin
6395 alloca, so this suggests that it can be clobbered if necessary.
6396 The "Static Link" location is copied by HP builtin alloca, so
6397 we avoid using it. Using the cleanup slot might be a problem
6398 if we have to interoperate with languages that pass cleanup
6399 information. However, it should be possible to handle these
6400 situations with GCC's asm feature.
6402 The "Current RP" slot is reserved for the called procedure, so
6403 we try to use it when we don't have a frame of our own. It's
6404 rather unlikely that we won't have a frame when we need to emit
6405 a very long branch.
6407 Really the way to go long term is a register scavenger; go to
6408 the target of the jump and find a register which we can use
6409 as a scratch to hold the value in %r1. Then, we wouldn't have
6410 to free up the delay slot or clobber a slot that may be needed
6411 for other purposes. */
6412 if (TARGET_64BIT)
6414 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6415 /* Use the return pointer slot in the frame marker. */
6416 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6417 else
6418 /* Use the slot at -40 in the frame marker since HP builtin
6419 alloca doesn't copy it. */
6420 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6422 else
6424 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6425 /* Use the return pointer slot in the frame marker. */
6426 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6427 else
6428 /* Use the "Clean Up" slot in the frame marker. In GCC,
6429 the only other use of this location is for copying a
6430 floating point double argument from a floating-point
6431 register to two general registers. The copy is done
6432 as an "atomic" operation when outputting a call, so it
6433 won't interfere with our using the location here. */
6434 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6437 if (TARGET_PORTABLE_RUNTIME)
6439 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6440 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6441 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6443 else if (flag_pic)
6445 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6446 if (TARGET_SOM || !TARGET_GAS)
6448 xoperands[1] = gen_label_rtx ();
6449 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6450 targetm.asm_out.internal_label (asm_out_file, "L",
6451 CODE_LABEL_NUMBER (xoperands[1]));
6452 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6454 else
6456 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6457 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6459 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6461 else
6462 /* Now output a very long branch to the original target. */
6463 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6465 /* Now restore the value of %r1 in the delay slot. */
6466 if (TARGET_64BIT)
6468 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6469 return "ldd -16(%%r30),%%r1";
6470 else
6471 return "ldd -40(%%r30),%%r1";
6473 else
6475 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6476 return "ldw -20(%%r30),%%r1";
6477 else
6478 return "ldw -12(%%r30),%%r1";
6482 /* This routine handles all the branch-on-bit conditional branch sequences we
6483 might need to generate. It handles nullification of delay slots,
6484 varying length branches, negated branches and all combinations of the
6485 above. It returns the appropriate output template to emit the branch. */
6487 const char *
6488 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6490 static char buf[100];
6491 int useskip = 0;
6492 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6493 int length = get_attr_length (insn);
6494 int xdelay;
6496 /* A conditional branch to the following instruction (e.g. the delay slot) is
6497 asking for a disaster. I do not think this can happen as this pattern
6498 is only used when optimizing; jump optimization should eliminate the
6499 jump. But be prepared just in case. */
6501 if (branch_to_delay_slot_p (insn))
6502 return "nop";
6504 /* If this is a long branch with its delay slot unfilled, set `nullify'
6505 as it can nullify the delay slot and save a nop. */
6506 if (length == 8 && dbr_sequence_length () == 0)
6507 nullify = 1;
6509 /* If this is a short forward conditional branch which did not get
6510 its delay slot filled, the delay slot can still be nullified. */
6511 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6512 nullify = forward_branch_p (insn);
6514 /* A forward branch over a single nullified insn can be done with an
6515 extrs instruction. This avoids a single cycle penalty due to
6516 a mis-predicted branch if we fall through (branch not taken). */
6518 if (length == 4
6519 && next_real_insn (insn) != 0
6520 && get_attr_length (next_real_insn (insn)) == 4
6521 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6522 && nullify)
6523 useskip = 1;
6525 switch (length)
6528 /* All short conditional branches except backwards with an unfilled
6529 delay slot. */
6530 case 4:
6531 if (useskip)
6532 strcpy (buf, "{extrs,|extrw,s,}");
6533 else
6534 strcpy (buf, "bb,");
6535 if (useskip && GET_MODE (operands[0]) == DImode)
6536 strcpy (buf, "extrd,s,*");
6537 else if (GET_MODE (operands[0]) == DImode)
6538 strcpy (buf, "bb,*");
6539 if ((which == 0 && negated)
6540 || (which == 1 && ! negated))
6541 strcat (buf, ">=");
6542 else
6543 strcat (buf, "<");
6544 if (useskip)
6545 strcat (buf, " %0,%1,1,%%r0");
6546 else if (nullify && negated)
6548 if (branch_needs_nop_p (insn))
6549 strcat (buf, ",n %0,%1,%3%#");
6550 else
6551 strcat (buf, ",n %0,%1,%3");
6553 else if (nullify && ! negated)
6555 if (branch_needs_nop_p (insn))
6556 strcat (buf, ",n %0,%1,%2%#");
6557 else
6558 strcat (buf, ",n %0,%1,%2");
6560 else if (! nullify && negated)
6561 strcat (buf, " %0,%1,%3");
6562 else if (! nullify && ! negated)
6563 strcat (buf, " %0,%1,%2");
6564 break;
6566 /* All long conditionals. Note a short backward branch with an
6567 unfilled delay slot is treated just like a long backward branch
6568 with an unfilled delay slot. */
6569 case 8:
6570 /* Handle weird backwards branch with a filled delay slot
6571 which is nullified. */
6572 if (dbr_sequence_length () != 0
6573 && ! forward_branch_p (insn)
6574 && nullify)
6576 strcpy (buf, "bb,");
6577 if (GET_MODE (operands[0]) == DImode)
6578 strcat (buf, "*");
6579 if ((which == 0 && negated)
6580 || (which == 1 && ! negated))
6581 strcat (buf, "<");
6582 else
6583 strcat (buf, ">=");
6584 if (negated)
6585 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6586 else
6587 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6589 /* Handle short backwards branch with an unfilled delay slot.
6590 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6591 taken and untaken branches. */
6592 else if (dbr_sequence_length () == 0
6593 && ! forward_branch_p (insn)
6594 && INSN_ADDRESSES_SET_P ()
6595 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6596 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6598 strcpy (buf, "bb,");
6599 if (GET_MODE (operands[0]) == DImode)
6600 strcat (buf, "*");
6601 if ((which == 0 && negated)
6602 || (which == 1 && ! negated))
6603 strcat (buf, ">=");
6604 else
6605 strcat (buf, "<");
6606 if (negated)
6607 strcat (buf, " %0,%1,%3%#");
6608 else
6609 strcat (buf, " %0,%1,%2%#");
6611 else
6613 if (GET_MODE (operands[0]) == DImode)
6614 strcpy (buf, "extrd,s,*");
6615 else
6616 strcpy (buf, "{extrs,|extrw,s,}");
6617 if ((which == 0 && negated)
6618 || (which == 1 && ! negated))
6619 strcat (buf, "<");
6620 else
6621 strcat (buf, ">=");
6622 if (nullify && negated)
6623 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6624 else if (nullify && ! negated)
6625 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6626 else if (negated)
6627 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6628 else
6629 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6631 break;
6633 default:
6634 /* The reversed conditional branch must branch over one additional
6635 instruction if the delay slot is filled and needs to be extracted
6636 by output_lbranch. If the delay slot is empty or this is a
6637 nullified forward branch, the instruction after the reversed
6638 condition branch must be nullified. */
6639 if (dbr_sequence_length () == 0
6640 || (nullify && forward_branch_p (insn)))
6642 nullify = 1;
6643 xdelay = 0;
6644 operands[4] = GEN_INT (length);
6646 else
6648 xdelay = 1;
6649 operands[4] = GEN_INT (length + 4);
6652 if (GET_MODE (operands[0]) == DImode)
6653 strcpy (buf, "bb,*");
6654 else
6655 strcpy (buf, "bb,");
6656 if ((which == 0 && negated)
6657 || (which == 1 && !negated))
6658 strcat (buf, "<");
6659 else
6660 strcat (buf, ">=");
6661 if (nullify)
6662 strcat (buf, ",n %0,%1,.+%4");
6663 else
6664 strcat (buf, " %0,%1,.+%4");
6665 output_asm_insn (buf, operands);
6666 return output_lbranch (negated ? operands[3] : operands[2],
6667 insn, xdelay);
6669 return buf;
6672 /* This routine handles all the branch-on-variable-bit conditional branch
6673 sequences we might need to generate. It handles nullification of delay
6674 slots, varying length branches, negated branches and all combinations
6675 of the above. It returns the appropriate output template to emit the
6676 branch. */
6678 const char *
6679 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6681 static char buf[100];
6682 int useskip = 0;
6683 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6684 int length = get_attr_length (insn);
6685 int xdelay;
6687 /* A conditional branch to the following instruction (e.g. the delay slot) is
6688 asking for a disaster. I do not think this can happen as this pattern
6689 is only used when optimizing; jump optimization should eliminate the
6690 jump. But be prepared just in case. */
6692 if (branch_to_delay_slot_p (insn))
6693 return "nop";
6695 /* If this is a long branch with its delay slot unfilled, set `nullify'
6696 as it can nullify the delay slot and save a nop. */
6697 if (length == 8 && dbr_sequence_length () == 0)
6698 nullify = 1;
6700 /* If this is a short forward conditional branch which did not get
6701 its delay slot filled, the delay slot can still be nullified. */
6702 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6703 nullify = forward_branch_p (insn);
6705 /* A forward branch over a single nullified insn can be done with an
6706 extrs instruction. This avoids a single cycle penalty due to
6707 a mis-predicted branch if we fall through (branch not taken). */
6709 if (length == 4
6710 && next_real_insn (insn) != 0
6711 && get_attr_length (next_real_insn (insn)) == 4
6712 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6713 && nullify)
6714 useskip = 1;
6716 switch (length)
6719 /* All short conditional branches except backwards with an unfilled
6720 delay slot. */
6721 case 4:
6722 if (useskip)
6723 strcpy (buf, "{vextrs,|extrw,s,}");
6724 else
6725 strcpy (buf, "{bvb,|bb,}");
6726 if (useskip && GET_MODE (operands[0]) == DImode)
6727 strcpy (buf, "extrd,s,*");
6728 else if (GET_MODE (operands[0]) == DImode)
6729 strcpy (buf, "bb,*");
6730 if ((which == 0 && negated)
6731 || (which == 1 && ! negated))
6732 strcat (buf, ">=");
6733 else
6734 strcat (buf, "<");
6735 if (useskip)
6736 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6737 else if (nullify && negated)
6739 if (branch_needs_nop_p (insn))
6740 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6741 else
6742 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6744 else if (nullify && ! negated)
6746 if (branch_needs_nop_p (insn))
6747 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6748 else
6749 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6751 else if (! nullify && negated)
6752 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6753 else if (! nullify && ! negated)
6754 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6755 break;
6757 /* All long conditionals. Note a short backward branch with an
6758 unfilled delay slot is treated just like a long backward branch
6759 with an unfilled delay slot. */
6760 case 8:
6761 /* Handle weird backwards branch with a filled delay slot
6762 which is nullified. */
6763 if (dbr_sequence_length () != 0
6764 && ! forward_branch_p (insn)
6765 && nullify)
6767 strcpy (buf, "{bvb,|bb,}");
6768 if (GET_MODE (operands[0]) == DImode)
6769 strcat (buf, "*");
6770 if ((which == 0 && negated)
6771 || (which == 1 && ! negated))
6772 strcat (buf, "<");
6773 else
6774 strcat (buf, ">=");
6775 if (negated)
6776 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6777 else
6778 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6780 /* Handle short backwards branch with an unfilled delay slot.
6781 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6782 taken and untaken branches. */
6783 else if (dbr_sequence_length () == 0
6784 && ! forward_branch_p (insn)
6785 && INSN_ADDRESSES_SET_P ()
6786 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6787 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6789 strcpy (buf, "{bvb,|bb,}");
6790 if (GET_MODE (operands[0]) == DImode)
6791 strcat (buf, "*");
6792 if ((which == 0 && negated)
6793 || (which == 1 && ! negated))
6794 strcat (buf, ">=");
6795 else
6796 strcat (buf, "<");
6797 if (negated)
6798 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6799 else
6800 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6802 else
6804 strcpy (buf, "{vextrs,|extrw,s,}");
6805 if (GET_MODE (operands[0]) == DImode)
6806 strcpy (buf, "extrd,s,*");
6807 if ((which == 0 && negated)
6808 || (which == 1 && ! negated))
6809 strcat (buf, "<");
6810 else
6811 strcat (buf, ">=");
6812 if (nullify && negated)
6813 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6814 else if (nullify && ! negated)
6815 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6816 else if (negated)
6817 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6818 else
6819 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6821 break;
6823 default:
6824 /* The reversed conditional branch must branch over one additional
6825 instruction if the delay slot is filled and needs to be extracted
6826 by output_lbranch. If the delay slot is empty or this is a
6827 nullified forward branch, the instruction after the reversed
6828 condition branch must be nullified. */
6829 if (dbr_sequence_length () == 0
6830 || (nullify && forward_branch_p (insn)))
6832 nullify = 1;
6833 xdelay = 0;
6834 operands[4] = GEN_INT (length);
6836 else
6838 xdelay = 1;
6839 operands[4] = GEN_INT (length + 4);
6842 if (GET_MODE (operands[0]) == DImode)
6843 strcpy (buf, "bb,*");
6844 else
6845 strcpy (buf, "{bvb,|bb,}");
6846 if ((which == 0 && negated)
6847 || (which == 1 && !negated))
6848 strcat (buf, "<");
6849 else
6850 strcat (buf, ">=");
6851 if (nullify)
6852 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6853 else
6854 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6855 output_asm_insn (buf, operands);
6856 return output_lbranch (negated ? operands[3] : operands[2],
6857 insn, xdelay);
6859 return buf;
6862 /* Return the output template for emitting a dbra type insn.
6864 Note it may perform some output operations on its own before
6865 returning the final output string. */
6866 const char *
6867 output_dbra (rtx *operands, rtx insn, int which_alternative)
6869 int length = get_attr_length (insn);
6871 /* A conditional branch to the following instruction (e.g. the delay slot) is
6872 asking for a disaster. Be prepared! */
6874 if (branch_to_delay_slot_p (insn))
6876 if (which_alternative == 0)
6877 return "ldo %1(%0),%0";
6878 else if (which_alternative == 1)
6880 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6881 output_asm_insn ("ldw -16(%%r30),%4", operands);
6882 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6883 return "{fldws|fldw} -16(%%r30),%0";
6885 else
6887 output_asm_insn ("ldw %0,%4", operands);
6888 return "ldo %1(%4),%4\n\tstw %4,%0";
6892 if (which_alternative == 0)
6894 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6895 int xdelay;
6897 /* If this is a long branch with its delay slot unfilled, set `nullify'
6898 as it can nullify the delay slot and save a nop. */
6899 if (length == 8 && dbr_sequence_length () == 0)
6900 nullify = 1;
6902 /* If this is a short forward conditional branch which did not get
6903 its delay slot filled, the delay slot can still be nullified. */
6904 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6905 nullify = forward_branch_p (insn);
6907 switch (length)
6909 case 4:
6910 if (nullify)
6912 if (branch_needs_nop_p (insn))
6913 return "addib,%C2,n %1,%0,%3%#";
6914 else
6915 return "addib,%C2,n %1,%0,%3";
6917 else
6918 return "addib,%C2 %1,%0,%3";
6920 case 8:
6921 /* Handle weird backwards branch with a filled delay slot
6922 which is nullified. */
6923 if (dbr_sequence_length () != 0
6924 && ! forward_branch_p (insn)
6925 && nullify)
6926 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6927 /* Handle short backwards branch with an unfilled delay slot.
6928 Using an addb;nop rather than addi;bl saves 1 cycle for both
6929 taken and untaken branches. */
6930 else if (dbr_sequence_length () == 0
6931 && ! forward_branch_p (insn)
6932 && INSN_ADDRESSES_SET_P ()
6933 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6934 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6935 return "addib,%C2 %1,%0,%3%#";
6937 /* Handle normal cases. */
6938 if (nullify)
6939 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6940 else
6941 return "addi,%N2 %1,%0,%0\n\tb %3";
6943 default:
6944 /* The reversed conditional branch must branch over one additional
6945 instruction if the delay slot is filled and needs to be extracted
6946 by output_lbranch. If the delay slot is empty or this is a
6947 nullified forward branch, the instruction after the reversed
6948 condition branch must be nullified. */
6949 if (dbr_sequence_length () == 0
6950 || (nullify && forward_branch_p (insn)))
6952 nullify = 1;
6953 xdelay = 0;
6954 operands[4] = GEN_INT (length);
6956 else
6958 xdelay = 1;
6959 operands[4] = GEN_INT (length + 4);
6962 if (nullify)
6963 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
6964 else
6965 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
6967 return output_lbranch (operands[3], insn, xdelay);
6971 /* Deal with gross reload from FP register case. */
6972 else if (which_alternative == 1)
6974 /* Move loop counter from FP register to MEM then into a GR,
6975 increment the GR, store the GR into MEM, and finally reload
6976 the FP register from MEM from within the branch's delay slot. */
6977 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6978 operands);
6979 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6980 if (length == 24)
6981 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6982 else if (length == 28)
6983 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6984 else
6986 operands[5] = GEN_INT (length - 16);
6987 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
6988 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6989 return output_lbranch (operands[3], insn, 0);
6992 /* Deal with gross reload from memory case. */
6993 else
6995 /* Reload loop counter from memory, the store back to memory
6996 happens in the branch's delay slot. */
6997 output_asm_insn ("ldw %0,%4", operands);
6998 if (length == 12)
6999 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7000 else if (length == 16)
7001 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7002 else
7004 operands[5] = GEN_INT (length - 4);
7005 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7006 return output_lbranch (operands[3], insn, 0);
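/* Annotation (editor's example): the common dbra case above folds the
   decrement, test and branch into one instruction, e.g.

	addib,> -1,%r3,L$0005

   which adds -1 to %r3 and branches while the result stays positive
   (operands and label are illustrative).  */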
7011 /* Return the output template for emitting a movb type insn.
7013 Note it may perform some output operations on its own before
7014 returning the final output string. */
7015 const char *
7016 output_movb (rtx *operands, rtx insn, int which_alternative,
7017 int reverse_comparison)
7019 int length = get_attr_length (insn);
7021 /* A conditional branch to the following instruction (e.g. the delay slot) is
7022 asking for a disaster. Be prepared! */
7024 if (branch_to_delay_slot_p (insn))
7026 if (which_alternative == 0)
7027 return "copy %1,%0";
7028 else if (which_alternative == 1)
7030 output_asm_insn ("stw %1,-16(%%r30)", operands);
7031 return "{fldws|fldw} -16(%%r30),%0";
7033 else if (which_alternative == 2)
7034 return "stw %1,%0";
7035 else
7036 return "mtsar %r1";
7039 /* Support the second variant. */
7040 if (reverse_comparison)
7041 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7043 if (which_alternative == 0)
7045 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7046 int xdelay;
7048 /* If this is a long branch with its delay slot unfilled, set `nullify'
7049 as it can nullify the delay slot and save a nop. */
7050 if (length == 8 && dbr_sequence_length () == 0)
7051 nullify = 1;
7053 /* If this is a short forward conditional branch which did not get
7054 its delay slot filled, the delay slot can still be nullified. */
7055 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7056 nullify = forward_branch_p (insn);
7058 switch (length)
7060 case 4:
7061 if (nullify)
7063 if (branch_needs_nop_p (insn))
7064 return "movb,%C2,n %1,%0,%3%#";
7065 else
7066 return "movb,%C2,n %1,%0,%3";
7068 else
7069 return "movb,%C2 %1,%0,%3";
7071 case 8:
7072 /* Handle weird backwards branch with a filled delay slot
7073 which is nullified. */
7074 if (dbr_sequence_length () != 0
7075 && ! forward_branch_p (insn)
7076 && nullify)
7077 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7079 /* Handle short backwards branch with an unfilled delay slot.
7080 Using a movb;nop rather than or;bl saves 1 cycle for both
7081 taken and untaken branches. */
7082 else if (dbr_sequence_length () == 0
7083 && ! forward_branch_p (insn)
7084 && INSN_ADDRESSES_SET_P ()
7085 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7086 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7087 return "movb,%C2 %1,%0,%3%#";
7088 /* Handle normal cases. */
7089 if (nullify)
7090 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7091 else
7092 return "or,%N2 %1,%%r0,%0\n\tb %3";
7094 default:
7095 /* The reversed conditional branch must branch over one additional
7096 instruction if the delay slot is filled and needs to be extracted
7097 by output_lbranch. If the delay slot is empty or this is a
7098 nullified forward branch, the instruction after the reversed
7099 condition branch must be nullified. */
7100 if (dbr_sequence_length () == 0
7101 || (nullify && forward_branch_p (insn)))
7103 nullify = 1;
7104 xdelay = 0;
7105 operands[4] = GEN_INT (length);
7107 else
7109 xdelay = 1;
7110 operands[4] = GEN_INT (length + 4);
7113 if (nullify)
7114 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7115 else
7116 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7118 return output_lbranch (operands[3], insn, xdelay);
7121 /* Deal with gross reload for FP destination register case. */
7122 else if (which_alternative == 1)
7124 /* Move source register to MEM, perform the branch test, then
7125 finally load the FP register from MEM from within the branch's
7126 delay slot. */
7127 output_asm_insn ("stw %1,-16(%%r30)", operands);
7128 if (length == 12)
7129 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7130 else if (length == 16)
7131 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7132 else
7134 operands[4] = GEN_INT (length - 4);
7135 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7136 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7137 return output_lbranch (operands[3], insn, 0);
7140 /* Deal with gross reload from memory case. */
7141 else if (which_alternative == 2)
7143 /* Reload loop counter from memory, the store back to memory
7144 happens in the branch's delay slot. */
7145 if (length == 8)
7146 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7147 else if (length == 12)
7148 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7149 else
7151 operands[4] = GEN_INT (length);
7152 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7153 operands);
7154 return output_lbranch (operands[3], insn, 0);
7157 /* Handle SAR as a destination. */
7158 else
7160 if (length == 8)
7161 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7162 else if (length == 12)
7163 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7164 else
7166 operands[4] = GEN_INT (length);
7167 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7168 operands);
7169 return output_lbranch (operands[3], insn, 0);
7174 /* Copy any FP arguments in INSN into integer registers. */
7175 static void
7176 copy_fp_args (rtx insn)
7178 rtx link;
7179 rtx xoperands[2];
7181 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7183 int arg_mode, regno;
7184 rtx use = XEXP (link, 0);
7186 if (! (GET_CODE (use) == USE
7187 && GET_CODE (XEXP (use, 0)) == REG
7188 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7189 continue;
7191 arg_mode = GET_MODE (XEXP (use, 0));
7192 regno = REGNO (XEXP (use, 0));
7194 /* Is it a floating point register? */
7195 if (regno >= 32 && regno <= 39)
7197 /* Copy the FP register into an integer register via memory. */
7198 if (arg_mode == SFmode)
7200 xoperands[0] = XEXP (use, 0);
7201 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7202 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7203 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7205 else
7207 xoperands[0] = XEXP (use, 0);
7208 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7209 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7210 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7211 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7217 /* Compute length of the FP argument copy sequence for INSN. */
7218 static int
7219 length_fp_args (rtx insn)
7221 int length = 0;
7222 rtx link;
7224 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7226 int arg_mode, regno;
7227 rtx use = XEXP (link, 0);
7229 if (! (GET_CODE (use) == USE
7230 && GET_CODE (XEXP (use, 0)) == REG
7231 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7232 continue;
7234 arg_mode = GET_MODE (XEXP (use, 0));
7235 regno = REGNO (XEXP (use, 0));
7237 /* Is it a floating point register? */
7238 if (regno >= 32 && regno <= 39)
7240 if (arg_mode == SFmode)
7241 length += 8;
7242 else
7243 length += 12;
7247 return length;
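/* Annotation (editor's note): the 8 and 12 byte counts above match
   copy_fp_args: an SFmode copy is two 4-byte insns (fstw + ldw) and a
   DFmode copy is three (fstd + two ldw's).  */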
7250 /* Return the attribute length for the millicode call instruction INSN.
7251 The length must match the code generated by output_millicode_call.
7252 We include the delay slot in the returned length as it is better to
7253 overestimate the length than to underestimate it. */
7255 int
7256 attr_length_millicode_call (rtx insn)
7258 unsigned long distance = -1;
7259 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7261 if (INSN_ADDRESSES_SET_P ())
7263 distance = (total + insn_current_reference_address (insn));
7264 if (distance < total)
7265 distance = -1;
7268 if (TARGET_64BIT)
7270 if (!TARGET_LONG_CALLS && distance < 7600000)
7271 return 8;
7273 return 20;
7275 else if (TARGET_PORTABLE_RUNTIME)
7276 return 24;
7277 else
7279 if (!TARGET_LONG_CALLS && distance < 240000)
7280 return 8;
7282 if (TARGET_LONG_ABS_CALL && !flag_pic)
7283 return 12;
7285 return 24;
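/* Annotation (editor's note): DISTANCE is unsigned, so when insn
   addresses are unavailable (or the sum overflows) it stays at -1 and
   every "distance < N" test above fails, conservatively selecting the
   long call sequence.  */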
7289 /* INSN is a function call. It may have an unconditional jump
7290 in its delay slot.
7292 CALL_DEST is the routine we are calling. */
7294 const char *
7295 output_millicode_call (rtx insn, rtx call_dest)
7297 int attr_length = get_attr_length (insn);
7298 int seq_length = dbr_sequence_length ();
7299 int distance;
7300 rtx seq_insn;
7301 rtx xoperands[3];
7303 xoperands[0] = call_dest;
7304 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7306 /* Handle the common case where we are sure that the branch will
7307 reach the beginning of the $CODE$ subspace. The within reach
7308 form of the $$sh_func_adrs call has a length of 28. Because
7309 it has an attribute type of multi, it never has a nonzero
7310 sequence length. The length of the $$sh_func_adrs is the same
7311 as certain out of reach PIC calls to other routines. */
7312 if (!TARGET_LONG_CALLS
7313 && ((seq_length == 0
7314 && (attr_length == 12
7315 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7316 || (seq_length != 0 && attr_length == 8)))
7318 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7320 else
7322 if (TARGET_64BIT)
7324 /* It might seem that one insn could be saved by accessing
7325 the millicode function using the linkage table. However,
7326 this doesn't work in shared libraries and other dynamically
7327 loaded objects. Using a pc-relative sequence also avoids
7328 problems related to the implicit use of the gp register. */
7329 output_asm_insn ("b,l .+8,%%r1", xoperands);
7331 if (TARGET_GAS)
7333 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7334 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7336 else
7338 xoperands[1] = gen_label_rtx ();
7339 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7340 targetm.asm_out.internal_label (asm_out_file, "L",
7341 CODE_LABEL_NUMBER (xoperands[1]));
7342 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7345 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7347 else if (TARGET_PORTABLE_RUNTIME)
7349 /* Pure portable runtime doesn't allow be/ble; we also don't
7350 have PIC support in the assembler/linker, so this sequence
7351 is needed. */
7353 /* Get the address of our target into %r1. */
7354 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7355 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7357 /* Get our return address into %r31. */
7358 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7359 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7361 /* Jump to our target address in %r1. */
7362 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7364 else if (!flag_pic)
7366 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7367 if (TARGET_PA_20)
7368 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7369 else
7370 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7372 else
7374 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7375 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7377 if (TARGET_SOM || !TARGET_GAS)
7379 /* The HP assembler can generate relocations for the
7380 difference of two symbols. GAS can do this for a
7381 millicode symbol but not an arbitrary external
7382 symbol when generating SOM output. */
7383 xoperands[1] = gen_label_rtx ();
7384 targetm.asm_out.internal_label (asm_out_file, "L",
7385 CODE_LABEL_NUMBER (xoperands[1]));
7386 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7387 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7389 else
7391 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7392 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7393 xoperands);
7396 /* Jump to our target address in %r1. */
7397 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7401 if (seq_length == 0)
7402 output_asm_insn ("nop", xoperands);
7404 /* We are done if there isn't a jump in the delay slot. */
7405 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7406 return "";
7408 /* This call has an unconditional jump in its delay slot. */
7409 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7411 /* See if the return address can be adjusted. Use the containing
7412 sequence insn's address. */
7413 if (INSN_ADDRESSES_SET_P ())
7415 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7416 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7417 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7419 if (VAL_14_BITS_P (distance))
7421 xoperands[1] = gen_label_rtx ();
7422 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7423 targetm.asm_out.internal_label (asm_out_file, "L",
7424 CODE_LABEL_NUMBER (xoperands[1]));
7426 else
7427 /* ??? This branch may not reach its target. */
7428 output_asm_insn ("nop\n\tb,n %0", xoperands);
7430 else
7431 /* ??? This branch may not reach its target. */
7432 output_asm_insn ("nop\n\tb,n %0", xoperands);
7434 /* Delete the jump. */
7435 SET_INSN_DELETED (NEXT_INSN (insn));
7437 return "";
7440 /* Return the attribute length of the call instruction INSN. The SIBCALL
7441 flag indicates whether INSN is a regular call or a sibling call. The
7442 length returned must be longer than the code actually generated by
7443 output_call. Since branch shortening is done before delay branch
7444 sequencing, there is no way to determine whether or not the delay
7445 slot will be filled during branch shortening. Even when the delay
7446 slot is filled, we may have to add a nop if the delay slot contains
7447 a branch that can't reach its target. Thus, we always have to include
7448 the delay slot in the length estimate. This used to be done in
7449 pa_adjust_insn_length but we do it here now as some sequences always
7450 fill the delay slot and we can save four bytes in the estimate for
7451 these sequences. */
7453 int
7454 attr_length_call (rtx insn, int sibcall)
7456 int local_call;
7457 rtx call, call_dest;
7458 tree call_decl;
7459 int length = 0;
7460 rtx pat = PATTERN (insn);
7461 unsigned long distance = -1;
7463 gcc_assert (GET_CODE (insn) == CALL_INSN);
7465 if (INSN_ADDRESSES_SET_P ())
7467 unsigned long total;
7469 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7470 distance = (total + insn_current_reference_address (insn));
7471 if (distance < total)
7472 distance = -1;
7475 gcc_assert (GET_CODE (pat) == PARALLEL);
7477 /* Get the call rtx. */
7478 call = XVECEXP (pat, 0, 0);
7479 if (GET_CODE (call) == SET)
7480 call = SET_SRC (call);
7482 gcc_assert (GET_CODE (call) == CALL);
7484 /* Determine if this is a local call. */
7485 call_dest = XEXP (XEXP (call, 0), 0);
7486 call_decl = SYMBOL_REF_DECL (call_dest);
7487 local_call = call_decl && targetm.binds_local_p (call_decl);
7489 /* pc-relative branch. */
7490 if (!TARGET_LONG_CALLS
7491 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7492 || distance < 240000))
7493 length += 8;
7495 /* 64-bit plabel sequence. */
7496 else if (TARGET_64BIT && !local_call)
7497 length += sibcall ? 28 : 24;
7499 /* non-pic long absolute branch sequence. */
7500 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7501 length += 12;
7503 /* long pc-relative branch sequence. */
7504 else if (TARGET_LONG_PIC_SDIFF_CALL
7505 || (TARGET_GAS && !TARGET_SOM
7506 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7508 length += 20;
7510 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7511 length += 8;
7514 /* 32-bit plabel sequence. */
7515 else
7517 length += 32;
7519 if (TARGET_SOM)
7520 length += length_fp_args (insn);
7522 if (flag_pic)
7523 length += 4;
7525 if (!TARGET_PA_20)
7527 if (!sibcall)
7528 length += 8;
7530 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7531 length += 8;
7535 return length;
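/* Annotation (editor's note): the worst case above is the 32-bit
   plabel sequence: 32 bytes, plus length_fp_args for SOM, plus 4 for
   PIC, plus up to 16 more on pre-2.0 targets that must set up the
   return pointer and manage the space registers.  */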
7538 /* INSN is a function call. It may have an unconditional jump
7539 in its delay slot.
7541 CALL_DEST is the routine we are calling. */
7543 const char *
7544 output_call (rtx insn, rtx call_dest, int sibcall)
7546 int delay_insn_deleted = 0;
7547 int delay_slot_filled = 0;
7548 int seq_length = dbr_sequence_length ();
7549 tree call_decl = SYMBOL_REF_DECL (call_dest);
7550 int local_call = call_decl && targetm.binds_local_p (call_decl);
7551 rtx xoperands[2];
7553 xoperands[0] = call_dest;
7555 /* Handle the common case where we're sure that the branch will reach
7556 the beginning of the "$CODE$" subspace. This is the beginning of
7557 the current function if we are in a named section. */
7558 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7560 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7561 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7563 else
7565 if (TARGET_64BIT && !local_call)
7567 /* ??? As far as I can tell, the HP linker doesn't support the
7568 long pc-relative sequence described in the 64-bit runtime
7569 architecture. So, we use a slightly longer indirect call. */
7570 xoperands[0] = get_deferred_plabel (call_dest);
7571 xoperands[1] = gen_label_rtx ();
7573 /* If this isn't a sibcall, we put the load of %r27 into the
7574 delay slot. We can't do this in a sibcall as we don't
7575 have a second call-clobbered scratch register available. */
7576 if (seq_length != 0
7577 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7578 && !sibcall)
7580 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7581 optimize, 0, NULL);
7583 /* Now delete the delay insn. */
7584 SET_INSN_DELETED (NEXT_INSN (insn));
7585 delay_insn_deleted = 1;
7588 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7589 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7590 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7592 if (sibcall)
7594 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7595 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7596 output_asm_insn ("bve (%%r1)", xoperands);
7598 else
7600 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7601 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7602 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7603 delay_slot_filled = 1;
7606 else
7608 int indirect_call = 0;
7610 /* Emit a long call. There are several different sequences
7611 of increasing length and complexity. In most cases,
7612 they don't allow an instruction in the delay slot. */
7613 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7614 && !TARGET_LONG_PIC_SDIFF_CALL
7615 && !(TARGET_GAS && !TARGET_SOM
7616 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7617 && !TARGET_64BIT)
7618 indirect_call = 1;
7620 if (seq_length != 0
7621 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7622 && !sibcall
7623 && (!TARGET_PA_20
7624 || indirect_call
7625 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7627 /* A non-jump insn in the delay slot. By definition we can
7628 emit this insn before the call (and in fact before argument
7629 relocation). */
7630 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7631 NULL);
7633 /* Now delete the delay insn. */
7634 SET_INSN_DELETED (NEXT_INSN (insn));
7635 delay_insn_deleted = 1;
7638 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7640 /* This is the best sequence for making long calls in
7641 non-pic code. Unfortunately, GNU ld doesn't provide
7642 the stub needed for external calls, and GAS's support
7643 for this with the SOM linker is buggy. It is safe
7644 to use this for local calls. */
7645 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7646 if (sibcall)
7647 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7648 else
7650 if (TARGET_PA_20)
7651 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7652 xoperands);
7653 else
7654 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7656 output_asm_insn ("copy %%r31,%%r2", xoperands);
7657 delay_slot_filled = 1;
7660 else
7662 if (TARGET_LONG_PIC_SDIFF_CALL)
7664 /* The HP assembler and linker can handle relocations
7665 for the difference of two symbols. The HP assembler
7666 recognizes the sequence as a pc-relative call and
7667 the linker provides stubs when needed. */
7668 xoperands[1] = gen_label_rtx ();
7669 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7670 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7671 targetm.asm_out.internal_label (asm_out_file, "L",
7672 CODE_LABEL_NUMBER (xoperands[1]));
7673 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7675 else if (TARGET_GAS && !TARGET_SOM
7676 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7678 /* GAS currently can't generate the relocations that
7679 are needed for the SOM linker under HP-UX using this
7680 sequence. The GNU linker doesn't generate the stubs
7681 that are needed for external calls on TARGET_ELF32
7682 with this sequence. For now, we have to use a
7683 longer plabel sequence when using GAS. */
7684 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7685 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7686 xoperands);
7687 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7688 xoperands);
7690 else
7692 /* Emit a long plabel-based call sequence. This is
7693 essentially an inline implementation of $$dyncall.
7694 We don't actually try to call $$dyncall as this is
7695 as difficult as calling the function itself. */
7696 xoperands[0] = get_deferred_plabel (call_dest);
7697 xoperands[1] = gen_label_rtx ();
7699 /* Since the call is indirect, FP arguments in registers
7700 need to be copied to the general registers. Then, the
7701 argument relocation stub will copy them back. */
7702 if (TARGET_SOM)
7703 copy_fp_args (insn);
7705 if (flag_pic)
7707 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7708 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7709 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7711 else
7713 output_asm_insn ("addil LR'%0-$global$,%%r27",
7714 xoperands);
7715 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7716 xoperands);
7719 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7720 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7721 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7722 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7724 if (!sibcall && !TARGET_PA_20)
7726 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7727 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7728 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7729 else
7730 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7734 if (TARGET_PA_20)
7736 if (sibcall)
7737 output_asm_insn ("bve (%%r1)", xoperands);
7738 else
7740 if (indirect_call)
7742 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7743 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7744 delay_slot_filled = 1;
7746 else
7747 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7750 else
7752 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7753 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7754 xoperands);
7756 if (sibcall)
7758 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7759 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7760 else
7761 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7763 else
7765 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7766 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7767 else
7768 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7770 if (indirect_call)
7771 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7772 else
7773 output_asm_insn ("copy %%r31,%%r2", xoperands);
7774 delay_slot_filled = 1;
7781 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7782 output_asm_insn ("nop", xoperands);
7784 /* We are done if there isn't a jump in the delay slot. */
7785 if (seq_length == 0
7786 || delay_insn_deleted
7787 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7788 return "";
7790 /* A sibcall should never have a branch in the delay slot. */
7791 gcc_assert (!sibcall);
7793 /* This call has an unconditional jump in its delay slot. */
7794 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7796 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7798 /* See if the return address can be adjusted. Use the containing
7799 sequence insn's address. This would break the regular call/return
7800 relationship assumed by the table-based eh unwinder, so only do that
7801 if the call is not possibly throwing. */
7802 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7803 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7804 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7806 if (VAL_14_BITS_P (distance)
7807 && !(can_throw_internal (insn) || can_throw_external (insn)))
7809 xoperands[1] = gen_label_rtx ();
7810 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7811 targetm.asm_out.internal_label (asm_out_file, "L",
7812 CODE_LABEL_NUMBER (xoperands[1]));
7814 else
7815 output_asm_insn ("nop\n\tb,n %0", xoperands);
7817 else
7818 output_asm_insn ("b,n %0", xoperands);
7820 /* Delete the jump. */
7821 SET_INSN_DELETED (NEXT_INSN (insn));
7823 return "";
7826 /* Return the attribute length of the indirect call instruction INSN.
7827 The length must match the code generated by output_indirect_call.
7828 The returned length includes the delay slot. Currently, the delay
7829 slot of an indirect call sequence is not exposed and it is used by
7830 the sequence itself. */
7833 attr_length_indirect_call (rtx insn)
7835 unsigned long distance = -1;
7836 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7838 if (INSN_ADDRESSES_SET_P ())
7840 distance = (total + insn_current_reference_address (insn));
7841 if (distance < total)
7842 distance = -1;
7845 if (TARGET_64BIT)
7846 return 12;
7848 if (TARGET_FAST_INDIRECT_CALLS
7849 || (!TARGET_PORTABLE_RUNTIME
7850 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7851 || distance < 240000)))
7852 return 8;
7854 if (flag_pic)
7855 return 24;
7857 if (TARGET_PORTABLE_RUNTIME)
7858 return 20;
7860 /* Out of reach, can use ble. */
7861 return 12;
7864 const char *
7865 output_indirect_call (rtx insn, rtx call_dest)
7867 rtx xoperands[1];
7869 if (TARGET_64BIT)
7871 xoperands[0] = call_dest;
7872 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7873 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7874 return "";
7877 /* First the special case for kernels, level 0 systems, etc. */
7878 if (TARGET_FAST_INDIRECT_CALLS)
7879 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7881 /* Now the normal case -- we can reach $$dyncall directly or
7882 we're sure that we can get there via a long-branch stub.
7884 No need to check target flags as the length uniquely identifies
7885 the remaining cases. */
7886 if (attr_length_indirect_call (insn) == 8)
7888 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7889 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7890 variant of the B,L instruction can't be used on the SOM target. */
7891 if (TARGET_PA_20 && !TARGET_SOM)
7892 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7893 else
7894 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7897 /* Long millicode call, but we are not generating PIC or portable runtime
7898 code. */
7899 if (attr_length_indirect_call (insn) == 12)
7900 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7902 /* Long millicode call for portable runtime. */
7903 if (attr_length_indirect_call (insn) == 20)
7904 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7906 /* We need a long PIC call to $$dyncall. */
7907 xoperands[0] = NULL_RTX;
7908 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7909 if (TARGET_SOM || !TARGET_GAS)
7911 xoperands[0] = gen_label_rtx ();
7912 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7913 targetm.asm_out.internal_label (asm_out_file, "L",
7914 CODE_LABEL_NUMBER (xoperands[0]));
7915 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7917 else
7919 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7920 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7921 xoperands);
7923 output_asm_insn ("blr %%r0,%%r2", xoperands);
7924 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7925 return "";
7928 /* Return the total length of the save and restore instructions needed for
7929 the data linkage table pointer (i.e., the PIC register) across the call
7930 instruction INSN. No-return calls do not require a save and restore.
7931 In addition, we may be able to avoid the save and restore for calls
7932 within the same translation unit. */
7935 attr_length_save_restore_dltp (rtx insn)
7937 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7938 return 0;
7940 return 8;
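/* The 8 bytes above are one 4-byte store before the call and one 4-byte
   load after it, roughly as below (the stack slot shown is illustrative;
   the actual frame offset is chosen by the call patterns in pa.md):

        stw %r19,-32(%sp)    ; save the PIC register
        ... the call ...
        ldw -32(%sp),%r19    ; restore the PIC register  */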
7943 /* In HPUX 8.0's shared library scheme, special relocations are needed
7944 for function labels if they might be passed to a function
7945 in a shared library (because shared libraries don't live in code
7946 space), and special magic is needed to construct their address. */
7948 void
7949 hppa_encode_label (rtx sym)
7951 const char *str = XSTR (sym, 0);
7952 int len = strlen (str) + 1;
7953 char *newstr, *p;
7955 p = newstr = XALLOCAVEC (char, len + 1);
7956 *p++ = '@';
7957 strcpy (p, str);
7959 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7962 static void
7963 pa_encode_section_info (tree decl, rtx rtl, int first)
7965 int old_referenced = 0;
7967 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
7968 old_referenced
7969 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
7971 default_encode_section_info (decl, rtl, first);
7973 if (first && TEXT_SPACE_P (decl))
7975 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7976 if (TREE_CODE (decl) == FUNCTION_DECL)
7977 hppa_encode_label (XEXP (rtl, 0));
7979 else if (old_referenced)
7980 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
7983 /* This is sort of inverse to pa_encode_section_info. */
7985 static const char *
7986 pa_strip_name_encoding (const char *str)
7988 str += (*str == '@');
7989 str += (*str == '*');
7990 return str;
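#if 0
/* Standalone sketch of the encode/strip round trip above.  Not part of
   the compiler proper; shown only for illustration and compiled out.  */
#include <stdio.h>

static const char *
strip_name_encoding_demo (const char *str)
{
  str += (*str == '@');		/* drop the function label marker */
  str += (*str == '*');		/* drop a user label marker */
  return str;
}

int
main (void)
{
  const char *encoded = "@foo";	/* as built by hppa_encode_label */
  printf ("%s\n", strip_name_encoding_demo (encoded));	/* prints "foo" */
  return 0;
}
#endif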
7994 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7996 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7999 /* Returns 1 if OP is a function label involved in a simple addition
8000 with a constant. Used to keep certain patterns from matching
8001 during instruction combination. */
8003 is_function_label_plus_const (rtx op)
8005 /* Strip off any CONST. */
8006 if (GET_CODE (op) == CONST)
8007 op = XEXP (op, 0);
8009 return (GET_CODE (op) == PLUS
8010 && function_label_operand (XEXP (op, 0), Pmode)
8011 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8014 /* Output assembly code for a thunk to FUNCTION. */
8016 static void
8017 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8018 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8019 tree function)
8021 static unsigned int current_thunk_number;
8022 int val_14 = VAL_14_BITS_P (delta);
8023 unsigned int old_last_address = last_address, nbytes = 0;
8024 char label[16];
8025 rtx xoperands[4];
8027 xoperands[0] = XEXP (DECL_RTL (function), 0);
8028 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8029 xoperands[2] = GEN_INT (delta);
8031 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8032 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8034 /* Output the thunk. We know that the function is in the same
8035 translation unit (i.e., the same space) as the thunk, and that
8036 thunks are output after their method. Thus, we don't need an
8037 external branch to reach the function. With SOM and GAS,
8038 functions and thunks are effectively in different sections.
8039 Thus, we can always use an IA-relative branch and the linker
8040 will add a long branch stub if necessary.
8042 However, we have to be careful when generating PIC code on the
8043 SOM port to ensure that the sequence does not transfer to an
8044 import stub for the target function as this could clobber the
8045 return value saved at SP-24. This would also apply to the
8046 32-bit linux port if the multi-space model is implemented. */
8047 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8048 && !(flag_pic && TREE_PUBLIC (function))
8049 && (TARGET_GAS || last_address < 262132))
8050 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8051 && ((targetm.have_named_sections
8052 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8053 /* The GNU 64-bit linker has rather poor stub management.
8054 So, we use a long branch from thunks that aren't in
8055 the same section as the target function. */
8056 && ((!TARGET_64BIT
8057 && (DECL_SECTION_NAME (thunk_fndecl)
8058 != DECL_SECTION_NAME (function)))
8059 || ((DECL_SECTION_NAME (thunk_fndecl)
8060 == DECL_SECTION_NAME (function))
8061 && last_address < 262132)))
8062 || (targetm.have_named_sections
8063 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8064 && DECL_SECTION_NAME (function) == NULL
8065 && last_address < 262132)
8066 || (!targetm.have_named_sections && last_address < 262132))))
8068 if (!val_14)
8069 output_asm_insn ("addil L'%2,%%r26", xoperands);
8071 output_asm_insn ("b %0", xoperands);
8073 if (val_14)
8075 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8076 nbytes += 8;
8078 else
8080 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8081 nbytes += 12;
8084 else if (TARGET_64BIT)
8086 /* We only have one call-clobbered scratch register, so we can't
8087 make use of the delay slot if delta doesn't fit in 14 bits. */
8088 if (!val_14)
8090 output_asm_insn ("addil L'%2,%%r26", xoperands);
8091 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8094 output_asm_insn ("b,l .+8,%%r1", xoperands);
8096 if (TARGET_GAS)
8098 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8099 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8101 else
8103 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8104 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8107 if (val_14)
8109 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8110 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8111 nbytes += 20;
8113 else
8115 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8116 nbytes += 24;
8119 else if (TARGET_PORTABLE_RUNTIME)
8121 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8122 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8124 if (!val_14)
8125 output_asm_insn ("addil L'%2,%%r26", xoperands);
8127 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8129 if (val_14)
8131 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8132 nbytes += 16;
8134 else
8136 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8137 nbytes += 20;
8140 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8142 /* The function is accessible from outside this module. The only
8143 way to avoid an import stub between the thunk and function is to
8144 call the function directly with an indirect sequence similar to
8145 that used by $$dyncall. This is possible because $$dyncall acts
8146 as the import stub in an indirect call. */
8147 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8148 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8149 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8150 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8151 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8152 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8153 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8154 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8155 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8157 if (!val_14)
8159 output_asm_insn ("addil L'%2,%%r26", xoperands);
8160 nbytes += 4;
8163 if (TARGET_PA_20)
8165 output_asm_insn ("bve (%%r22)", xoperands);
8166 nbytes += 36;
8168 else if (TARGET_NO_SPACE_REGS)
8170 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8171 nbytes += 36;
8173 else
8175 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8176 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8177 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8178 nbytes += 44;
8181 if (val_14)
8182 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8183 else
8184 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8186 else if (flag_pic)
8188 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8190 if (TARGET_SOM || !TARGET_GAS)
8192 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8193 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8195 else
8197 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8198 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8201 if (!val_14)
8202 output_asm_insn ("addil L'%2,%%r26", xoperands);
8204 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8206 if (val_14)
8208 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8209 nbytes += 20;
8211 else
8213 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8214 nbytes += 24;
8217 else
8219 if (!val_14)
8220 output_asm_insn ("addil L'%2,%%r26", xoperands);
8222 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8223 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8225 if (val_14)
8227 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8228 nbytes += 12;
8230 else
8232 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8233 nbytes += 16;
8237 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8239 if (TARGET_SOM && TARGET_GAS)
8241 /* We're done with this subspace except possibly for some additional
8242 debug information. Forget that we are in this subspace to ensure
8243 that the next function is output in its own subspace. */
8244 in_section = NULL;
8245 cfun->machine->in_nsubspa = 2;
8248 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8250 switch_to_section (data_section);
8251 output_asm_insn (".align 4", xoperands);
8252 ASM_OUTPUT_LABEL (file, label);
8253 output_asm_insn (".word P'%0", xoperands);
8256 current_thunk_number++;
8257 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8258 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8259 last_address += nbytes;
8260 if (old_last_address > last_address)
8261 last_address = UINT_MAX;
8262 update_total_code_bytes (nbytes);
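/* Illustrative output for the simplest case above (direct branch, DELTA
   fits in 14 bits); the label name is hypothetical:

        thunk:
                .PROC
                .CALLINFO FRAME=0,NO_CALLS
                .ENTRY
                b function
                ldo delta(%r26),%r26	; adjust `this' in the delay slot
                .EXIT
                .PROCEND  */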
8265 /* Only direct calls to static functions are allowed to be sibling (tail)
8266 call optimized.
8268 This restriction is necessary because some linker generated stubs will
8269 store return pointers into rp' in some cases which might clobber a
8270 live value already in rp'.
8272 In a sibcall the current function and the target function share stack
8273 space. Thus if the path to the current function and the path to the
8274 target function save a value in rp', they save the value into the
8275 same stack slot, which has undesirable consequences.
8277 Because of the deferred binding nature of shared libraries any function
8278 with external scope could be in a different load module and thus require
8279 rp' to be saved when calling that function. So sibcall optimizations
8280 can only be safe for static functions.
8282 Note that GCC never needs return value relocations, so we don't have to
8283 worry about static calls with return value relocations (which require
8284 saving rp').
8286 It is safe to perform a sibcall optimization when the target function
8287 will never return. */
8288 static bool
8289 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8291 if (TARGET_PORTABLE_RUNTIME)
8292 return false;
8294 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8295 single subspace mode and the call is not indirect. As far as I know,
8296 there is no operating system support for the multiple subspace mode.
8297 It might be possible to support indirect calls if we didn't use
8298 $$dyncall (see the indirect sequence generated in output_call). */
8299 if (TARGET_ELF32)
8300 return (decl != NULL_TREE);
8302 /* Sibcalls are not ok because the arg pointer register is not a fixed
8303 register. This prevents the sibcall optimization from occurring. In
8304 addition, there are problems with stub placement using GNU ld. This
8305 is because a normal sibcall branch uses a 17-bit relocation while
8306 a regular call branch uses a 22-bit relocation. As a result, more
8307 care needs to be taken in the placement of long-branch stubs. */
8308 if (TARGET_64BIT)
8309 return false;
8311 /* Sibcalls are only ok within a translation unit. */
8312 return (decl && !TREE_PUBLIC (decl));
8315 /* ??? Addition is not commutative on the PA due to the weird implicit
8316 space register selection rules for memory addresses. Therefore, we
8317 don't consider a + b == b + a, as this might be inside a MEM. */
8318 static bool
8319 pa_commutative_p (const_rtx x, int outer_code)
8321 return (COMMUTATIVE_P (x)
8322 && (TARGET_NO_SPACE_REGS
8323 || (outer_code != UNKNOWN && outer_code != MEM)
8324 || GET_CODE (x) != PLUS));
8327 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8328 use in fmpyadd instructions. */
8330 fmpyaddoperands (rtx *operands)
8332 enum machine_mode mode = GET_MODE (operands[0]);
8334 /* Must be a floating point mode. */
8335 if (mode != SFmode && mode != DFmode)
8336 return 0;
8338 /* All modes must be the same. */
8339 if (! (mode == GET_MODE (operands[1])
8340 && mode == GET_MODE (operands[2])
8341 && mode == GET_MODE (operands[3])
8342 && mode == GET_MODE (operands[4])
8343 && mode == GET_MODE (operands[5])))
8344 return 0;
8346 /* All operands must be registers. */
8347 if (! (GET_CODE (operands[1]) == REG
8348 && GET_CODE (operands[2]) == REG
8349 && GET_CODE (operands[3]) == REG
8350 && GET_CODE (operands[4]) == REG
8351 && GET_CODE (operands[5]) == REG))
8352 return 0;
8354 /* Only 2 real operands to the addition. One of the input operands must
8355 be the same as the output operand. */
8356 if (! rtx_equal_p (operands[3], operands[4])
8357 && ! rtx_equal_p (operands[3], operands[5]))
8358 return 0;
8360 /* Inout operand of add cannot conflict with any operands from multiply. */
8361 if (rtx_equal_p (operands[3], operands[0])
8362 || rtx_equal_p (operands[3], operands[1])
8363 || rtx_equal_p (operands[3], operands[2]))
8364 return 0;
8366 /* multiply cannot feed into addition operands. */
8367 if (rtx_equal_p (operands[4], operands[0])
8368 || rtx_equal_p (operands[5], operands[0]))
8369 return 0;
8371 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8372 if (mode == SFmode
8373 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8374 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8375 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8376 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8377 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8378 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8379 return 0;
8381 /* Passed. Operands are suitable for fmpyadd. */
8382 return 1;
8385 #if !defined(USE_COLLECT2)
8386 static void
8387 pa_asm_out_constructor (rtx symbol, int priority)
8389 if (!function_label_operand (symbol, VOIDmode))
8390 hppa_encode_label (symbol);
8392 #ifdef CTORS_SECTION_ASM_OP
8393 default_ctor_section_asm_out_constructor (symbol, priority);
8394 #else
8395 # ifdef TARGET_ASM_NAMED_SECTION
8396 default_named_section_asm_out_constructor (symbol, priority);
8397 # else
8398 default_stabs_asm_out_constructor (symbol, priority);
8399 # endif
8400 #endif
8403 static void
8404 pa_asm_out_destructor (rtx symbol, int priority)
8406 if (!function_label_operand (symbol, VOIDmode))
8407 hppa_encode_label (symbol);
8409 #ifdef DTORS_SECTION_ASM_OP
8410 default_dtor_section_asm_out_destructor (symbol, priority);
8411 #else
8412 # ifdef TARGET_ASM_NAMED_SECTION
8413 default_named_section_asm_out_destructor (symbol, priority);
8414 # else
8415 default_stabs_asm_out_destructor (symbol, priority);
8416 # endif
8417 #endif
8419 #endif
8421 /* This function places uninitialized global data in the bss section.
8422 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8423 function on the SOM port to prevent uninitialized global data from
8424 being placed in the data section. */
8426 void
8427 pa_asm_output_aligned_bss (FILE *stream,
8428 const char *name,
8429 unsigned HOST_WIDE_INT size,
8430 unsigned int align)
8432 switch_to_section (bss_section);
8433 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8435 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8436 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8437 #endif
8439 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8440 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8441 #endif
8443 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8444 ASM_OUTPUT_LABEL (stream, name);
8445 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
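/* Example of the directives emitted above for a 16-byte object with
   64-bit alignment (illustrative; the type and size directives appear
   only when the corresponding target macros are defined, and their
   exact spelling depends on those macros):

        .align 8
        .type var,@object
        .size var,16
        .align 8
   var:
        .block 16  */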
8448 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8449 that doesn't allow the alignment of global common storage to be directly
8450 specified. The SOM linker aligns common storage based on the rounded
8451 value of the NUM_BYTES parameter in the .comm directive. It's not
8452 possible to use the .align directive as it doesn't affect the alignment
8453 of the label associated with a .comm directive. */
8455 void
8456 pa_asm_output_aligned_common (FILE *stream,
8457 const char *name,
8458 unsigned HOST_WIDE_INT size,
8459 unsigned int align)
8461 unsigned int max_common_align;
8463 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8464 if (align > max_common_align)
8466 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8467 "for global common data. Using %u",
8468 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8469 align = max_common_align;
8472 switch_to_section (bss_section);
8474 assemble_name (stream, name);
8475 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8476 MAX (size, align / BITS_PER_UNIT));
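/* Example (illustrative): a 4-byte object requesting 128-bit (16-byte)
   alignment emits

        var	.comm 16

   since MAX (4, 128 / BITS_PER_UNIT) == 16; the SOM linker then derives
   the alignment from the rounded allocation size.  */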
8479 /* We can't use .comm for local common storage as the SOM linker effectively
8480 treats the symbol as universal and uses the same storage for local symbols
8481 with the same name in different object files. The .block directive
8482 reserves an uninitialized block of storage. However, it's not common
8483 storage. Fortunately, GCC never requests common storage with the same
8484 name in any given translation unit. */
8486 void
8487 pa_asm_output_aligned_local (FILE *stream,
8488 const char *name,
8489 unsigned HOST_WIDE_INT size,
8490 unsigned int align)
8492 switch_to_section (bss_section);
8493 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8495 #ifdef LOCAL_ASM_OP
8496 fprintf (stream, "%s", LOCAL_ASM_OP);
8497 assemble_name (stream, name);
8498 fprintf (stream, "\n");
8499 #endif
8501 ASM_OUTPUT_LABEL (stream, name);
8502 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8505 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8506 use in fmpysub instructions. */
8508 fmpysuboperands (rtx *operands)
8510 enum machine_mode mode = GET_MODE (operands[0]);
8512 /* Must be a floating point mode. */
8513 if (mode != SFmode && mode != DFmode)
8514 return 0;
8516 /* All modes must be the same. */
8517 if (! (mode == GET_MODE (operands[1])
8518 && mode == GET_MODE (operands[2])
8519 && mode == GET_MODE (operands[3])
8520 && mode == GET_MODE (operands[4])
8521 && mode == GET_MODE (operands[5])))
8522 return 0;
8524 /* All operands must be registers. */
8525 if (! (GET_CODE (operands[1]) == REG
8526 && GET_CODE (operands[2]) == REG
8527 && GET_CODE (operands[3]) == REG
8528 && GET_CODE (operands[4]) == REG
8529 && GET_CODE (operands[5]) == REG))
8530 return 0;
8532 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8533 operation, so operands[4] must be the same as operands[3]. */
8534 if (! rtx_equal_p (operands[3], operands[4]))
8535 return 0;
8537 /* multiply cannot feed into subtraction. */
8538 if (rtx_equal_p (operands[5], operands[0]))
8539 return 0;
8541 /* Inout operand of sub cannot conflict with any operands from multiply. */
8542 if (rtx_equal_p (operands[3], operands[0])
8543 || rtx_equal_p (operands[3], operands[1])
8544 || rtx_equal_p (operands[3], operands[2]))
8545 return 0;
8547 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8548 if (mode == SFmode
8549 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8550 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8551 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8552 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8553 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8554 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8555 return 0;
8557 /* Passed. Operands are suitable for fmpysub. */
8558 return 1;
8561 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8562 constants for shadd instructions. */
8564 shadd_constant_p (int val)
8566 if (val == 2 || val == 4 || val == 8)
8567 return 1;
8568 else
8569 return 0;
8572 /* Return 1 if OP is valid as a base or index register in a
8573 REG+REG address. */
8576 borx_reg_operand (rtx op, enum machine_mode mode)
8578 if (GET_CODE (op) != REG)
8579 return 0;
8581 /* We must reject virtual registers as the only expressions that
8582 can be instantiated are REG and REG+CONST. */
8583 if (op == virtual_incoming_args_rtx
8584 || op == virtual_stack_vars_rtx
8585 || op == virtual_stack_dynamic_rtx
8586 || op == virtual_outgoing_args_rtx
8587 || op == virtual_cfa_rtx)
8588 return 0;
8590 /* While it's always safe to index off the frame pointer, it's not
8591 profitable to do so when the frame pointer is being eliminated. */
8592 if (!reload_completed
8593 && flag_omit_frame_pointer
8594 && !cfun->calls_alloca
8595 && op == frame_pointer_rtx)
8596 return 0;
8598 return register_operand (op, mode);
8601 /* Return 1 if this operand is anything other than a hard register. */
8604 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8606 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8609 /* Return TRUE if INSN branches forward. */
8611 static bool
8612 forward_branch_p (rtx insn)
8614 rtx lab = JUMP_LABEL (insn);
8616 /* The INSN must have a jump label. */
8617 gcc_assert (lab != NULL_RTX);
8619 if (INSN_ADDRESSES_SET_P ())
8620 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8622 while (insn)
8624 if (insn == lab)
8625 return true;
8626 else
8627 insn = NEXT_INSN (insn);
8630 return false;
8633 /* Return 1 if OP is an equality comparison, else return 0. */
8635 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8637 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8640 /* Return 1 if INSN is in the delay slot of a call instruction. */
8642 jump_in_call_delay (rtx insn)
8645 if (GET_CODE (insn) != JUMP_INSN)
8646 return 0;
8648 if (PREV_INSN (insn)
8649 && PREV_INSN (PREV_INSN (insn))
8650 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8652 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8654 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8655 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8658 else
8659 return 0;
8662 /* Output an unconditional move and branch insn. */
8664 const char *
8665 output_parallel_movb (rtx *operands, rtx insn)
8667 int length = get_attr_length (insn);
8669 /* These are the cases in which we win. */
8670 if (length == 4)
8671 return "mov%I1b,tr %1,%0,%2";
8673 /* None of the following cases win, but they don't lose either. */
8674 if (length == 8)
8676 if (dbr_sequence_length () == 0)
8678 /* Nothing in the delay slot, fake it by putting the combined
8679 insn (the copy or add) in the delay slot of a bl. */
8680 if (GET_CODE (operands[1]) == CONST_INT)
8681 return "b %2\n\tldi %1,%0";
8682 else
8683 return "b %2\n\tcopy %1,%0";
8685 else
8687 /* Something in the delay slot, but we've got a long branch. */
8688 if (GET_CODE (operands[1]) == CONST_INT)
8689 return "ldi %1,%0\n\tb %2";
8690 else
8691 return "copy %1,%0\n\tb %2";
8695 if (GET_CODE (operands[1]) == CONST_INT)
8696 output_asm_insn ("ldi %1,%0", operands);
8697 else
8698 output_asm_insn ("copy %1,%0", operands);
8699 return output_lbranch (operands[2], insn, 1);
8702 /* Output an unconditional add and branch insn. */
8704 const char *
8705 output_parallel_addb (rtx *operands, rtx insn)
8707 int length = get_attr_length (insn);
8709 /* To make life easy we want operand0 to be the shared input/output
8710 operand and operand1 to be the readonly operand. */
8711 if (operands[0] == operands[1])
8712 operands[1] = operands[2];
8714 /* These are the cases in which we win. */
8715 if (length == 4)
8716 return "add%I1b,tr %1,%0,%3";
8718 /* None of the following cases win, but they don't lose either. */
8719 if (length == 8)
8721 if (dbr_sequence_length () == 0)
8722 /* Nothing in the delay slot, fake it by putting the combined
8723 insn (the copy or add) in the delay slot of a bl. */
8724 return "b %3\n\tadd%I1 %1,%0,%0";
8725 else
8726 /* Something in the delay slot, but we've got a long branch. */
8727 return "add%I1 %1,%0,%0\n\tb %3";
8730 output_asm_insn ("add%I1 %1,%0,%0", operands);
8731 return output_lbranch (operands[3], insn, 1);
8734 /* Return nonzero if INSN (a jump insn) immediately follows a call
8735 to a named function. This is used to avoid filling the delay slot
8736 of the jump since it can usually be eliminated by modifying RP in
8737 the delay slot of the call. */
8740 following_call (rtx insn)
8742 if (! TARGET_JUMP_IN_DELAY)
8743 return 0;
8745 /* Find the previous real insn, skipping NOTEs. */
8746 insn = PREV_INSN (insn);
8747 while (insn && GET_CODE (insn) == NOTE)
8748 insn = PREV_INSN (insn);
8750 /* Check for CALL_INSNs and millicode calls. */
8751 if (insn
8752 && ((GET_CODE (insn) == CALL_INSN
8753 && get_attr_type (insn) != TYPE_DYNCALL)
8754 || (GET_CODE (insn) == INSN
8755 && GET_CODE (PATTERN (insn)) != SEQUENCE
8756 && GET_CODE (PATTERN (insn)) != USE
8757 && GET_CODE (PATTERN (insn)) != CLOBBER
8758 && get_attr_type (insn) == TYPE_MILLI)))
8759 return 1;
8761 return 0;
8764 /* We use this hook to perform a PA specific optimization which is difficult
8765 to do in earlier passes.
8767 We want the delay slots of branches within jump tables to be filled.
8768 None of the compiler passes at the moment even has the notion that a
8769 PA jump table doesn't contain addresses, but instead contains actual
8770 instructions!
8772 Because we actually jump into the table, the addresses of each entry
8773 must stay constant in relation to the beginning of the table (which
8774 itself must stay constant relative to the instruction to jump into
8775 it). I don't believe we can guarantee earlier passes of the compiler
8776 will adhere to those rules.
8778 So, late in the compilation process we find all the jump tables, and
8779 expand them into real code -- e.g. each entry in the jump table vector
8780 will get an appropriate label followed by a jump to the final target.
8782 Reorg and the final jump pass can then optimize these branches and
8783 fill their delay slots. We end up with smaller, more efficient code.
8785 The jump instructions within the table are special; we must be able
8786 to identify them during assembly output (if the jumps don't get filled
8787 we need to emit a nop rather than nullifying the delay slot). We
8788 identify jumps in switch tables by using insns with the attribute
8789 type TYPE_BTABLE_BRANCH.
8791 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8792 insns. This serves two purposes: first, it prevents jump.c from
8793 noticing that the last N entries in the table jump to the instruction
8794 immediately after the table and deleting the jumps. Second, those
8795 insns mark where we should emit .begin_brtab and .end_brtab directives
8796 when using GAS (allows for better link time optimizations). */
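/* For example (illustrative), a three entry jump table

        (addr_vec [L10 L20 L30])

   is exploded into real code of the form

        begin_brtab
   L1:	b L10		; delay slots filled later by reorg
   L2:	b L20
   L3:	b L30
        end_brtab  */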
8798 static void
8799 pa_reorg (void)
8801 rtx insn;
8803 remove_useless_addtr_insns (1);
8805 if (pa_cpu < PROCESSOR_8000)
8806 pa_combine_instructions ();
8809 /* This is fairly cheap, so always run it if optimizing. */
8810 if (optimize > 0 && !TARGET_BIG_SWITCH)
8812 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8813 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8815 rtx pattern, tmp, location, label;
8816 unsigned int length, i;
8818 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8819 if (GET_CODE (insn) != JUMP_INSN
8820 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8821 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8822 continue;
8824 /* Emit marker for the beginning of the branch table. */
8825 emit_insn_before (gen_begin_brtab (), insn);
8827 pattern = PATTERN (insn);
8828 location = PREV_INSN (insn);
8829 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8831 for (i = 0; i < length; i++)
8833 /* Emit a label before each jump to keep jump.c from
8834 removing this code. */
8835 tmp = gen_label_rtx ();
8836 LABEL_NUSES (tmp) = 1;
8837 emit_label_after (tmp, location);
8838 location = NEXT_INSN (location);
8840 if (GET_CODE (pattern) == ADDR_VEC)
8841 label = XEXP (XVECEXP (pattern, 0, i), 0);
8842 else
8843 label = XEXP (XVECEXP (pattern, 1, i), 0);
8845 tmp = gen_short_jump (label);
8847 /* Emit the jump itself. */
8848 tmp = emit_jump_insn_after (tmp, location);
8849 JUMP_LABEL (tmp) = label;
8850 LABEL_NUSES (label)++;
8851 location = NEXT_INSN (location);
8853 /* Emit a BARRIER after the jump. */
8854 emit_barrier_after (location);
8855 location = NEXT_INSN (location);
8858 /* Emit marker for the end of the branch table. */
8859 emit_insn_before (gen_end_brtab (), location);
8860 location = NEXT_INSN (location);
8861 emit_barrier_after (location);
8863 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8864 delete_insn (insn);
8867 else
8869 /* Still need brtab marker insns. FIXME: the presence of these
8870 markers disables output of the branch table to readonly memory,
8871 and any alignment directives that might be needed. Possibly,
8872 the begin_brtab insn should be output before the label for the
8873 table. This doesn't matter at the moment since the tables are
8874 always output in the text section. */
8875 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8877 /* Find an ADDR_VEC insn. */
8878 if (GET_CODE (insn) != JUMP_INSN
8879 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8880 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8881 continue;
8883 /* Now generate markers for the beginning and end of the
8884 branch table. */
8885 emit_insn_before (gen_begin_brtab (), insn);
8886 emit_insn_after (gen_end_brtab (), insn);
8891 /* The PA has a number of odd instructions which can perform multiple
8892 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8893 it may be profitable to combine two instructions into one instruction
8894 with two outputs. It's not profitable on PA2.0 machines because the
8895 two outputs would take two slots in the reorder buffers.
8897 This routine finds instructions which can be combined and combines
8898 them. We only support some of the potential combinations, and we
8899 only try common ways to find suitable instructions.
8901 * addb can add two registers or a register and a small integer
8902 and jump to a nearby (+-8k) location. Normally the jump to the
8903 nearby location is conditional on the result of the add, but by
8904 using the "true" condition we can make the jump unconditional.
8905 Thus addb can perform two independent operations in one insn.
8907 * movb is similar to addb in that it can perform a reg->reg
8908 or small immediate->reg copy and jump to a nearby (+-8k) location.
8910 * fmpyadd and fmpysub can perform a FP multiply and either an
8911 FP add or FP sub if the operands of the multiply and add/sub are
8912 independent (there are other minor restrictions). Note both
8913 the fmpy and fadd/fsub can in theory move to better spots according
8914 to data dependencies, but for now we require the fmpy stay at a
8915 fixed location.
8917 * Many of the memory operations can perform pre & post updates
8918 of index registers. GCC's pre/post increment/decrement addressing
8919 is far too simple to take advantage of all the possibilities. This
8920 pass may not be suitable since those insns may not be independent.
8922 * comclr can compare two ints or an int and a register, nullify
8923 the following instruction and zero some other register. This
8924 is more difficult to use as it's harder to find an insn which
8925 will generate a comclr than finding something like an unconditional
8926 branch. (conditional moves & long branches create comclr insns).
8928 * Most arithmetic operations can conditionally skip the next
8929 instruction. They can be viewed as "perform this operation
8930 and conditionally jump to this nearby location" (where nearby
8931 is an insn away). These are difficult to use due to the
8932 branch length restrictions. */
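/* Sketch of the combination performed below (illustrative, with symbolic
   register numbers): an fmpy anchor and an independent fadd floater

        (set (reg:SF fA) (mult:SF (reg:SF fB) (reg:SF fC)))
        (set (reg:SF fD) (plus:SF (reg:SF fE) (reg:SF fF)))

   are wrapped into a single two element PARALLEL and re-recognized; if
   an fmpyadd pattern matches and its constraints are satisfied, the
   pair issues as one instruction.  */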
8934 static void
8935 pa_combine_instructions (void)
8937 rtx anchor, new_rtx;
8939 /* This can get expensive since the basic algorithm is on the
8940 order of O(n^2) (or worse). Only do it for -O2 or higher
8941 levels of optimization. */
8942 if (optimize < 2)
8943 return;
8945 /* Walk down the list of insns looking for "anchor" insns which
8946 may be combined with "floating" insns. As the name implies,
8947 "anchor" instructions don't move, while "floating" insns may
8948 move around. */
8949 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8950 new_rtx = make_insn_raw (new_rtx);
8952 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8954 enum attr_pa_combine_type anchor_attr;
8955 enum attr_pa_combine_type floater_attr;
8957 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8958 Also ignore any special USE insns. */
8959 if ((GET_CODE (anchor) != INSN
8960 && GET_CODE (anchor) != JUMP_INSN
8961 && GET_CODE (anchor) != CALL_INSN)
8962 || GET_CODE (PATTERN (anchor)) == USE
8963 || GET_CODE (PATTERN (anchor)) == CLOBBER
8964 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8965 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8966 continue;
8968 anchor_attr = get_attr_pa_combine_type (anchor);
8969 /* See if anchor is an insn suitable for combination. */
8970 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8971 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8972 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8973 && ! forward_branch_p (anchor)))
8975 rtx floater;
8977 for (floater = PREV_INSN (anchor);
8978 floater;
8979 floater = PREV_INSN (floater))
8981 if (GET_CODE (floater) == NOTE
8982 || (GET_CODE (floater) == INSN
8983 && (GET_CODE (PATTERN (floater)) == USE
8984 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8985 continue;
8987 /* Anything except a regular INSN will stop our search. */
8988 if (GET_CODE (floater) != INSN
8989 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8990 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8992 floater = NULL_RTX;
8993 break;
8996 /* See if FLOATER is suitable for combination with the
8997 anchor. */
8998 floater_attr = get_attr_pa_combine_type (floater);
8999 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9000 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9001 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9002 && floater_attr == PA_COMBINE_TYPE_FMPY))
9004 /* If ANCHOR and FLOATER can be combined, then we're
9005 done with this pass. */
9006 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9007 SET_DEST (PATTERN (floater)),
9008 XEXP (SET_SRC (PATTERN (floater)), 0),
9009 XEXP (SET_SRC (PATTERN (floater)), 1)))
9010 break;
9013 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9014 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9016 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9018 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9019 SET_DEST (PATTERN (floater)),
9020 XEXP (SET_SRC (PATTERN (floater)), 0),
9021 XEXP (SET_SRC (PATTERN (floater)), 1)))
9022 break;
9024 else
9026 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9027 SET_DEST (PATTERN (floater)),
9028 SET_SRC (PATTERN (floater)),
9029 SET_SRC (PATTERN (floater))))
9030 break;
9035 /* If we didn't find anything on the backwards scan try forwards. */
9036 if (!floater
9037 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9038 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9040 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9042 if (GET_CODE (floater) == NOTE
9043 || (GET_CODE (floater) == INSN
9044 && (GET_CODE (PATTERN (floater)) == USE
9045 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9047 continue;
9049 /* Anything except a regular INSN will stop our search. */
9050 if (GET_CODE (floater) != INSN
9051 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9052 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9054 floater = NULL_RTX;
9055 break;
9058 /* See if FLOATER is suitable for combination with the
9059 anchor. */
9060 floater_attr = get_attr_pa_combine_type (floater);
9061 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9062 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9063 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9064 && floater_attr == PA_COMBINE_TYPE_FMPY))
9066 /* If ANCHOR and FLOATER can be combined, then we're
9067 done with this pass. */
9068 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9069 SET_DEST (PATTERN (floater)),
9070 XEXP (SET_SRC (PATTERN (floater)),
9072 XEXP (SET_SRC (PATTERN (floater)),
9073 1)))
9074 break;
9079 /* FLOATER will be nonzero if we found a suitable floating
9080 insn for combination with ANCHOR. */
9081 if (floater
9082 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9083 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9085 /* Emit the new instruction and delete the old anchor. */
9086 emit_insn_before (gen_rtx_PARALLEL
9087 (VOIDmode,
9088 gen_rtvec (2, PATTERN (anchor),
9089 PATTERN (floater))),
9090 anchor);
9092 SET_INSN_DELETED (anchor);
9094 /* Emit a special USE insn for FLOATER, then delete
9095 the floating insn. */
9096 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9097 delete_insn (floater);
9099 continue;
9101 else if (floater
9102 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9104 rtx temp;
9105 /* Emit the new jump instruction and delete the old anchor. */
9106 temp
9107 = emit_jump_insn_before (gen_rtx_PARALLEL
9108 (VOIDmode,
9109 gen_rtvec (2, PATTERN (anchor),
9110 PATTERN (floater))),
9111 anchor);
9113 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9114 SET_INSN_DELETED (anchor);
9116 /* Emit a special USE insn for FLOATER, then delete
9117 the floating insn. */
9118 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9119 delete_insn (floater);
9120 continue;
9126 static int
9127 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9128 rtx src1, rtx src2)
9130 int insn_code_number;
9131 rtx start, end;
9133 /* Create a PARALLEL with the patterns of ANCHOR and
9134 FLOATER, try to recognize it, then test constraints
9135 for the resulting pattern.
9137 If the pattern doesn't match or the constraints
9138 aren't met keep searching for a suitable floater
9139 insn. */
9140 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9141 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9142 INSN_CODE (new_rtx) = -1;
9143 insn_code_number = recog_memoized (new_rtx);
9144 if (insn_code_number < 0
9145 || (extract_insn (new_rtx), ! constrain_operands (1)))
9146 return 0;
9148 if (reversed)
9150 start = anchor;
9151 end = floater;
9153 else
9155 start = floater;
9156 end = anchor;
9159 /* There are up to three operands to consider: one
9160 output and two inputs.
9162 The output must not be used between FLOATER & ANCHOR
9163 exclusive. The inputs must not be set between
9164 FLOATER and ANCHOR exclusive. */
9166 if (reg_used_between_p (dest, start, end))
9167 return 0;
9169 if (reg_set_between_p (src1, start, end))
9170 return 0;
9172 if (reg_set_between_p (src2, start, end))
9173 return 0;
9175 /* If we get here, then everything is good. */
9176 return 1;
9179 /* Return nonzero if references for INSN are delayed.
9181 Millicode insns are actually function calls with some special
9182 constraints on arguments and register usage.
9184 Millicode calls always expect their arguments in the integer argument
9185 registers, and always return their result in %r29 (ret1). They
9186 are expected to clobber their arguments, %r1, %r29, and the return
9187 pointer, which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9189 This function tells reorg that the references to arguments and
9190 millicode calls do not appear to happen until after the millicode call.
9191 This allows reorg to put insns which set the argument registers into the
9192 delay slot of the millicode call -- thus they act more like traditional
9193 CALL_INSNs.
9195 Note we cannot consider side effects of the insn to be delayed because
9196 the branch and link insn will clobber the return pointer. If we happened
9197 to use the return pointer in the delay slot of the call, then we lose.
9199 get_attr_type will try to recognize the given insn, so make sure to
9200 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9201 in particular. */
9203 insn_refs_are_delayed (rtx insn)
9205 return ((GET_CODE (insn) == INSN
9206 && GET_CODE (PATTERN (insn)) != SEQUENCE
9207 && GET_CODE (PATTERN (insn)) != USE
9208 && GET_CODE (PATTERN (insn)) != CLOBBER
9209 && get_attr_type (insn) == TYPE_MILLI));
9212 /* Promote the return value, but not the arguments. */
9214 static enum machine_mode
9215 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9216 enum machine_mode mode,
9217 int *punsignedp ATTRIBUTE_UNUSED,
9218 const_tree fntype ATTRIBUTE_UNUSED,
9219 int for_return)
9221 if (for_return == 0)
9222 return mode;
9223 return promote_mode (type, mode, punsignedp);
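/* Example: a function returning `short' has its HImode value widened to
   word_mode here (for_return != 0), while the same `short' passed as an
   argument keeps HImode because we return MODE unchanged above.  */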
9226 /* On the HP-PA the value is found in register(s) 28(-29), unless
9227 the mode is SF or DF. Then the value is returned in fr4 (32).
9229 This must perform the same promotions as PROMOTE_MODE, else promoting
9230 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9232 Small structures must be returned in a PARALLEL on PA64 in order
9233 to match the HP Compiler ABI. */
9236 pa_function_value (const_tree valtype,
9237 const_tree func ATTRIBUTE_UNUSED,
9238 bool outgoing ATTRIBUTE_UNUSED)
9240 enum machine_mode valmode;
9242 if (AGGREGATE_TYPE_P (valtype)
9243 || TREE_CODE (valtype) == COMPLEX_TYPE
9244 || TREE_CODE (valtype) == VECTOR_TYPE)
9246 if (TARGET_64BIT)
9248 /* Aggregates with a size less than or equal to 128 bits are
9249 returned in GR 28(-29). They are left justified. The pad
9250 bits are undefined. Larger aggregates are returned in
9251 memory. */
9252 rtx loc[2];
9253 int i, offset = 0;
9254 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9256 for (i = 0; i < ub; i++)
9258 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9259 gen_rtx_REG (DImode, 28 + i),
9260 GEN_INT (offset));
9261 offset += 8;
9264 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9266 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9268 /* Aggregates 5 to 8 bytes in size are returned in general
9269 registers r28-r29 in the same manner as other non
9270 floating-point objects. The data is right-justified and
9271 zero-extended to 64 bits. This is opposite to the normal
9272 justification used on big endian targets and requires
9273 special treatment. */
9274 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9275 gen_rtx_REG (DImode, 28), const0_rtx);
9276 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9280 if ((INTEGRAL_TYPE_P (valtype)
9281 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9282 || POINTER_TYPE_P (valtype))
9283 valmode = word_mode;
9284 else
9285 valmode = TYPE_MODE (valtype);
9287 if (TREE_CODE (valtype) == REAL_TYPE
9288 && !AGGREGATE_TYPE_P (valtype)
9289 && TYPE_MODE (valtype) != TFmode
9290 && !TARGET_SOFT_FLOAT)
9291 return gen_rtx_REG (valmode, 32);
9293 return gen_rtx_REG (valmode, 28);
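/* Examples of the mapping above (illustrative):

        int     -> (reg 28), widened to word_mode on TARGET_64BIT
        double  -> (reg:DF 32), i.e. %fr4, unless TARGET_SOFT_FLOAT
        16-byte struct on PA64
                -> PARALLEL of (reg:DI 28) at offset 0 and
                   (reg:DI 29) at offset 8.  */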
9296 /* Return the location of a parameter that is passed in a register or NULL
9297 if the parameter has any component that is passed in memory.
9299 This is new code and will be pushed into the net sources after
9300 further testing.
9302 ??? We might want to restructure this so that it looks more like other
9303 ports. */
9305 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
9306 int named ATTRIBUTE_UNUSED)
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
        return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
        alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
        return NULL_RTX;
    }

  /* The 32-bit and 64-bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
         fprs grow to higher register numbers.  Also remember that
         although FP regs are 32-bit addressable, we pretend that
         the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
         treatment.  */
      if (arg_size > 1
          || mode == BLKmode
          || (type && (AGGREGATE_TYPE_P (type)
                       || TREE_CODE (type) == COMPLEX_TYPE
                       || TREE_CODE (type) == VECTOR_TYPE)))
        {
          /* Double-extended precision (80-bit), quad-precision (128-bit)
             and aggregates including complex numbers are aligned on
             128-bit boundaries.  The first eight 64-bit argument slots
             are associated one-to-one with general registers r26
             through r19, and also with floating-point registers fr4
             through fr11.  Arguments larger than one word are always
             passed in general registers.

             Using a PARALLEL with a word mode register results in left
             justified data on a big-endian target.  */

          rtx loc[8];
          int i, offset = 0, ub = arg_size;
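
          /* For illustration (not part of the original sources): a
             16-byte struct passed as the first argument (cum->words == 0)
             makes the loop below build
               (parallel [(expr_list (reg:DI 26) (const_int 0))
                          (expr_list (reg:DI 25) (const_int 8))])
             since gprs are assigned downwards starting at r26.  */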

          /* Align the base register.  */
          gpr_reg_base -= alignment;

          ub = MIN (ub, max_arg_words - cum->words - alignment);
          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, gpr_reg_base),
                                          GEN_INT (offset));
              gpr_reg_base -= 1;
              offset += 8;
            }

          return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
        }
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
         which registers we must use.  */
      if (arg_size > 1)
        {
          if (cum->words)
            {
              gpr_reg_base = 23;
              fpr_reg_base = 38;
            }
          else
            {
              gpr_reg_base = 25;
              fpr_reg_base = 34;
            }

          /* Structures 5 to 8 bytes in size are passed in the general
             registers in the same manner as other non floating-point
             objects.  The data is right-justified and zero-extended
             to 64 bits.  This is opposite to the normal justification
             used on big endian targets and requires special treatment.
             We now define BLOCK_REG_PADDING to pad these objects.
             Aggregates, complex and vector types are passed in the same
             manner as structures.  */
          if (mode == BLKmode
              || (type && (AGGREGATE_TYPE_P (type)
                           || TREE_CODE (type) == COMPLEX_TYPE
                           || TREE_CODE (type) == VECTOR_TYPE)))
            {
              rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (DImode, gpr_reg_base),
                                           const0_rtx);
              return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
            }
        }
      else
        {
          /* We have a single word (32 bits).  A simple computation
             will get us the register #s we need.  */
          gpr_reg_base = 26 - cum->words;
          fpr_reg_base = 32 + 2 * cum->words;
        }
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
          is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
          pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
          registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
          sibcall arguments set up with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
         calls with the 32-bit ABI and the HP assembler since there is no
         way to specify argument locations in static functions.  */
      || (!TARGET_64BIT
          && !TARGET_GAS
          && !cum->incoming
          && cum->indirect
          && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
        = gen_rtx_PARALLEL
            (mode,
             gen_rtvec (2,
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, fpr_reg_base),
                                           const0_rtx),
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, gpr_reg_base),
                                           const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
          /* Indirect calls in the normal 32-bit ABI require all arguments
             to be passed in general registers.  */
          || (!TARGET_PORTABLE_RUNTIME
              && !TARGET_64BIT
              && !TARGET_ELF32
              && cum->indirect)
          /* If the parameter is not a scalar floating-point parameter,
             then it belongs in GPRs.  */
          || GET_MODE_CLASS (mode) != MODE_FLOAT
          /* Structure with single SFmode field belongs in GPR.  */
          || (type && AGGREGATE_TYPE_P (type)))
        retval = gen_rtx_REG (mode, gpr_reg_base);
      else
        retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}
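
/* Illustrative example (not part of the original sources): with the
   32-bit ELF ABI (TARGET_ELF32), a double passed as the first argument
   to an unprototyped function has arg_size == 2, giving gpr_reg_base 25
   and fpr_reg_base 34, and the PARALLEL above places the value in both
   (reg:DF 34) and (reg:DF 25) so the callee can find it whether or not
   it expects a floating-point argument.  */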

/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                      tree type, bool named ATTRIBUTE_UNUSED)
{
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
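
/* Worked example (not part of the original sources): on TARGET_64BIT
   with cum->words == 6 and a 4-word (32-byte) argument, offset stays 0;
   the argument neither fits fully in registers (6 + 4 > 8) nor starts
   on the stack (6 < 8), so (8 - 6) * UNITS_PER_WORD == 16 bytes are
   passed in registers and the remainder goes on the stack.  */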

/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
        {
          /* We only want to emit a .nsubspa directive once at the
             start of the function.  */
          cfun->machine->in_nsubspa = 1;

          /* Create a new subspace for the text.  This provides
             better stub placement and one-only functions.  */
          if (cfun->decl
              && DECL_ONE_ONLY (cfun->decl)
              && !DECL_WEAK (cfun->decl))
            {
              output_section_asm_op ("\t.SPACE $TEXT$\n"
                                     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
                                     "ACCESS=44,SORT=24,COMDAT");
              return;
            }
        }
      else
        {
          /* There isn't a current function or the body of the current
             function has been completed.  So, we are changing to the
             text section to output debugging information.  Thus, we
             need to forget that we are in the text section so that
             varasm.c will call us when text_section is selected again.  */
          gcc_assert (!cfun || !cfun->machine
                      || cfun->machine->in_nsubspa == 2);
          in_section = NULL;
        }
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}
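
/* Illustrative output (not part of the original sources): with GAS, the
   first switch to the text section within a one-only (COMDAT) function
   emits

	.SPACE $TEXT$
	.NSUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,SORT=24,COMDAT

   while subsequent switches reuse the plain $CODE$ subspace.  */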

/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
                           "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
                           "\t.SPACE $TEXT$\n"
                           "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
                           "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
                           som_output_comdat_data_section_asm_op,
                           "\t.SPACE $PRIVATE$\n"
                           "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
                           "ACCESS=31,SORT=24,COMDAT");

  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems handling
     the difference of two symbols, which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}

/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc cannot
   be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
                   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
          || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !reloc)
    {
      if (TARGET_SOM
          && DECL_ONE_ONLY (exp)
          && !DECL_WEAK (exp))
        return som_one_only_readonly_data_section;
      else
        return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp) && !reloc)
    return readonly_data_section;
  else if (TARGET_SOM
           && TREE_CODE (exp) == VAR_DECL
           && DECL_ONE_ONLY (exp)
           && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}
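
/* Illustrative example (not part of the original sources): a read-only
   VAR_DECL with a constant initializer and no relocations is placed in
   readonly_data_section, which pa_som_asm_init_sections has already
   redirected to the data section when generating PIC code; a one-only
   (COMDAT) variable on SOM goes to the comdat $DATA$ subspace
   instead.  */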

static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here; functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
                     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
          || int_size_in_bytes (type) <= 0);
}
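
/* Worked example (not part of the original sources): a 16-byte struct
   is returned in registers on the 64-bit target (16 <= 16) but in
   memory with the 32-bit ABI (16 > 8); variable-sized and empty types
   make int_size_in_bytes return -1 or 0 and are always returned in
   memory.  */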

/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */
DEF_VEC_O(extern_symbol);
DEF_VEC_ALLOC_O(extern_symbol,gc);

/* Vector of extern_symbol objects.  */
static GTY(()) VEC(extern_symbol,gc) *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  extern_symbol *p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);

  gcc_assert (file == asm_out_file);
  p->decl = decl;
  p->name = name;
}

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
          && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
        ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  VEC_free (extern_symbol, gc, extern_symbols);
}
#endif
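
/* Usage sketch (not part of the original sources): the ASM_OUTPUT_EXTERNAL
   hook records each extern declaration via pa_hpux_asm_output_external,
   and pa_hpux_file_end later runs ASM_OUTPUT_EXTERNAL_REAL (which emits
   the .IMPORT directive mentioned above) only for symbols that were
   actually referenced.  */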

/* Return true if a change from mode FROM to mode TO for a register
   in register class RCLASS is invalid.  */

bool
pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                             enum reg_class rclass)
{
  if (from == to)
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return true;

  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return false;

  /* There is no way to load QImode or HImode values directly from
     memory.  SImode loads to the FP registers are not zero extended.
     On the 64-bit target, this conflicts with the definition of
     LOAD_EXTEND_OP.  Thus, we can't allow changing between modes
     with different sizes in the floating-point registers.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return true;

  /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return true;

  return false;
}
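
/* Illustrative example (not part of the original sources): changing an
   SImode register to DImode is rejected in FP register classes (the
   sizes differ) and, on the 32-bit target, in general registers as well
   (DImode is wider than a word); SImode to SFmode is allowed since the
   sizes match.  */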

/* Returns TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tying these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
   CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

bool
pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}
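
/* Illustrative consequence (not part of the original sources): SImode
   and DImode pseudos may be tied (both are MODE_INT), as may SFmode and
   DFmode, but SImode is never tied to SFmode because the mode classes
   differ.  */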

/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))


/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from text segment to stack location
   and then patched with pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      fputs ("\tldw 36(%r22),%r21\n", f);
      fputs ("\tbb,>=,n %r21,30,.+16\n", f);
      if (ASSEMBLER_DIALECT == 0)
        fputs ("\tdepi 0,31,2,%r21\n", f);
      else
        fputs ("\tdepwi 0,31,2,%r21\n", f);
      fputs ("\tldw 4(%r21),%r19\n", f);
      fputs ("\tldw 0(%r21),%r21\n", f);
      if (TARGET_PA_20)
        {
          fputs ("\tbve (%r21)\n", f);
          fputs ("\tldw 40(%r22),%r29\n", f);
          fputs ("\t.word 0\n", f);
          fputs ("\t.word 0\n", f);
        }
      else
        {
          fputs ("\tldsid (%r21),%r1\n", f);
          fputs ("\tmtsp %r1,%sr0\n", f);
          fputs ("\tbe 0(%sr0,%r21)\n", f);
          fputs ("\tldw 40(%r22),%r29\n", f);
        }
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r1\n", f);
      fputs ("\tldd 24(%r1),%r27\n", f);
      fputs ("\tldd 16(%r1),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\t.dword 0 ; fptr\n", f);
      fputs ("\t.dword 0 ; static link\n", f);
    }
}
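
/* Illustrative layout (not part of the original sources) of the 32-bit
   trampoline once pa_trampoline_init (below) has patched it:

     bytes  0-35   instruction template above (padded on PA 2.0)
     byte  36      address of the target function
     byte  40      static chain value (loaded into %r29)
     byte  44      address of the trampoline   } plabel used for
     byte  48      global pointer (%r19)       } indirect calls  */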

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 36.
   Move the static chain value to the trampoline template at offset 40.
   Move the trampoline address to the trampoline template at offset 44.
   Move r19 to the trampoline template at offset 48.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 36);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 40);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 44);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }
}

/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 44
   (44 plus 2; the extra 2 appears to mark the address as a pointer
   to a plabel).  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (addr, 46));
  return addr;
}

static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}
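
/* Illustrative example (not part of the original sources): a PIC
   reference legitimized as
     (lo_sum (reg) (unspec [(symbol_ref "x")] UNSPEC_DLTIND14R))
   is folded back to (mem (symbol_ref "x")) so that debug output and
   RTL dumps refer to the original symbol.  */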

#include "gt-pa.h"