/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "tree.h"
36 #include "output.h"
37 #include "except.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "integrate.h"
42 #include "function.h"
43 #include "toplev.h"
44 #include "ggc.h"
45 #include "recog.h"
46 #include "predict.h"
47 #include "tm_p.h"
48 #include "target.h"
49 #include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || get_attr_type (in_insn) != TYPE_FPSTORE
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_address_cost (rtx);
static bool hppa_rtx_costs (rtx, int, int, int *);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static int forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx hppa_builtin_saveregs (void);
static tree hppa_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static struct machine_function * pa_init_machine_status (void);
/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */
rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;

/* The UNIX standard to use for predefines and linking.  */
int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct deferred_plabel GTY(())
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION pa_handle_option

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
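/* As an illustration (the option strings below are made-up examples,
   not from the original sources): a single range is written
   "-mfixed-range=fr4-fr31", and several ranges may be chained with
   commas, e.g. "-mfixed-range=fr4-fr15,fr20-fr31".  If the ranges end
   up covering every FP register, the check above also turns on
   MASK_DISABLE_FPREGS.  */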
/* Implement TARGET_HANDLE_OPTION.  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mnosnake:
    case OPT_mpa_risc_1_0:
    case OPT_march_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    case OPT_msnake:
    case OPT_mpa_risc_1_1:
    case OPT_march_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    case OPT_mpa_risc_2_0:
    case OPT_march_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
	pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
	pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
	pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
	pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
	pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
	pa_cpu = PROCESSOR_7300;
      else
	return false;
      return true;

    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

#if TARGET_HPUX
    case OPT_munix_93:
      flag_pa_unix = 1993;
      return true;
#endif

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    default:
      return true;
    }
}
void
override_options (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
#endif
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared (sizeof (machine_function));
}
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT intval)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (CONST_OK_FOR_LETTER_P (intval, 'J')
	  || CONST_OK_FOR_LETTER_P (intval, 'N')
	  || CONST_OK_FOR_LETTER_P (intval, 'K'));
}
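/* A sketch of what the constraint letters above accept; the exact
   definitions live in pa.h, so treat these values as illustrative:
     'J'  14-bit signed immediates, e.g. 5000, one "ldo 5000(%r0),reg";
     'N'  constants ldil can build, i.e. the low 11 bits are zero,
	  e.g. 0x12345800;
     'K'  constants zdepi can deposit, e.g. the contiguous run of ones
	  0x01fe0000.  */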
/* Return truth value of whether OP can be used as an operand in a
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT
	      && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5 bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      emit_move_insn (tmp_reg,
		      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
				    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
	= gen_const_mem (Pmode,
			 gen_rtx_LO_SUM (Pmode, tmp_reg,
					 gen_rtx_UNSPEC (Pmode,
							 gen_rtvec (1, orig),
							 UNSPEC_DLTIND14R)));

      current_function_uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
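/* For reference, a sketch of what the SYMBOL_REF case above normally
   assembles to on HP-UX (reconstructed for illustration, not taken
   from this file):

	addil LT'sym,%r19	; add left part of DLT offset to PIC reg
	ldw RT'sym(%r1),reg	; load sym's address from the DLT

   %r19 is the PIC register (pic_offset_table_rtx), and the
   UNSPEC_DLTIND14R wrapper keeps the 14-bit DLT relocation distinct
   from an ordinary LO_SUM.  */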
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}
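/* A quick summary of the sequences built above (descriptive only; the
   tgd/tld/tie/tle patterns are defined in the machine description):
     global-dynamic: tgd_load, then a __tls_get_addr libcall;
     local-dynamic:  tld_load and one __tls_get_addr call for the
		     module base, then tld_offset_load adds the
		     per-symbol offset;
     initial-exec:   tp_load + tie_load, address = thread pointer plus
		     a GOT-style entry;
     local-exec:     tp_load + tle_load, offset fixed at link time.  */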
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }
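  /* A worked example of the rounding above, with invented numbers: for
     an integer-mode reference at SYM + 0x12345, mask is 0x3fff and
     (offset & mask) = 0x2345 >= 0x2000, so we round up:
	newoffset = (0x12345 & ~0x3fff) + 0x4000 = 0x14000.
     SYM + 0x14000 is materialized in a register, and the residual
     displacement 0x12345 - 0x14000 = -0x1cbb fits in the 14-bit field
     of ldo/ldw, so nearby references can CSE the same rounded base.  */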
  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }
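  /* For instance (illustrative): with val == 8, the PLUS/MULT form
     built above matches the shadd patterns, so an address like
     (plus (mult r2 8) r1) can issue as a single "sh3add r2,r1,t";
     val == 2 and val == 4 map to sh1add and sh2add respectively.  */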
  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }
  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = COSTS_N_INSNS (8);
      else
	*total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else
	*total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
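/* A rough reading of the MULT costs above (commentary, not from the
   original sources): PA 1.1 and later can multiply integers in the FP
   unit with xmpyu, hence the 8-insn estimate when FP registers are
   usable; otherwise a millicode multiply call is assumed, hence 20.
   FP multiplies pipeline well, hence COSTS_N_INSNS (3).  */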
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */
static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */
bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */
int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      /* This is only safe up to the beginning of life analysis.  */
      gcc_assert (!no_new_pseudos);

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
				 XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p ((GET_MODE_SIZE (mode) == 4
				      ? SFmode : DFmode),
				     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
	   && fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& !memory_address_p ((GET_MODE_SIZE (mode) == 4
				       ? SFmode : DFmode),
				      XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
					  ? SFmode : DFmode),
					 XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand0, 0),
							     0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
			      replace_equiv_address (operand0, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.   Fix this for 2.5.  */
  else if (scratch_reg
	   && CONSTANT_P (operand1)
	   && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (scratch_reg
	   && GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0),
							     0),
						       scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg,
			  replace_equiv_address (operand1, scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Various sets are created during RTL generation which don't
	     have the REG_POINTER flag correctly set.  After the CSE pass,
	     instruction recognition can fail if we don't consistently
	     set this flag when performing register copies.  This should
	     also improve the opportunities for creating insns that use
	     unscaled indexing.  */
	  if (REG_P (operand0) && REG_P (operand1))
	    {
	      if (REG_POINTER (operand1)
		  && !REG_POINTER (operand0)
		  && !HARD_REGISTER_P (operand0))
		copy_reg_pointer (operand0, operand1);
	      else if (REG_POINTER (operand0)
		       && !REG_POINTER (operand1)
		       && !HARD_REGISTER_P (operand1))
		copy_reg_pointer (operand1, operand0);
	    }

	  /* When MEMs are broken out, the REG_POINTER flag doesn't
	     get set.  In some cases, we can set the REG_POINTER flag
	     from the declaration for the MEM.  */
	  if (REG_P (operand0)
	      && GET_CODE (operand1) == MEM
	      && !REG_POINTER (operand0))
	    {
	      tree decl = MEM_EXPR (operand1);

	      /* Set the register pointer flag and register alignment
		 if the declaration for this memory reference is a
		 pointer type.  Fortran indirect argument references
		 are ignored.  */
	      if (decl
		  && !(flag_argument_noalias > 1
		       && TREE_CODE (decl) == INDIRECT_REF
		       && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
		{
		  tree type;

		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
		     tree operand 1.  */
		  if (TREE_CODE (decl) == COMPONENT_REF)
		    decl = TREE_OPERAND (decl, 1);

		  type = TREE_TYPE (decl);
		  if (TREE_CODE (type) == ARRAY_TYPE)
		    type = get_inner_array_type (type);

		  if (POINTER_TYPE_P (type))
		    {
		      int align;

		      type = TREE_TYPE (type);
		      /* Using TYPE_ALIGN_OK is rather conservative as
			 only the ada frontend actually sets it.  */
		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
			       : BITS_PER_UNIT);
		      mark_reg_pointer (operand0, align);
		    }
		}
	    }

	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }
  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
	  && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
	{
	  ishighonly = 1;
	  operand1 = XEXP (operand1, 0);
	}
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
	      || function_label_operand (operand1, mode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = force_mode (word_mode, scratch_reg);
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  gcc_assert (GET_CODE (const_part) == CONST_INT);

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}

	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }
	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  rtx const_mem = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
							mode, temp);
		  operands[1] = replace_equiv_address (const_mem, operands[1]);
		  emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  if (REG_P (operand0) && REG_P (operands[1]))
		    copy_reg_pointer (operand0, operands[1]);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
	  /* On the HPPA, references to data space are supposed to use dp,
	     register 27, but showing it in the RTL inhibits various cse
	     and loop optimizations.  */
	  else
	    {
	      rtx temp, set;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (mode);

	      /* Loading a SYMBOL_REF into a register makes that register
		 safe to be used as the base in an indexed address.

		 Don't mark hard registers though.  That loses.  */
	      if (GET_CODE (operand0) == REG
		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (operand0, BITS_PER_UNIT);
	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (temp, BITS_PER_UNIT);

	      if (ishighonly)
		set = gen_rtx_SET (mode, operand0, temp);
	      else
		set = gen_rtx_SET (VOIDmode,
				   operand0,
				   gen_rtx_LO_SUM (mode, temp, operand1));

	      emit_insn (gen_rtx_SET (VOIDmode,
				      temp,
				      gen_rtx_HIGH (mode, operand1)));
	      emit_insn (set);

	    }
	  return 1;
	}
      else if (pa_tls_referenced_p (operand1))
	{
	  rtx tmp = operand1;
	  rtx addend = NULL;

	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
	    {
	      addend = XEXP (XEXP (tmp, 0), 1);
	      tmp = XEXP (XEXP (tmp, 0), 0);
	    }

	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
	  tmp = legitimize_tls_address (tmp);
	  if (addend)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, addend);
	      tmp = force_operand (tmp, operands[0]);
	    }
	  operands[1] = tmp;
	}
1835 else if (GET_CODE (operand1) != CONST_INT
1836 || !cint_ok_for_move (INTVAL (operand1)))
1838 rtx insn, temp;
1839 rtx op1 = operand1;
1840 HOST_WIDE_INT value = 0;
1841 HOST_WIDE_INT insv = 0;
1842 int insert = 0;
1844 if (GET_CODE (operand1) == CONST_INT)
1845 value = INTVAL (operand1);
1847 if (TARGET_64BIT
1848 && GET_CODE (operand1) == CONST_INT
1849 && HOST_BITS_PER_WIDE_INT > 32
1850 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1852 HOST_WIDE_INT nval;
1854 /* Extract the low order 32 bits of the value and sign extend.
1855 If the new value is the same as the original value, we can
1856 use the original value as-is. If the new value is
1857 different, we use it and insert the most-significant 32-bits
1858 of the original value into the final result. */
1859 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1860 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1861 if (value != nval)
1863 #if HOST_BITS_PER_WIDE_INT > 32
1864 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1865 #endif
1866 insert = 1;
1867 value = nval;
1868 operand1 = GEN_INT (nval);
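 /* Worked example (editor's sketch, not from the original source):
    for value = 0x123456789 on a 64-bit host, the low 32 bits are
    0x23456789, whose sign bit is clear, so nval = 0x23456789.
    Since value != nval, we set insert = 1 and insv = value >> 32
    = 0x1; the code below first loads 0x23456789 and then deposits
    the 0x1 into the upper half with an insv pattern. */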
1872 if (reload_in_progress || reload_completed)
1873 temp = scratch_reg ? scratch_reg : operand0;
1874 else
1875 temp = gen_reg_rtx (mode);
1877 /* We don't directly split DImode constants on 32-bit targets
1878 because PLUS uses an 11-bit immediate and the insn sequence
1879 generated is not as efficient as the one using HIGH/LO_SUM. */
1880 if (GET_CODE (operand1) == CONST_INT
1881 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
1882 && !insert)
1884 /* Directly break constant into high and low parts. This
1885 provides better optimization opportunities because various
1886 passes recognize constants split with PLUS but not LO_SUM.
1887 We use a 14-bit signed low part except when the addition
1888 of 0x4000 to the high part might change the sign of the
1889 high part. */
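 /* Worked example (editor's sketch, assuming SImode): for
    value = 0x12345, low is 0x2345 >= 0x2000, so high becomes
    0x10000 + 0x4000 = 0x14000 and low is recomputed below as
    0x12345 - 0x14000 = -7355, which still fits in 14 signed
    bits; temp is then loaded with 0x14000 (an ldil-style load)
    and operands[1] becomes (plus temp -7355). */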
1890 HOST_WIDE_INT low = value & 0x3fff;
1891 HOST_WIDE_INT high = value & ~ 0x3fff;
1893 if (low >= 0x2000)
1895 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1896 high += 0x2000;
1897 else
1898 high += 0x4000;
1901 low = value - high;
1903 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1904 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1906 else
1908 emit_insn (gen_rtx_SET (VOIDmode, temp,
1909 gen_rtx_HIGH (mode, operand1)));
1910 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1913 insn = emit_move_insn (operands[0], operands[1]);
1915 /* Now insert the most significant 32 bits of the value
1916 into the register. When we don't have a second register
1917 available, it could take up to nine instructions to load
1918 a 64-bit integer constant. Prior to reload, we force
1919 constants that would take more than three instructions
1920 to load to the constant pool. During and after reload,
1921 we have to handle all possible values. */
1922 if (insert)
1924 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
1925 register and the value to be inserted is outside the
1926 range that can be loaded with three depdi instructions. */
1927 if (temp != operand0 && (insv >= 16384 || insv < -16384))
1929 operand1 = GEN_INT (insv);
1931 emit_insn (gen_rtx_SET (VOIDmode, temp,
1932 gen_rtx_HIGH (mode, operand1)));
1933 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
1934 emit_insn (gen_insv (operand0, GEN_INT (32),
1935 const0_rtx, temp));
1937 else
1939 int len = 5, pos = 27;
1941 /* Insert the bits using the depdi instruction. */
1942 while (pos >= 0)
1944 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
1945 HOST_WIDE_INT sign = v5 < 0;
1947 /* Left extend the insertion. */
1948 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
1949 while (pos > 0 && (insv & 1) == sign)
1951 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
1952 len += 1;
1953 pos -= 1;
1956 emit_insn (gen_insv (operand0, GEN_INT (len),
1957 GEN_INT (pos), GEN_INT (v5)));
1959 len = pos > 0 && pos < 5 ? pos : 5;
1960 pos -= len;
1965 REG_NOTES (insn)
1966 = gen_rtx_EXPR_LIST (REG_EQUAL, op1, REG_NOTES (insn));
1968 return 1;
1971 /* Now have insn-emit do whatever it normally does. */
1972 return 0;
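 /* Editor's note: a nonzero return from emit_move_sequence means the
    move, including any scratch-register fixups, has already been
    emitted above; returning 0 lets the standard move expansion handle
    the operands. */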
1975 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1976 it will need a link/runtime reloc). */
1978 int
1979 reloc_needed (tree exp)
1981 int reloc = 0;
1983 switch (TREE_CODE (exp))
1985 case ADDR_EXPR:
1986 return 1;
1988 case PLUS_EXPR:
1989 case MINUS_EXPR:
1990 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1991 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1992 break;
1994 case NOP_EXPR:
1995 case CONVERT_EXPR:
1996 case NON_LVALUE_EXPR:
1997 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1998 break;
2000 case CONSTRUCTOR:
2002 tree value;
2003 unsigned HOST_WIDE_INT ix;
2005 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2006 if (value)
2007 reloc |= reloc_needed (value);
2009 break;
2011 case ERROR_MARK:
2012 break;
2014 default:
2015 break;
2017 return reloc;
2020 /* Does operand (which is a symbolic_operand) live in text space?
2021 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2022 will be true. */
2024 int
2025 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2027 if (GET_CODE (operand) == CONST)
2028 operand = XEXP (XEXP (operand, 0), 0);
2029 if (flag_pic)
2031 if (GET_CODE (operand) == SYMBOL_REF)
2032 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2034 else
2036 if (GET_CODE (operand) == SYMBOL_REF)
2037 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2039 return 1;
2043 /* Return the best assembler insn template
2044 for moving operands[1] into operands[0] as a fullword. */
2045 const char *
2046 singlemove_string (rtx *operands)
2048 HOST_WIDE_INT intval;
2050 if (GET_CODE (operands[0]) == MEM)
2051 return "stw %r1,%0";
2052 if (GET_CODE (operands[1]) == MEM)
2053 return "ldw %1,%0";
2054 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2056 long i;
2057 REAL_VALUE_TYPE d;
2059 gcc_assert (GET_MODE (operands[1]) == SFmode);
2061 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2062 bit pattern. */
2063 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2064 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2066 operands[1] = GEN_INT (i);
2067 /* Fall through to CONST_INT case. */
2069 if (GET_CODE (operands[1]) == CONST_INT)
2071 intval = INTVAL (operands[1]);
2073 if (VAL_14_BITS_P (intval))
2074 return "ldi %1,%0";
2075 else if ((intval & 0x7ff) == 0)
2076 return "ldil L'%1,%0";
2077 else if (zdepi_cint_p (intval))
2078 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2079 else
2080 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2082 return "copy %1,%0";
2086 /* Compute position (in OP[1]) and width (in OP[2])
2087 useful for copying IMM to a register using the zdepi
2088 instructions. Store the immediate value to insert in OP[0]. */
2089 static void
2090 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2092 int lsb, len;
2094 /* Find the least significant set bit in IMM. */
2095 for (lsb = 0; lsb < 32; lsb++)
2097 if ((imm & 1) != 0)
2098 break;
2099 imm >>= 1;
2102 /* Choose variants based on *sign* of the 5-bit field. */
2103 if ((imm & 0x10) == 0)
2104 len = (lsb <= 28) ? 4 : 32 - lsb;
2105 else
2107 /* Find the width of the bitstring in IMM. */
2108 for (len = 5; len < 32; len++)
2110 if ((imm & (1 << len)) == 0)
2111 break;
2114 /* Sign extend IMM as a 5-bit value. */
2115 imm = (imm & 0xf) - 0x10;
2118 op[0] = imm;
2119 op[1] = 31 - lsb;
2120 op[2] = len;
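 /* Editor's sketch, not part of this file: a standalone sanity check
    for the computation above (the 64-bit variant below is analogous).
    The zdepi semantics are modeled here as an assumption: deposit
    OP[0], sign-extended from 5 bits to a field of OP[2] bits whose
    rightmost bit sits at PA position OP[1] (bit 0 = most significant),
    zeroing the rest of the word. The helper is re-declared locally,
    narrowed to unsigned int, so the check compiles on its own. */
 #if 0
 #include <assert.h>

 static void
 model_compute_zdepwi (unsigned int imm, unsigned *op)
 {
   int lsb, len;

   /* Find the least significant set bit in IMM. */
   for (lsb = 0; lsb < 32; lsb++)
     {
       if ((imm & 1) != 0)
         break;
       imm >>= 1;
     }

   if ((imm & 0x10) == 0)
     len = (lsb <= 28) ? 4 : 32 - lsb;
   else
     {
       /* Find the width of the bitstring in IMM. */
       for (len = 5; len < 32; len++)
         if ((imm & (1u << len)) == 0)
           break;
       /* Sign extend IMM as a 5-bit value. */
       imm = (imm & 0xf) - 0x10;
     }
   op[0] = imm;
   op[1] = 31 - lsb;
   op[2] = len;
 }

 /* Assumed zdepi semantics: sign-extend the 5-bit immediate to LEN
    bits and shift it into place; everything else becomes zero. */
 static unsigned int
 model_zdepi (int imm5, unsigned p, unsigned len)
 {
   long long v = (imm5 & 0x10) ? (imm5 | ~0x1f) : (imm5 & 0xf);
   unsigned long long field = (unsigned long long) v & ((1ULL << len) - 1);

   return (unsigned int) (field << (31 - p));
 }

 int
 main (void)
 {
   unsigned op[3];

   model_compute_zdepwi (0x00ff0000, op);  /* run of ones: {-1, 15, 8} */
   assert (model_zdepi ((int) op[0], op[1], op[2]) == 0x00ff0000);

   model_compute_zdepwi (0x00000070, op);  /* small field: {7, 27, 4} */
   assert (model_zdepi ((int) op[0], op[1], op[2]) == 0x00000070);
   return 0;
 }
 #endif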
2123 /* Compute position (in OP[1]) and width (in OP[2])
2124 useful for copying IMM to a register using the depdi,z
2125 instructions. Store the immediate value to insert in OP[0]. */
2126 void
2127 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2129 HOST_WIDE_INT lsb, len;
2131 /* Find the least significant set bit in IMM. */
2132 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2134 if ((imm & 1) != 0)
2135 break;
2136 imm >>= 1;
2139 /* Choose variants based on *sign* of the 5-bit field. */
2140 if ((imm & 0x10) == 0)
2141 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2142 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2143 else
2145 /* Find the width of the bitstring in IMM. */
2146 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2148 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2149 break;
2152 /* Sign extend IMM as a 5-bit value. */
2153 imm = (imm & 0xf) - 0x10;
2156 op[0] = imm;
2157 op[1] = 63 - lsb;
2158 op[2] = len;
2161 /* Output assembler code to perform a doubleword move insn
2162 with operands OPERANDS. */
2164 const char *
2165 output_move_double (rtx *operands)
2167 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2168 rtx latehalf[2];
2169 rtx addreg0 = 0, addreg1 = 0;
2171 /* First classify both operands. */
2173 if (REG_P (operands[0]))
2174 optype0 = REGOP;
2175 else if (offsettable_memref_p (operands[0]))
2176 optype0 = OFFSOP;
2177 else if (GET_CODE (operands[0]) == MEM)
2178 optype0 = MEMOP;
2179 else
2180 optype0 = RNDOP;
2182 if (REG_P (operands[1]))
2183 optype1 = REGOP;
2184 else if (CONSTANT_P (operands[1]))
2185 optype1 = CNSTOP;
2186 else if (offsettable_memref_p (operands[1]))
2187 optype1 = OFFSOP;
2188 else if (GET_CODE (operands[1]) == MEM)
2189 optype1 = MEMOP;
2190 else
2191 optype1 = RNDOP;
2193 /* Check for the cases that the operand constraints are not
2194 supposed to allow to happen. */
2195 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2197 /* Handle auto decrementing and incrementing loads and stores
2198 specifically, since the structure of the function doesn't work
2199 for them without major modification. Do it better when we teach
2200 this port about the general inc/dec addressing of PA.
2201 (This was written by tege. Chide him if it doesn't work.) */
2203 if (optype0 == MEMOP)
2205 /* We have to output the address syntax ourselves, since print_operand
2206 doesn't deal with the addresses we want to use. Fix this later. */
2208 rtx addr = XEXP (operands[0], 0);
2209 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2211 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2213 operands[0] = XEXP (addr, 0);
2214 gcc_assert (GET_CODE (operands[1]) == REG
2215 && GET_CODE (operands[0]) == REG);
2217 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2219 /* No overlap between high target register and address
2220 register. (We do this in a non-obvious way to
2221 save a register file writeback) */
2222 if (GET_CODE (addr) == POST_INC)
2223 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2224 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2226 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2228 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2230 operands[0] = XEXP (addr, 0);
2231 gcc_assert (GET_CODE (operands[1]) == REG
2232 && GET_CODE (operands[0]) == REG);
2234 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2235 /* No overlap between high target register and address
2236 register. (We do this in a non-obvious way to save a
2237 register file writeback) */
2238 if (GET_CODE (addr) == PRE_INC)
2239 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2240 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2243 if (optype1 == MEMOP)
2245 /* We have to output the address syntax ourselves, since print_operand
2246 doesn't deal with the addresses we want to use. Fix this later. */
2248 rtx addr = XEXP (operands[1], 0);
2249 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2251 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2253 operands[1] = XEXP (addr, 0);
2254 gcc_assert (GET_CODE (operands[0]) == REG
2255 && GET_CODE (operands[1]) == REG);
2257 if (!reg_overlap_mentioned_p (high_reg, addr))
2259 /* No overlap between high target register and address
2260 register. (We do this in a non-obvious way to
2261 save a register file writeback) */
2262 if (GET_CODE (addr) == POST_INC)
2263 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2264 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2266 else
2268 /* This is an undefined situation. We should load into the
2269 address register *and* update that register. Probably
2270 we don't need to handle this at all. */
2271 if (GET_CODE (addr) == POST_INC)
2272 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2273 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2276 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2278 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2280 operands[1] = XEXP (addr, 0);
2281 gcc_assert (GET_CODE (operands[0]) == REG
2282 && GET_CODE (operands[1]) == REG);
2284 if (!reg_overlap_mentioned_p (high_reg, addr))
2286 /* No overlap between high target register and address
2287 register. (We do this in a non-obvious way to
2288 save a register file writeback) */
2289 if (GET_CODE (addr) == PRE_INC)
2290 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2291 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2293 else
2295 /* This is an undefined situation. We should load into the
2296 address register *and* update that register. Probably
2297 we don't need to handle this at all. */
2298 if (GET_CODE (addr) == PRE_INC)
2299 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2300 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2303 else if (GET_CODE (addr) == PLUS
2304 && GET_CODE (XEXP (addr, 0)) == MULT)
2306 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2308 if (!reg_overlap_mentioned_p (high_reg, addr))
2310 rtx xoperands[4];
2312 xoperands[0] = high_reg;
2313 xoperands[1] = XEXP (addr, 1);
2314 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2315 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2316 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2317 xoperands);
2318 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2320 else
2322 rtx xoperands[4];
2324 xoperands[0] = high_reg;
2325 xoperands[1] = XEXP (addr, 1);
2326 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2327 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2328 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2329 xoperands);
2330 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2335 /* If an operand is an unoffsettable memory ref, find a register
2336 we can increment temporarily to make it refer to the second word. */
2338 if (optype0 == MEMOP)
2339 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2341 if (optype1 == MEMOP)
2342 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2344 /* Ok, we can do one word at a time.
2345 Normally we do the low-numbered word first.
2347 In either case, set up in LATEHALF the operands to use
2348 for the high-numbered word and in some cases alter the
2349 operands in OPERANDS to be suitable for the low-numbered word. */
2351 if (optype0 == REGOP)
2352 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2353 else if (optype0 == OFFSOP)
2354 latehalf[0] = adjust_address (operands[0], SImode, 4);
2355 else
2356 latehalf[0] = operands[0];
2358 if (optype1 == REGOP)
2359 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2360 else if (optype1 == OFFSOP)
2361 latehalf[1] = adjust_address (operands[1], SImode, 4);
2362 else if (optype1 == CNSTOP)
2363 split_double (operands[1], &operands[1], &latehalf[1]);
2364 else
2365 latehalf[1] = operands[1];
2367 /* If the first move would clobber the source of the second one,
2368 do them in the other order.
2370 This can happen in two cases:
2372 mem -> register where the first half of the destination register
2373 is the same register used in the memory's address. Reload
2374 can create such insns.
2376 mem in this case will be either register indirect or register
2377 indirect plus a valid offset.
2379 register -> register move where REGNO(dst) == REGNO(src + 1)
2380 someone (Tim/Tege?) claimed this can happen for parameter loads.
2382 Handle mem -> register case first. */
2383 if (optype0 == REGOP
2384 && (optype1 == MEMOP || optype1 == OFFSOP)
2385 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2386 operands[1], 0))
2388 /* Do the late half first. */
2389 if (addreg1)
2390 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2391 output_asm_insn (singlemove_string (latehalf), latehalf);
2393 /* Then clobber. */
2394 if (addreg1)
2395 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2396 return singlemove_string (operands);
2399 /* Now handle register -> register case. */
2400 if (optype0 == REGOP && optype1 == REGOP
2401 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2403 output_asm_insn (singlemove_string (latehalf), latehalf);
2404 return singlemove_string (operands);
2407 /* Normal case: do the two words, low-numbered first. */
2409 output_asm_insn (singlemove_string (operands), operands);
2411 /* Make any unoffsettable addresses point at high-numbered word. */
2412 if (addreg0)
2413 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2414 if (addreg1)
2415 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2417 /* Do that word. */
2418 output_asm_insn (singlemove_string (latehalf), latehalf);
2420 /* Undo the adds we just did. */
2421 if (addreg0)
2422 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2423 if (addreg1)
2424 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2426 return "";
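 /* Editor's example (hypothetical registers): for a DImode
    register-to-register move with dst = %r4:%r5 and src = %r3:%r4,
    REGNO (dst) == REGNO (src) + 1, so the code above emits the late
    half first ("copy %r4,%r5") and only then the low half
    ("copy %r3,%r4"), avoiding the clobber of %r4. */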
2429 const char *
2430 output_fp_move_double (rtx *operands)
2432 if (FP_REG_P (operands[0]))
2434 if (FP_REG_P (operands[1])
2435 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2436 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2437 else
2438 output_asm_insn ("fldd%F1 %1,%0", operands);
2440 else if (FP_REG_P (operands[1]))
2442 output_asm_insn ("fstd%F0 %1,%0", operands);
2444 else
2446 rtx xoperands[2];
2448 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2450 /* This is a pain. You have to be prepared to deal with an
2451 arbitrary address here including pre/post increment/decrement.
2453 So avoid this in the MD. */
2454 gcc_assert (GET_CODE (operands[0]) == REG);
2456 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2457 xoperands[0] = operands[0];
2458 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2460 return "";
2463 /* Return a REG that occurs in ADDR with coefficient 1.
2464 ADDR can be effectively incremented by incrementing REG. */
2466 static rtx
2467 find_addr_reg (rtx addr)
2469 while (GET_CODE (addr) == PLUS)
2471 if (GET_CODE (XEXP (addr, 0)) == REG)
2472 addr = XEXP (addr, 0);
2473 else if (GET_CODE (XEXP (addr, 1)) == REG)
2474 addr = XEXP (addr, 1);
2475 else if (CONSTANT_P (XEXP (addr, 0)))
2476 addr = XEXP (addr, 1);
2477 else if (CONSTANT_P (XEXP (addr, 1)))
2478 addr = XEXP (addr, 0);
2479 else
2480 gcc_unreachable ();
2482 gcc_assert (GET_CODE (addr) == REG);
2483 return addr;
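 /* Editor's example: for ADDR = (plus (plus %r4 %r1) 12), the
    constant arm is skipped, the inner PLUS is entered, and %r4 (the
    first REG operand found) is returned; bumping %r4 by 4 then makes
    ADDR reference the second word. */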
2486 /* Emit code to perform a block move.
2488 OPERANDS[0] is the destination pointer as a REG, clobbered.
2489 OPERANDS[1] is the source pointer as a REG, clobbered.
2490 OPERANDS[2] is a register for temporary storage.
2491 OPERANDS[3] is a register for temporary storage.
2492 OPERANDS[4] is the size as a CONST_INT
2493 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2494 OPERANDS[6] is another temporary register. */
2496 const char *
2497 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2499 int align = INTVAL (operands[5]);
2500 unsigned long n_bytes = INTVAL (operands[4]);
2502 /* We can't move more than a word at a time because the PA
2503 has no integer move insns wider than a word. (Could use fp mem ops?) */
2504 if (align > (TARGET_64BIT ? 8 : 4))
2505 align = (TARGET_64BIT ? 8 : 4);
2507 /* Note that we know each loop below will execute at least twice
2508 (else we would have open-coded the copy). */
2509 switch (align)
2511 case 8:
2512 /* Pre-adjust the loop counter. */
2513 operands[4] = GEN_INT (n_bytes - 16);
2514 output_asm_insn ("ldi %4,%2", operands);
2516 /* Copying loop. */
2517 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2518 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2519 output_asm_insn ("std,ma %3,8(%0)", operands);
2520 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2521 output_asm_insn ("std,ma %6,8(%0)", operands);
2523 /* Handle the residual. There could be up to 7 bytes of
2524 residual to copy! */
2525 if (n_bytes % 16 != 0)
2527 operands[4] = GEN_INT (n_bytes % 8);
2528 if (n_bytes % 16 >= 8)
2529 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2530 if (n_bytes % 8 != 0)
2531 output_asm_insn ("ldd 0(%1),%6", operands);
2532 if (n_bytes % 16 >= 8)
2533 output_asm_insn ("std,ma %3,8(%0)", operands);
2534 if (n_bytes % 8 != 0)
2535 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2537 return "";
2539 case 4:
2540 /* Pre-adjust the loop counter. */
2541 operands[4] = GEN_INT (n_bytes - 8);
2542 output_asm_insn ("ldi %4,%2", operands);
2544 /* Copying loop. */
2545 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2546 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2547 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2548 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2549 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2551 /* Handle the residual. There could be up to 7 bytes of
2552 residual to copy! */
2553 if (n_bytes % 8 != 0)
2555 operands[4] = GEN_INT (n_bytes % 4);
2556 if (n_bytes % 8 >= 4)
2557 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2558 if (n_bytes % 4 != 0)
2559 output_asm_insn ("ldw 0(%1),%6", operands);
2560 if (n_bytes % 8 >= 4)
2561 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2562 if (n_bytes % 4 != 0)
2563 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2565 return "";
2567 case 2:
2568 /* Pre-adjust the loop counter. */
2569 operands[4] = GEN_INT (n_bytes - 4);
2570 output_asm_insn ("ldi %4,%2", operands);
2572 /* Copying loop. */
2573 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2574 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2575 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2576 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2577 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2579 /* Handle the residual. */
2580 if (n_bytes % 4 != 0)
2582 if (n_bytes % 4 >= 2)
2583 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2584 if (n_bytes % 2 != 0)
2585 output_asm_insn ("ldb 0(%1),%6", operands);
2586 if (n_bytes % 4 >= 2)
2587 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2588 if (n_bytes % 2 != 0)
2589 output_asm_insn ("stb %6,0(%0)", operands);
2591 return "";
2593 case 1:
2594 /* Pre-adjust the loop counter. */
2595 operands[4] = GEN_INT (n_bytes - 2);
2596 output_asm_insn ("ldi %4,%2", operands);
2598 /* Copying loop. */
2599 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2600 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2601 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2602 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2603 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2605 /* Handle the residual. */
2606 if (n_bytes % 2 != 0)
2608 output_asm_insn ("ldb 0(%1),%3", operands);
2609 output_asm_insn ("stb %3,0(%0)", operands);
2611 return "";
2613 default:
2614 gcc_unreachable ();
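 /* Editor's worked example for the align == 4 case: n_bytes = 23
    presets the counter to 15, the loop copies 8 bytes per iteration
    and runs twice (counter 15 -> 7 -> -1), leaving 7 residual bytes;
    the residual code then moves one more word (23 % 8 >= 4) and
    finishes the last 3 bytes (23 % 4) with stby,e. */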
2618 /* Count the number of insns necessary to handle this block move.
2620 Basic structure is the same as emit_block_move, except that we
2621 count insns rather than emit them. */
2623 static int
2624 compute_movmem_length (rtx insn)
2626 rtx pat = PATTERN (insn);
2627 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2628 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2629 unsigned int n_insns = 0;
2631 /* We can't move more than a word at a time because the PA
2632 has no integer move insns wider than a word. (Could use fp mem ops?) */
2633 if (align > (TARGET_64BIT ? 8 : 4))
2634 align = (TARGET_64BIT ? 8 : 4);
2636 /* The basic copying loop. */
2637 n_insns = 6;
2639 /* Residuals. */
2640 if (n_bytes % (2 * align) != 0)
2642 if ((n_bytes % (2 * align)) >= align)
2643 n_insns += 2;
2645 if ((n_bytes % align) != 0)
2646 n_insns += 2;
2649 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2650 return n_insns * 4;
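 /* Editor's check against the example above: align = 4, n_bytes = 23
    gives the 6-insn loop, plus 2 insns because 23 % 8 >= 4 and 2 more
    because 23 % 4 != 0, i.e. 10 insns = 40 bytes of code. */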
2653 /* Emit code to perform a block clear.
2655 OPERANDS[0] is the destination pointer as a REG, clobbered.
2656 OPERANDS[1] is a register for temporary storage.
2657 OPERANDS[2] is the size as a CONST_INT
2658 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2660 const char *
2661 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2663 int align = INTVAL (operands[3]);
2664 unsigned long n_bytes = INTVAL (operands[2]);
2666 /* We can't clear more than a word at a time because the PA
2667 has no integer move insns wider than a word. */
2668 if (align > (TARGET_64BIT ? 8 : 4))
2669 align = (TARGET_64BIT ? 8 : 4);
2671 /* Note that we know each loop below will execute at least twice
2672 (else we would have open-coded the copy). */
2673 switch (align)
2675 case 8:
2676 /* Pre-adjust the loop counter. */
2677 operands[2] = GEN_INT (n_bytes - 16);
2678 output_asm_insn ("ldi %2,%1", operands);
2680 /* Loop. */
2681 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2682 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2683 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2685 /* Handle the residual. There could be up to 7 bytes of
2686 residual to copy! */
2687 if (n_bytes % 16 != 0)
2689 operands[2] = GEN_INT (n_bytes % 8);
2690 if (n_bytes % 16 >= 8)
2691 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2692 if (n_bytes % 8 != 0)
2693 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2695 return "";
2697 case 4:
2698 /* Pre-adjust the loop counter. */
2699 operands[2] = GEN_INT (n_bytes - 8);
2700 output_asm_insn ("ldi %2,%1", operands);
2702 /* Loop. */
2703 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2704 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2705 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2707 /* Handle the residual. There could be up to 7 bytes of
2708 residual to copy! */
2709 if (n_bytes % 8 != 0)
2711 operands[2] = GEN_INT (n_bytes % 4);
2712 if (n_bytes % 8 >= 4)
2713 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2714 if (n_bytes % 4 != 0)
2715 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2717 return "";
2719 case 2:
2720 /* Pre-adjust the loop counter. */
2721 operands[2] = GEN_INT (n_bytes - 4);
2722 output_asm_insn ("ldi %2,%1", operands);
2724 /* Loop. */
2725 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2726 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2727 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2729 /* Handle the residual. */
2730 if (n_bytes % 4 != 0)
2732 if (n_bytes % 4 >= 2)
2733 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2734 if (n_bytes % 2 != 0)
2735 output_asm_insn ("stb %%r0,0(%0)", operands);
2737 return "";
2739 case 1:
2740 /* Pre-adjust the loop counter. */
2741 operands[2] = GEN_INT (n_bytes - 2);
2742 output_asm_insn ("ldi %2,%1", operands);
2744 /* Loop. */
2745 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2746 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2747 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2749 /* Handle the residual. */
2750 if (n_bytes % 2 != 0)
2751 output_asm_insn ("stb %%r0,0(%0)", operands);
2753 return "";
2755 default:
2756 gcc_unreachable ();
2760 /* Count the number of insns necessary to handle this block clear.
2762 Basic structure is the same as emit_block_move, except that we
2763 count insns rather than emit them. */
2765 static int
2766 compute_clrmem_length (rtx insn)
2768 rtx pat = PATTERN (insn);
2769 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2770 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2771 unsigned int n_insns = 0;
2773 /* We can't clear more than a word at a time because the PA
2774 has no integer move insns wider than a word. */
2775 if (align > (TARGET_64BIT ? 8 : 4))
2776 align = (TARGET_64BIT ? 8 : 4);
2778 /* The basic loop. */
2779 n_insns = 4;
2781 /* Residuals. */
2782 if (n_bytes % (2 * align) != 0)
2784 if ((n_bytes % (2 * align)) >= align)
2785 n_insns++;
2787 if ((n_bytes % align) != 0)
2788 n_insns++;
2791 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2792 return n_insns * 4;
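 /* Editor's check: align = 4, n_bytes = 23 gives the 4-insn loop
    (an ldi plus three stores), one extra store because 23 % 8 >= 4
    and one stby because 23 % 4 != 0, i.e. 6 insns = 24 bytes. */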
2796 const char *
2797 output_and (rtx *operands)
2799 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2801 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2802 int ls0, ls1, ms0, p, len;
2804 for (ls0 = 0; ls0 < 32; ls0++)
2805 if ((mask & (1 << ls0)) == 0)
2806 break;
2808 for (ls1 = ls0; ls1 < 32; ls1++)
2809 if ((mask & (1 << ls1)) != 0)
2810 break;
2812 for (ms0 = ls1; ms0 < 32; ms0++)
2813 if ((mask & (1 << ms0)) == 0)
2814 break;
2816 gcc_assert (ms0 == 32);
2818 if (ls1 == 32)
2820 len = ls0;
2822 gcc_assert (len);
2824 operands[2] = GEN_INT (len);
2825 return "{extru|extrw,u} %1,31,%2,%0";
2827 else
2829 /* We could use this `depi' for the case above as well, but `depi'
2830 requires one more register file access than an `extru'. */
2832 p = 31 - ls0;
2833 len = ls1 - ls0;
2835 operands[2] = GEN_INT (p);
2836 operands[3] = GEN_INT (len);
2837 return "{depi|depwi} 0,%2,%3,%0";
2840 else
2841 return "and %1,%2,%0";
2844 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2845 storing the result in operands[0]. */
2846 const char *
2847 output_64bit_and (rtx *operands)
2849 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2851 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2852 int ls0, ls1, ms0, p, len;
2854 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2855 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2856 break;
2858 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2859 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2860 break;
2862 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2863 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2864 break;
2866 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
2868 if (ls1 == HOST_BITS_PER_WIDE_INT)
2870 len = ls0;
2872 gcc_assert (len);
2874 operands[2] = GEN_INT (len);
2875 return "extrd,u %1,63,%2,%0";
2877 else
2879 /* We could use this `depdi' for the case above as well, but `depdi'
2880 requires one more register file access than an `extrd,u'. */
2882 p = 63 - ls0;
2883 len = ls1 - ls0;
2885 operands[2] = GEN_INT (p);
2886 operands[3] = GEN_INT (len);
2887 return "depdi 0,%2,%3,%0";
2890 else
2891 return "and %1,%2,%0";
2894 const char *
2895 output_ior (rtx *operands)
2897 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2898 int bs0, bs1, p, len;
2900 if (INTVAL (operands[2]) == 0)
2901 return "copy %1,%0";
2903 for (bs0 = 0; bs0 < 32; bs0++)
2904 if ((mask & (1 << bs0)) != 0)
2905 break;
2907 for (bs1 = bs0; bs1 < 32; bs1++)
2908 if ((mask & (1 << bs1)) == 0)
2909 break;
2911 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
2913 p = 31 - bs0;
2914 len = bs1 - bs0;
2916 operands[2] = GEN_INT (p);
2917 operands[3] = GEN_INT (len);
2918 return "{depi|depwi} -1,%2,%3,%0";
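 /* Editor's example: mask 0x00070000 has bs0 = 16 and bs1 = 19,
    giving "depi -1,15,3,%0", which sets the three bits 16..18 (LSB
    numbering) and leaves everything else alone. */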
2921 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
2922 storing the result in operands[0]. */
2923 const char *
2924 output_64bit_ior (rtx *operands)
2926 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2927 int bs0, bs1, p, len;
2929 if (INTVAL (operands[2]) == 0)
2930 return "copy %1,%0";
2932 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2933 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2934 break;
2936 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2937 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
2938 break;
2940 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
2941 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
2943 p = 63 - bs0;
2944 len = bs1 - bs0;
2946 operands[2] = GEN_INT (p);
2947 operands[3] = GEN_INT (len);
2948 return "depdi -1,%2,%3,%0";
2951 /* Target hook for assembling integer objects. This code handles
2952 aligned SI and DI integers specially since function references
2953 must be preceded by P%. */
2955 static bool
2956 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
2958 if (size == UNITS_PER_WORD
2959 && aligned_p
2960 && function_label_operand (x, VOIDmode))
2962 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2963 output_addr_const (asm_out_file, x);
2964 fputc ('\n', asm_out_file);
2965 return true;
2967 return default_assemble_integer (x, size, aligned_p);
2970 /* Output an ascii string. */
2971 void
2972 output_ascii (FILE *file, const char *p, int size)
2974 int i;
2975 int chars_output;
2976 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2978 /* The HP assembler can only take strings of 256 characters at one
2979 time. This is a limitation on input line length, *not* the
2980 length of the string. Sigh. Even worse, it seems that the
2981 restriction is in number of input characters (see \xnn &
2982 \whatever). So we have to do this very carefully. */
2984 fputs ("\t.STRING \"", file);
2986 chars_output = 0;
2987 for (i = 0; i < size; i += 4)
2989 int co = 0;
2990 int io = 0;
2991 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2993 register unsigned int c = (unsigned char) p[i + io];
2995 if (c == '\"' || c == '\\')
2996 partial_output[co++] = '\\';
2997 if (c >= ' ' && c < 0177)
2998 partial_output[co++] = c;
2999 else
3001 unsigned int hexd;
3002 partial_output[co++] = '\\';
3003 partial_output[co++] = 'x';
3004 hexd = c / 16 - 0 + '0';
3005 if (hexd > '9')
3006 hexd -= '9' - 'a' + 1;
3007 partial_output[co++] = hexd;
3008 hexd = c % 16 - 0 + '0';
3009 if (hexd > '9')
3010 hexd -= '9' - 'a' + 1;
3011 partial_output[co++] = hexd;
3014 if (chars_output + co > 243)
3016 fputs ("\"\n\t.STRING \"", file);
3017 chars_output = 0;
3019 fwrite (partial_output, 1, (size_t) co, file);
3020 chars_output += co;
3021 co = 0;
3023 fputs ("\"\n", file);
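 /* Editor's example: the bytes 'A', '"' and 0x0a come out as A, \"
    and \x0a respectively; the c / 16 arithmetic above maps digit
    values 10..15 to lowercase 'a'..'f'. */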
3026 /* Try to rewrite floating point comparisons & branches to avoid
3027 useless add,tr insns.
3029 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3030 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3031 first attempt to remove useless add,tr insns. It is zero
3032 for the second pass as reorg sometimes leaves bogus REG_DEAD
3033 notes lying around.
3035 When CHECK_NOTES is zero we can only eliminate add,tr insns
3036 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3037 instructions. */
3038 static void
3039 remove_useless_addtr_insns (int check_notes)
3041 rtx insn;
3042 static int pass = 0;
3044 /* This is fairly cheap, so always run it when optimizing. */
3045 if (optimize > 0)
3047 int fcmp_count = 0;
3048 int fbranch_count = 0;
3050 /* Walk all the insns in this function looking for fcmp & fbranch
3051 instructions. Keep track of how many of each we find. */
3052 for (insn = get_insns (); insn; insn = next_insn (insn))
3054 rtx tmp;
3056 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3057 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3058 continue;
3060 tmp = PATTERN (insn);
3062 /* It must be a set. */
3063 if (GET_CODE (tmp) != SET)
3064 continue;
3066 /* If the destination is CCFP, then we've found an fcmp insn. */
3067 tmp = SET_DEST (tmp);
3068 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3070 fcmp_count++;
3071 continue;
3074 tmp = PATTERN (insn);
3075 /* If this is an fbranch instruction, bump the fbranch counter. */
3076 if (GET_CODE (tmp) == SET
3077 && SET_DEST (tmp) == pc_rtx
3078 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3079 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3080 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3081 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3083 fbranch_count++;
3084 continue;
3089 /* Find all floating point compare + branch insns. If possible,
3090 reverse the comparison & the branch to avoid add,tr insns. */
3091 for (insn = get_insns (); insn; insn = next_insn (insn))
3093 rtx tmp, next;
3095 /* Ignore anything that isn't an INSN. */
3096 if (GET_CODE (insn) != INSN)
3097 continue;
3099 tmp = PATTERN (insn);
3101 /* It must be a set. */
3102 if (GET_CODE (tmp) != SET)
3103 continue;
3105 /* The destination must be CCFP, which is register zero. */
3106 tmp = SET_DEST (tmp);
3107 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3108 continue;
3110 /* INSN should be a set of CCFP.
3112 See if the result of this insn is used in a reversed FP
3113 conditional branch. If so, reverse our condition and
3114 the branch. Doing so avoids useless add,tr insns. */
3115 next = next_insn (insn);
3116 while (next)
3118 /* Jumps, calls and labels stop our search. */
3119 if (GET_CODE (next) == JUMP_INSN
3120 || GET_CODE (next) == CALL_INSN
3121 || GET_CODE (next) == CODE_LABEL)
3122 break;
3124 /* As does another fcmp insn. */
3125 if (GET_CODE (next) == INSN
3126 && GET_CODE (PATTERN (next)) == SET
3127 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3128 && REGNO (SET_DEST (PATTERN (next))) == 0)
3129 break;
3131 next = next_insn (next);
3134 /* Is NEXT_INSN a branch? */
3135 if (next
3136 && GET_CODE (next) == JUMP_INSN)
3138 rtx pattern = PATTERN (next);
3140 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3141 and CCFP dies, then reverse our conditional and the branch
3142 to avoid the add,tr. */
3143 if (GET_CODE (pattern) == SET
3144 && SET_DEST (pattern) == pc_rtx
3145 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3146 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3147 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3148 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3149 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3150 && (fcmp_count == fbranch_count
3151 || (check_notes
3152 && find_regno_note (next, REG_DEAD, 0))))
3154 /* Reverse the branch. */
3155 tmp = XEXP (SET_SRC (pattern), 1);
3156 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3157 XEXP (SET_SRC (pattern), 2) = tmp;
3158 INSN_CODE (next) = -1;
3160 /* Reverse our condition. */
3161 tmp = PATTERN (insn);
3162 PUT_CODE (XEXP (tmp, 1),
3163 (reverse_condition_maybe_unordered
3164 (GET_CODE (XEXP (tmp, 1)))));
3170 pass = !pass;
3174 /* You may have trouble believing this, but this is the 32 bit HP-PA
3175 stack layout. Wow.
3177 Offset Contents
3179 Variable arguments (optional; any number may be allocated)
3181 SP-(4*(N+9)) arg word N
3183 SP-56 arg word 5
3184 SP-52 arg word 4
3186 Fixed arguments (must be allocated; may remain unused)
3188 SP-48 arg word 3
3189 SP-44 arg word 2
3190 SP-40 arg word 1
3191 SP-36 arg word 0
3193 Frame Marker
3195 SP-32 External Data Pointer (DP)
3196 SP-28 External sr4
3197 SP-24 External/stub RP (RP')
3198 SP-20 Current RP
3199 SP-16 Static Link
3200 SP-12 Clean up
3201 SP-8 Calling Stub RP (RP'')
3202 SP-4 Previous SP
3204 Top of Frame
3206 SP-0 Stack Pointer (points to next available address)
3210 /* This function saves registers as follows. Registers marked with ' are
3211 this function's registers (as opposed to the previous function's).
3212 If a frame_pointer isn't needed, r4 is saved as a general register;
3213 the space for the frame pointer is still allocated, though, to keep
3214 things simple.
3217 Top of Frame
3219 SP (FP') Previous FP
3220 SP + 4 Alignment filler (sigh)
3221 SP + 8 Space for locals reserved here.
3225 SP + n All call saved registers used.
3229 SP + o All call saved fp registers used.
3233 SP + p (SP') points to next available address.
3237 /* Global variables set by output_function_prologue(). */
3238 /* Size of frame. Need to know this to emit return insns from
3239 leaf procedures. */
3240 static HOST_WIDE_INT actual_fsize, local_fsize;
3241 static int save_fregs;
3243 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3244 Handle case where DISP > 8k by using the add_high_const patterns.
3246 Note in DISP > 8k case, we will leave the high part of the address
3247 in %r1. There is code in hppa_expand_{prologue,epilogue} that knows this. */
3249 static void
3250 store_reg (int reg, HOST_WIDE_INT disp, int base)
3252 rtx insn, dest, src, basereg;
3254 src = gen_rtx_REG (word_mode, reg);
3255 basereg = gen_rtx_REG (Pmode, base);
3256 if (VAL_14_BITS_P (disp))
3258 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3259 insn = emit_move_insn (dest, src);
3261 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3263 rtx delta = GEN_INT (disp);
3264 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3266 emit_move_insn (tmpreg, delta);
3267 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3268 dest = gen_rtx_MEM (word_mode, tmpreg);
3269 insn = emit_move_insn (dest, src);
3270 if (DO_FRAME_NOTES)
3272 REG_NOTES (insn)
3273 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3274 gen_rtx_SET (VOIDmode,
3275 gen_rtx_MEM (word_mode,
3276 gen_rtx_PLUS (word_mode, basereg,
3277 delta)),
3278 src),
3279 REG_NOTES (insn));
3282 else
3284 rtx delta = GEN_INT (disp);
3285 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3286 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3288 emit_move_insn (tmpreg, high);
3289 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3290 insn = emit_move_insn (dest, src);
3291 if (DO_FRAME_NOTES)
3293 REG_NOTES (insn)
3294 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3295 gen_rtx_SET (VOIDmode,
3296 gen_rtx_MEM (word_mode,
3297 gen_rtx_PLUS (word_mode, basereg,
3298 delta)),
3299 src),
3300 REG_NOTES (insn));
3304 if (DO_FRAME_NOTES)
3305 RTX_FRAME_RELATED_P (insn) = 1;
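 /* Editor's sketch of the DISP > 8k path on the 32-bit port
    (hypothetical operands): store_reg (3, 20480, 30) cannot use a
    14-bit displacement, so it emits the HIGH part into %r1 (an
    "addil L'20480,%r30") and stores through the LO_SUM
    ("stw %r3,R'20480(%r1)"), leaving the high part of the address
    in %r1 as the header comment above promises. */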
3308 /* Emit RTL to store REG at the memory location specified by BASE and then
3309 add MOD to BASE. MOD must be <= 8k. */
3311 static void
3312 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3314 rtx insn, basereg, srcreg, delta;
3316 gcc_assert (VAL_14_BITS_P (mod));
3318 basereg = gen_rtx_REG (Pmode, base);
3319 srcreg = gen_rtx_REG (word_mode, reg);
3320 delta = GEN_INT (mod);
3322 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3323 if (DO_FRAME_NOTES)
3325 RTX_FRAME_RELATED_P (insn) = 1;
3327 /* RTX_FRAME_RELATED_P must be set on each frame related set
3328 in a parallel with more than one element. Don't set
3329 RTX_FRAME_RELATED_P in the first set if reg is temporary
3330 register 1. The effect of this operation is recorded in
3331 the initial copy. */
3332 if (reg != 1)
3334 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3335 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3337 else
3339 /* The first element of a PARALLEL is always processed if it is
3340 a SET. Thus, we need an expression list for this case. */
3341 REG_NOTES (insn)
3342 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3343 gen_rtx_SET (VOIDmode, basereg,
3344 gen_rtx_PLUS (word_mode, basereg, delta)),
3345 REG_NOTES (insn));
3350 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3351 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3352 whether to add a frame note or not.
3354 In the DISP > 8k case, we leave the high part of the address in %r1.
3355 There is code in hppa_expand_{prologue,epilogue} that knows about this. */
3357 static void
3358 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3360 rtx insn;
3362 if (VAL_14_BITS_P (disp))
3364 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3365 plus_constant (gen_rtx_REG (Pmode, base), disp));
3367 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3369 rtx basereg = gen_rtx_REG (Pmode, base);
3370 rtx delta = GEN_INT (disp);
3371 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3373 emit_move_insn (tmpreg, delta);
3374 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3375 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3377 else
3379 rtx basereg = gen_rtx_REG (Pmode, base);
3380 rtx delta = GEN_INT (disp);
3381 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3383 emit_move_insn (tmpreg,
3384 gen_rtx_PLUS (Pmode, basereg,
3385 gen_rtx_HIGH (Pmode, delta)));
3386 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3387 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3390 if (DO_FRAME_NOTES && note)
3391 RTX_FRAME_RELATED_P (insn) = 1;
3394 HOST_WIDE_INT
3395 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3397 int freg_saved = 0;
3398 int i, j;
3400 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3401 be consistent with the rounding and size calculation done here.
3402 Change them at the same time. */
3404 /* We do our own stack alignment. First, round the size of the
3405 stack locals up to a word boundary. */
3406 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3408 /* Space for previous frame pointer + filler. If any frame is
3409 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3410 waste some space here for the sake of HP compatibility. The
3411 first slot is only used when the frame pointer is needed. */
3412 if (size || frame_pointer_needed)
3413 size += STARTING_FRAME_OFFSET;
3415 /* If the current function calls __builtin_eh_return, then we need
3416 to allocate stack space for registers that will hold data for
3417 the exception handler. */
3418 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3420 unsigned int i;
3422 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3423 continue;
3424 size += i * UNITS_PER_WORD;
3427 /* Account for space used by the callee general register saves. */
3428 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3429 if (regs_ever_live[i])
3430 size += UNITS_PER_WORD;
3432 /* Account for space used by the callee floating point register saves. */
3433 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3434 if (regs_ever_live[i]
3435 || (!TARGET_64BIT && regs_ever_live[i + 1]))
3437 freg_saved = 1;
3439 /* We always save both halves of the FP register, so always
3440 increment the frame size by 8 bytes. */
3441 size += 8;
3444 /* If any of the floating registers are saved, account for the
3445 alignment needed for the floating point register save block. */
3446 if (freg_saved)
3448 size = (size + 7) & ~7;
3449 if (fregs_live)
3450 *fregs_live = 1;
3453 /* The various ABIs include space for the outgoing parameters in the
3454 size of the current function's stack frame. We don't need to align
3455 for the outgoing arguments as their alignment is set by the final
3456 rounding for the frame as a whole. */
3457 size += current_function_outgoing_args_size;
3459 /* Allocate space for the fixed frame marker. This space must be
3460 allocated for any function that makes calls or allocates
3461 stack space. */
3462 if (!current_function_is_leaf || size)
3463 size += TARGET_64BIT ? 48 : 32;
3465 /* Finally, round to the preferred stack boundary. */
3466 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3467 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
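 /* Editor's worked example (assumptions: the 32-bit port, where
    STARTING_FRAME_OFFSET is 8 and PREFERRED_STACK_BOUNDARY is 512
    bits, i.e. 64 bytes): a leaf function with 40 bytes of locals,
    no saved callee registers and no outgoing arguments accumulates
    40 + 8 (fp slot and filler) + 32 (fixed frame marker) = 80 bytes,
    which the final rounding pushes up to 128. */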
3470 /* Generate the assembly code for function entry. FILE is a stdio
3471 stream to output the code to. SIZE is an int: how many units of
3472 temporary storage to allocate.
3474 Refer to the array `regs_ever_live' to determine which registers to
3475 save; `regs_ever_live[I]' is nonzero if register number I is ever
3476 used in the function. This function is responsible for knowing
3477 which registers should not be saved even if used. */
3479 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3480 of memory. If any fpu reg is used in the function, we allocate
3481 such a block here, at the bottom of the frame, just in case it's needed.
3483 If this function is a leaf procedure, then we may choose not
3484 to do a "save" insn. The decision about whether or not
3485 to do this is made in regclass.c. */
3487 static void
3488 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3490 /* The function's label and associated .PROC must never be
3491 separated and must be output *after* any profiling declarations
3492 to avoid changing spaces/subspaces within a procedure. */
3493 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3494 fputs ("\t.PROC\n", file);
3496 /* hppa_expand_prologue does the dirty work now. We just need
3497 to output the assembler directives which denote the start
3498 of a function. */
3499 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3500 if (regs_ever_live[2])
3501 fputs (",CALLS,SAVE_RP", file);
3502 else
3503 fputs (",NO_CALLS", file);
3505 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3506 at the beginning of the frame and that it is used as the frame
3507 pointer for the frame. We do this because our current frame
3508 layout doesn't conform to that specified in the HP runtime
3509 documentation and we need a way to indicate to programs such as
3510 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3511 isn't used by HP compilers but is supported by the assembler.
3512 However, SAVE_SP is supposed to indicate that the previous stack
3513 pointer has been saved in the frame marker. */
3514 if (frame_pointer_needed)
3515 fputs (",SAVE_SP", file);
3517 /* Pass on information about the number of callee register saves
3518 performed in the prologue.
3520 The compiler is supposed to pass the highest register number
3521 saved, the assembler then has to adjust that number before
3522 entering it into the unwind descriptor (to account for any
3523 caller saved registers with lower register numbers than the
3524 first callee saved register). */
3525 if (gr_saved)
3526 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3528 if (fr_saved)
3529 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3531 fputs ("\n\t.ENTRY\n", file);
3533 remove_useless_addtr_insns (0);
3536 void
3537 hppa_expand_prologue (void)
3539 int merge_sp_adjust_with_store = 0;
3540 HOST_WIDE_INT size = get_frame_size ();
3541 HOST_WIDE_INT offset;
3542 int i;
3543 rtx insn, tmpreg;
3545 gr_saved = 0;
3546 fr_saved = 0;
3547 save_fregs = 0;
3549 /* Compute total size for frame pointer, filler, locals and rounding to
3550 the next word boundary. Similar code appears in compute_frame_size
3551 and must be changed in tandem with this code. */
3552 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3553 if (local_fsize || frame_pointer_needed)
3554 local_fsize += STARTING_FRAME_OFFSET;
3556 actual_fsize = compute_frame_size (size, &save_fregs);
3558 /* Compute a few things we will use often. */
3559 tmpreg = gen_rtx_REG (word_mode, 1);
3561 /* Save RP first. The calling conventions manual states RP will
3562 always be stored into the caller's frame at sp - 20 or sp - 16
3563 depending on which ABI is in use. */
3564 if (regs_ever_live[2] || current_function_calls_eh_return)
3565 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3567 /* Allocate the local frame and set up the frame pointer if needed. */
3568 if (actual_fsize != 0)
3570 if (frame_pointer_needed)
3572 /* Copy the old frame pointer temporarily into %r1. Set up the
3573 new stack pointer, then store away the saved old frame pointer
3574 into the stack at sp and at the same time update the stack
3575 pointer by actual_fsize bytes. Two versions, first
3576 handles small (<8k) frames. The second handles large (>=8k)
3577 frames. */
3578 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3579 if (DO_FRAME_NOTES)
3581 /* We need to record the frame pointer save here since the
3582 new frame pointer is set in the following insn. */
3583 RTX_FRAME_RELATED_P (insn) = 1;
3584 REG_NOTES (insn)
3585 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3586 gen_rtx_SET (VOIDmode,
3587 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3588 frame_pointer_rtx),
3589 REG_NOTES (insn));
3592 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3593 if (DO_FRAME_NOTES)
3594 RTX_FRAME_RELATED_P (insn) = 1;
3596 if (VAL_14_BITS_P (actual_fsize))
3597 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3598 else
3600 /* It is incorrect to store the saved frame pointer at *sp,
3601 then increment sp (writes beyond the current stack boundary).
3603 So instead use stwm to store at *sp and post-increment the
3604 stack pointer as an atomic operation. Then increment sp to
3605 finish allocating the new frame. */
3606 HOST_WIDE_INT adjust1 = 8192 - 64;
3607 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3609 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3610 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3611 adjust2, 1);
3614 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3615 we need to store the previous stack pointer (frame pointer)
3616 into the frame marker on targets that use the HP unwind
3617 library. This allows the HP unwind library to be used to
3618 unwind GCC frames. However, we are not fully compatible
3619 with the HP library because our frame layout differs from
3620 that specified in the HP runtime specification.
3622 We don't want a frame note on this instruction as the frame
3623 marker moves during dynamic stack allocation.
3625 This instruction also serves as a blockage to prevent
3626 register spills from being scheduled before the stack
3627 pointer is raised. This is necessary as we store
3628 registers using the frame pointer as a base register,
3629 and the frame pointer is set before sp is raised. */
3630 if (TARGET_HPUX_UNWIND_LIBRARY)
3632 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3633 GEN_INT (TARGET_64BIT ? -8 : -4));
3635 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3636 frame_pointer_rtx);
3638 else
3639 emit_insn (gen_blockage ());
3641 /* no frame pointer needed. */
3642 else
3644 /* In some cases we can perform the first callee register save
3645 and allocating the stack frame at the same time. If so, just
3646 make a note of it and defer allocating the frame until saving
3647 the callee registers. */
3648 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3649 merge_sp_adjust_with_store = 1;
3650 /* Cannot optimize. Adjust the stack frame by actual_fsize
3651 bytes. */
3652 else
3653 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3654 actual_fsize, 1);
3658 /* Normal register save.
3660 Do not save the frame pointer in the frame_pointer_needed case. It
3661 was done earlier. */
3662 if (frame_pointer_needed)
3664 offset = local_fsize;
3666 /* Saving the EH return data registers in the frame is the simplest
3667 way to get the frame unwind information emitted. We put them
3668 just before the general registers. */
3669 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3671 unsigned int i, regno;
3673 for (i = 0; ; ++i)
3675 regno = EH_RETURN_DATA_REGNO (i);
3676 if (regno == INVALID_REGNUM)
3677 break;
3679 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3680 offset += UNITS_PER_WORD;
3684 for (i = 18; i >= 4; i--)
3685 if (regs_ever_live[i] && ! call_used_regs[i])
3687 store_reg (i, offset, FRAME_POINTER_REGNUM);
3688 offset += UNITS_PER_WORD;
3689 gr_saved++;
3691 /* Account for %r3 which is saved in a special place. */
3692 gr_saved++;
3694 /* No frame pointer needed. */
3695 else
3697 offset = local_fsize - actual_fsize;
3699 /* Saving the EH return data registers in the frame is the simplest
3700 way to get the frame unwind information emitted. */
3701 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3703 unsigned int i, regno;
3705 for (i = 0; ; ++i)
3707 regno = EH_RETURN_DATA_REGNO (i);
3708 if (regno == INVALID_REGNUM)
3709 break;
3711 /* If merge_sp_adjust_with_store is nonzero, then we can
3712 optimize the first save. */
3713 if (merge_sp_adjust_with_store)
3715 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3716 merge_sp_adjust_with_store = 0;
3718 else
3719 store_reg (regno, offset, STACK_POINTER_REGNUM);
3720 offset += UNITS_PER_WORD;
3724 for (i = 18; i >= 3; i--)
3725 if (regs_ever_live[i] && ! call_used_regs[i])
3727 /* If merge_sp_adjust_with_store is nonzero, then we can
3728 optimize the first GR save. */
3729 if (merge_sp_adjust_with_store)
3731 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3732 merge_sp_adjust_with_store = 0;
3734 else
3735 store_reg (i, offset, STACK_POINTER_REGNUM);
3736 offset += UNITS_PER_WORD;
3737 gr_saved++;
3740 /* If we wanted to merge the SP adjustment with a GR save, but we never
3741 did any GR saves, then just emit the adjustment here. */
3742 if (merge_sp_adjust_with_store)
3743 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3744 actual_fsize, 1);
3747 /* The hppa calling conventions say that %r19, the pic offset
3748 register, is saved at sp - 32 (in this function's frame)
3749 when generating PIC code. FIXME: What is the correct thing
3750 to do for functions which make no calls and allocate no
3751 frame? Do we need to allocate a frame, or can we just omit
3752 the save? For now we'll just omit the save.
3754 We don't want a note on this insn as the frame marker can
3755 move if there is a dynamic stack allocation. */
3756 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3758 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3760 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3764 /* Align pointer properly (doubleword boundary). */
3765 offset = (offset + 7) & ~7;
3767 /* Floating point register store. */
3768 if (save_fregs)
3770 rtx base;
3772 /* First get the frame or stack pointer to the start of the FP register
3773 save area. */
3774 if (frame_pointer_needed)
3776 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3777 base = frame_pointer_rtx;
3779 else
3781 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3782 base = stack_pointer_rtx;
3785 /* Now actually save the FP registers. */
3786 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3788 if (regs_ever_live[i]
3789 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3791 rtx addr, insn, reg;
3792 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3793 reg = gen_rtx_REG (DFmode, i);
3794 insn = emit_move_insn (addr, reg);
3795 if (DO_FRAME_NOTES)
3797 RTX_FRAME_RELATED_P (insn) = 1;
3798 if (TARGET_64BIT)
3800 rtx mem = gen_rtx_MEM (DFmode,
3801 plus_constant (base, offset));
3802 REG_NOTES (insn)
3803 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3804 gen_rtx_SET (VOIDmode, mem, reg),
3805 REG_NOTES (insn));
3807 else
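/* In 32-bit mode, the frame note describes the 8-byte FP save as
   two 4-byte SFmode stores so that each half of the double word
   is tracked separately in the unwind info.  */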
3809 rtx meml = gen_rtx_MEM (SFmode,
3810 plus_constant (base, offset));
3811 rtx memr = gen_rtx_MEM (SFmode,
3812 plus_constant (base, offset + 4));
3813 rtx regl = gen_rtx_REG (SFmode, i);
3814 rtx regr = gen_rtx_REG (SFmode, i + 1);
3815 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3816 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3817 rtvec vec;
3819 RTX_FRAME_RELATED_P (setl) = 1;
3820 RTX_FRAME_RELATED_P (setr) = 1;
3821 vec = gen_rtvec (2, setl, setr);
3822 REG_NOTES (insn)
3823 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3824 gen_rtx_SEQUENCE (VOIDmode, vec),
3825 REG_NOTES (insn));
3828 offset += GET_MODE_SIZE (DFmode);
3829 fr_saved++;
3835 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3836 Handle case where DISP > 8k by using the add_high_const patterns. */
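/* As a sketch (assuming standard PA assembly and a displacement that
   fits in 32 bits but not 14), the HIGH/LO_SUM rtl generated below
   typically assembles to something like:

       addil L'disp,%base      ; %r1 = %base + high part of disp
       ldw R'disp(%r1),%dest   ; load using the low part

   The TARGET_64BIT !VAL_32_BITS_P path instead materializes the full
   displacement in %r1 first.  */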
3838 static void
3839 load_reg (int reg, HOST_WIDE_INT disp, int base)
3841 rtx dest = gen_rtx_REG (word_mode, reg);
3842 rtx basereg = gen_rtx_REG (Pmode, base);
3843 rtx src;
3845 if (VAL_14_BITS_P (disp))
3846 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3847 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3849 rtx delta = GEN_INT (disp);
3850 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3852 emit_move_insn (tmpreg, delta);
3853 if (TARGET_DISABLE_INDEXING)
3855 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3856 src = gen_rtx_MEM (word_mode, tmpreg);
3858 else
3859 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3861 else
3863 rtx delta = GEN_INT (disp);
3864 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3865 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3867 emit_move_insn (tmpreg, high);
3868 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3871 emit_move_insn (dest, src);
3874 /* Update the total code bytes output to the text section. */
3876 static void
3877 update_total_code_bytes (int nbytes)
3879 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3880 && !IN_NAMED_SECTION_P (cfun->decl))
3882 if (INSN_ADDRESSES_SET_P ())
3884 unsigned long old_total = total_code_bytes;
3886 total_code_bytes += nbytes;
3888 /* Be prepared to handle overflows. */
3889 if (old_total > total_code_bytes)
3890 total_code_bytes = -1;
3892 else
3893 total_code_bytes = -1;
3897 /* This function generates the assembly code for function exit.
3898 Args are as for output_function_prologue ().
3900 The function epilogue should not depend on the current stack
3901 pointer! It should use the frame pointer only. This is mandatory
3902 because of alloca; we also take advantage of it to omit stack
3903 adjustments before returning. */
3905 static void
3906 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3908 rtx insn = get_last_insn ();
3910 last_address = 0;
3912 /* hppa_expand_epilogue does the dirty work now. We just need
3913 to output the assembler directives which denote the end
3914 of a function.
3916 To make debuggers happy, emit a nop if the epilogue was completely
3917 eliminated due to a volatile call as the last insn in the
3918 current function. That way the return address (in %r2) will
3919 always point to a valid instruction in the current function. */
3921 /* Get the last real insn. */
3922 if (GET_CODE (insn) == NOTE)
3923 insn = prev_real_insn (insn);
3925 /* If it is a sequence, then look inside. */
3926 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3927 insn = XVECEXP (PATTERN (insn), 0, 0);
3929 /* If insn is a CALL_INSN, then it must be a call to a volatile
3930 function (otherwise there would be epilogue insns). */
3931 if (insn && GET_CODE (insn) == CALL_INSN)
3933 fputs ("\tnop\n", file);
3934 last_address += 4;
3937 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3939 if (TARGET_SOM && TARGET_GAS)
3941 /* We're done with this subspace except possibly for some additional
3942 debug information. Forget that we are in this subspace to ensure
3943 that the next function is output in its own subspace. */
3944 forget_section ();
3947 if (INSN_ADDRESSES_SET_P ())
3949 insn = get_last_nonnote_insn ();
3950 last_address += INSN_ADDRESSES (INSN_UID (insn));
3951 if (INSN_P (insn))
3952 last_address += insn_default_length (insn);
3953 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
3954 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
3957 /* Finally, update the total number of code bytes output so far. */
3958 update_total_code_bytes (last_address);
3961 void
3962 hppa_expand_epilogue (void)
3964 rtx tmpreg;
3965 HOST_WIDE_INT offset;
3966 HOST_WIDE_INT ret_off = 0;
3967 int i;
3968 int merge_sp_adjust_with_load = 0;
3970 /* We will use this often. */
3971 tmpreg = gen_rtx_REG (word_mode, 1);
3973 /* Try to restore RP early to avoid load/use interlocks when
3974 RP gets used in the return (bv) instruction. This appears to still
3975 be necessary even when we schedule the prologue and epilogue. */
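/* In the standard frame layout, the return pointer slot sits at
   offset -20 from the incoming stack pointer in the 32-bit runtime
   and -16 in the 64-bit runtime, hence the ret_off values below.  */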
3976 if (regs_ever_live [2] || current_function_calls_eh_return)
3978 ret_off = TARGET_64BIT ? -16 : -20;
3979 if (frame_pointer_needed)
3981 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
3982 ret_off = 0;
3984 else
3986 /* No frame pointer, and stack is smaller than 8k. */
3987 if (VAL_14_BITS_P (ret_off - actual_fsize))
3989 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
3990 ret_off = 0;
3995 /* General register restores. */
3996 if (frame_pointer_needed)
3998 offset = local_fsize;
4000 /* If the current function calls __builtin_eh_return, then we need
4001 to restore the saved EH data registers. */
4002 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4004 unsigned int i, regno;
4006 for (i = 0; ; ++i)
4008 regno = EH_RETURN_DATA_REGNO (i);
4009 if (regno == INVALID_REGNUM)
4010 break;
4012 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4013 offset += UNITS_PER_WORD;
4017 for (i = 18; i >= 4; i--)
4018 if (regs_ever_live[i] && ! call_used_regs[i])
4020 load_reg (i, offset, FRAME_POINTER_REGNUM);
4021 offset += UNITS_PER_WORD;
4024 else
4026 offset = local_fsize - actual_fsize;
4028 /* If the current function calls __builtin_eh_return, then we need
4029 to restore the saved EH data registers. */
4030 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4032 unsigned int i, regno;
4034 for (i = 0; ; ++i)
4036 regno = EH_RETURN_DATA_REGNO (i);
4037 if (regno == INVALID_REGNUM)
4038 break;
4040 /* Only for the first load.
4041 merge_sp_adjust_with_load holds the register load
4042 with which we will merge the sp adjustment. */
4043 if (merge_sp_adjust_with_load == 0
4044 && local_fsize == 0
4045 && VAL_14_BITS_P (-actual_fsize))
4046 merge_sp_adjust_with_load = regno;
4047 else
4048 load_reg (regno, offset, STACK_POINTER_REGNUM);
4049 offset += UNITS_PER_WORD;
4053 for (i = 18; i >= 3; i--)
4055 if (regs_ever_live[i] && ! call_used_regs[i])
4057 /* Only for the first load.
4058 merge_sp_adjust_with_load holds the register load
4059 with which we will merge the sp adjustment. */
4060 if (merge_sp_adjust_with_load == 0
4061 && local_fsize == 0
4062 && VAL_14_BITS_P (-actual_fsize))
4063 merge_sp_adjust_with_load = i;
4064 else
4065 load_reg (i, offset, STACK_POINTER_REGNUM);
4066 offset += UNITS_PER_WORD;
4071 /* Align pointer properly (doubleword boundary). */
4072 offset = (offset + 7) & ~7;
4074 /* FP register restores. */
4075 if (save_fregs)
4077 /* Adjust the register to index off of. */
4078 if (frame_pointer_needed)
4079 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4080 else
4081 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4083 /* Actually do the restores now. */
4084 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4085 if (regs_ever_live[i]
4086 || (! TARGET_64BIT && regs_ever_live[i + 1]))
4088 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4089 rtx dest = gen_rtx_REG (DFmode, i);
4090 emit_move_insn (dest, src);
4094 /* Emit a blockage insn here to keep these insns from being moved to
4095 an earlier spot in the epilogue, or into the main instruction stream.
4097 This is necessary as we must not cut the stack back before all the
4098 restores are finished. */
4099 emit_insn (gen_blockage ());
4101 /* Reset stack pointer (and possibly frame pointer). The stack
4102 pointer is initially set to fp + 64 to avoid a race condition. */
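/* Roughly: set %sp to %fp + 64, then restore the old frame pointer
   and cut the stack back with a single load-and-modify insn
   (something like "ldw,mb -64(%sp),%r3"; mnemonic is illustrative).  */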
4103 if (frame_pointer_needed)
4105 rtx delta = GEN_INT (-64);
4107 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4108 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4110 /* If we were deferring a callee register restore, do it now. */
4111 else if (merge_sp_adjust_with_load)
4113 rtx delta = GEN_INT (-actual_fsize);
4114 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4116 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4118 else if (actual_fsize != 0)
4119 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4120 - actual_fsize, 0);
4122 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4123 frame greater than 8k), do so now. */
4124 if (ret_off != 0)
4125 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4127 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4129 rtx sa = EH_RETURN_STACKADJ_RTX;
4131 emit_insn (gen_blockage ());
4132 emit_insn (TARGET_64BIT
4133 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4134 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4138 rtx
4139 hppa_pic_save_rtx (void)
4141 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4144 void
4145 hppa_profile_hook (int label_no)
4147 /* We use SImode for the address of the function in both 32 and
4148 64-bit code to avoid having to provide DImode versions of the
4149 lcla2 and load_offset_label_address insn patterns. */
4150 rtx reg = gen_reg_rtx (SImode);
4151 rtx label_rtx = gen_label_rtx ();
4152 rtx begin_label_rtx, call_insn;
4153 char begin_label_name[16];
4155 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4156 label_no);
4157 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4159 if (TARGET_64BIT)
4160 emit_move_insn (arg_pointer_rtx,
4161 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4162 GEN_INT (64)));
4164 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4166 /* The address of the function is loaded into %r25 with an instruction-
4167 relative sequence that avoids the use of relocations. The sequence
4168 is split so that the load_offset_label_address instruction can
4169 occupy the delay slot of the call to _mcount. */
4170 if (TARGET_PA_20)
4171 emit_insn (gen_lcla2 (reg, label_rtx));
4172 else
4173 emit_insn (gen_lcla1 (reg, label_rtx));
4175 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4176 reg, begin_label_rtx, label_rtx));
4178 #ifndef NO_PROFILE_COUNTERS
4180 rtx count_label_rtx, addr, r24;
4181 char count_label_name[16];
4183 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4184 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4186 addr = force_reg (Pmode, count_label_rtx);
4187 r24 = gen_rtx_REG (Pmode, 24);
4188 emit_move_insn (r24, addr);
4190 call_insn =
4191 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4192 gen_rtx_SYMBOL_REF (Pmode,
4193 "_mcount")),
4194 GEN_INT (TARGET_64BIT ? 24 : 12)));
4196 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4198 #else
4200 call_insn =
4201 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4202 gen_rtx_SYMBOL_REF (Pmode,
4203 "_mcount")),
4204 GEN_INT (TARGET_64BIT ? 16 : 8)));
4206 #endif
4208 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4209 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4211 /* Indicate the _mcount call cannot throw, nor will it execute a
4212 non-local goto. */
4213 REG_NOTES (call_insn)
4214 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4217 /* Fetch the return address for the frame COUNT steps up from
4218 the current frame, after the prologue. FRAMEADDR is the
4219 frame pointer of the COUNT frame.
4221 We want to ignore any export stub remnants here. To handle this,
4222 we examine the code at the return address, and if it is an export
4223 stub, we return a memory rtx for the stub return address stored
4224 at frame-24.
4226 The value returned is used in two different ways:
4228 1. To find a function's caller.
4230 2. To change the return address for a function.
4232 This function handles most instances of case 1; however, it will
4233 fail if there are two levels of stubs to execute on the return
4234 path. The only way I believe that can happen is if the return value
4235 needs a parameter relocation, which never happens for C code.
4237 This function handles most instances of case 2; however, it will
4238 fail if we did not originally have stub code on the return path
4239 but will need stub code on the new return path. This can happen if
4240 the caller & callee are both in the main program, but the new
4241 return location is in a shared library. */
4243 rtx
4244 return_addr_rtx (int count, rtx frameaddr)
4246 rtx label;
4247 rtx rp;
4248 rtx saved_rp;
4249 rtx ins;
4251 if (count != 0)
4252 return NULL_RTX;
4254 rp = get_hard_reg_initial_val (Pmode, 2);
4256 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4257 return rp;
4259 saved_rp = gen_reg_rtx (Pmode);
4260 emit_move_insn (saved_rp, rp);
4262 /* Get pointer to the instruction stream. We have to mask out the
4263 privilege level from the two low order bits of the return address
4264 pointer here so that ins will point to the start of the first
4265 instruction that would have been executed if we returned. */
4266 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4267 label = gen_label_rtx ();
4269 /* Check the instruction stream at the normal return address for the
4270 export stub:
4272 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4273 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4274 0x00011820 | stub+16: mtsp r1,sr0
4275 0xe0400002 | stub+20: be,n 0(sr0,rp)
4277 If it is an export stub, then our return address is really in
4278 -24[frameaddr]. */
4280 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4281 NULL_RTX, SImode, 1);
4282 emit_jump_insn (gen_bne (label));
4284 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4285 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4286 emit_jump_insn (gen_bne (label));
4288 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4289 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4290 emit_jump_insn (gen_bne (label));
4292 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4293 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
4295 /* If there is no export stub then just use the value saved from
4296 the return pointer register. */
4298 emit_jump_insn (gen_bne (label));
4300 /* Here we know that our return address points to an export
4301 stub. We don't want to return the address of the export stub,
4302 but rather the return address of the export stub. That return
4303 address is stored at -24[frameaddr]. */
4305 emit_move_insn (saved_rp,
4306 gen_rtx_MEM (Pmode,
4307 memory_address (Pmode,
4308 plus_constant (frameaddr,
4309 -24))));
4311 emit_label (label);
4312 return saved_rp;
4315 /* This is only valid once reload has completed because it depends on
4316 knowing exactly how much (if any) frame there is and...
4318 It's only valid if there is no frame marker to de-allocate and...
4320 It's only valid if %r2 hasn't been saved into the caller's frame
4321 (we're not profiling and %r2 isn't live anywhere). */
4322 int
4323 hppa_can_use_return_insn_p (void)
4325 return (reload_completed
4326 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4327 && ! regs_ever_live[2]
4328 && ! frame_pointer_needed);
4331 void
4332 emit_bcond_fp (enum rtx_code code, rtx operand0)
4334 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4335 gen_rtx_IF_THEN_ELSE (VOIDmode,
4336 gen_rtx_fmt_ee (code,
4337 VOIDmode,
4338 gen_rtx_REG (CCFPmode, 0),
4339 const0_rtx),
4340 gen_rtx_LABEL_REF (VOIDmode, operand0),
4341 pc_rtx)));
4345 rtx
4346 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4348 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4349 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4352 /* Adjust the cost of a scheduling dependency. Return the new cost of
4353 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4355 static int
4356 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4358 enum attr_type attr_type;
4360 /* Don't adjust costs for a pa8000 chip; also do not adjust any
4361 true dependencies, as they are described with bypasses now. */
4362 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4363 return cost;
4365 if (! recog_memoized (insn))
4366 return 0;
4368 attr_type = get_attr_type (insn);
4370 switch (REG_NOTE_KIND (link))
4372 case REG_DEP_ANTI:
4373 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4374 cycles later. */
4376 if (attr_type == TYPE_FPLOAD)
4378 rtx pat = PATTERN (insn);
4379 rtx dep_pat = PATTERN (dep_insn);
4380 if (GET_CODE (pat) == PARALLEL)
4382 /* This happens for the fldXs,mb patterns. */
4383 pat = XVECEXP (pat, 0, 0);
4385 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4386 /* If this happens, we have to extend this to schedule
4387 optimally. Return 0 for now. */
4388 return 0;
4390 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4392 if (! recog_memoized (dep_insn))
4393 return 0;
4394 switch (get_attr_type (dep_insn))
4396 case TYPE_FPALU:
4397 case TYPE_FPMULSGL:
4398 case TYPE_FPMULDBL:
4399 case TYPE_FPDIVSGL:
4400 case TYPE_FPDIVDBL:
4401 case TYPE_FPSQRTSGL:
4402 case TYPE_FPSQRTDBL:
4403 /* A fpload can't be issued until one cycle before a
4404 preceding arithmetic operation has finished if
4405 the target of the fpload is any of the sources
4406 (or destination) of the arithmetic operation. */
4407 return insn_default_latency (dep_insn) - 1;
4409 default:
4410 return 0;
4414 else if (attr_type == TYPE_FPALU)
4416 rtx pat = PATTERN (insn);
4417 rtx dep_pat = PATTERN (dep_insn);
4418 if (GET_CODE (pat) == PARALLEL)
4420 /* This happens for the fldXs,mb patterns. */
4421 pat = XVECEXP (pat, 0, 0);
4423 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4424 /* If this happens, we have to extend this to schedule
4425 optimally. Return 0 for now. */
4426 return 0;
4428 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4430 if (! recog_memoized (dep_insn))
4431 return 0;
4432 switch (get_attr_type (dep_insn))
4434 case TYPE_FPDIVSGL:
4435 case TYPE_FPDIVDBL:
4436 case TYPE_FPSQRTSGL:
4437 case TYPE_FPSQRTDBL:
4438 /* An ALU flop can't be issued until two cycles before a
4439 preceding divide or sqrt operation has finished if
4440 the target of the ALU flop is any of the sources
4441 (or destination) of the divide or sqrt operation. */
4442 return insn_default_latency (dep_insn) - 2;
4444 default:
4445 return 0;
4450 /* For other anti dependencies, the cost is 0. */
4451 return 0;
4453 case REG_DEP_OUTPUT:
4454 /* Output dependency; DEP_INSN writes a register that INSN writes some
4455 cycles later. */
4456 if (attr_type == TYPE_FPLOAD)
4458 rtx pat = PATTERN (insn);
4459 rtx dep_pat = PATTERN (dep_insn);
4460 if (GET_CODE (pat) == PARALLEL)
4462 /* This happens for the fldXs,mb patterns. */
4463 pat = XVECEXP (pat, 0, 0);
4465 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4466 /* If this happens, we have to extend this to schedule
4467 optimally. Return 0 for now. */
4468 return 0;
4470 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4472 if (! recog_memoized (dep_insn))
4473 return 0;
4474 switch (get_attr_type (dep_insn))
4476 case TYPE_FPALU:
4477 case TYPE_FPMULSGL:
4478 case TYPE_FPMULDBL:
4479 case TYPE_FPDIVSGL:
4480 case TYPE_FPDIVDBL:
4481 case TYPE_FPSQRTSGL:
4482 case TYPE_FPSQRTDBL:
4483 /* A fpload can't be issued until one cycle before a
4484 preceding arithmetic operation has finished if
4485 the target of the fpload is the destination of the
4486 arithmetic operation.
4488 Exception: For PA7100LC, PA7200 and PA7300, the cost
4489 is 3 cycles, unless they bundle together. We also
4490 pay the penalty if the second insn is a fpload. */
4491 return insn_default_latency (dep_insn) - 1;
4493 default:
4494 return 0;
4498 else if (attr_type == TYPE_FPALU)
4500 rtx pat = PATTERN (insn);
4501 rtx dep_pat = PATTERN (dep_insn);
4502 if (GET_CODE (pat) == PARALLEL)
4504 /* This happens for the fldXs,mb patterns. */
4505 pat = XVECEXP (pat, 0, 0);
4507 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4508 /* If this happens, we have to extend this to schedule
4509 optimally. Return 0 for now. */
4510 return 0;
4512 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4514 if (! recog_memoized (dep_insn))
4515 return 0;
4516 switch (get_attr_type (dep_insn))
4518 case TYPE_FPDIVSGL:
4519 case TYPE_FPDIVDBL:
4520 case TYPE_FPSQRTSGL:
4521 case TYPE_FPSQRTDBL:
4522 /* An ALU flop can't be issued until two cycles before a
4523 preceding divide or sqrt operation has finished if
4524 the target of the ALU flop is also the target of
4525 the divide or sqrt operation. */
4526 return insn_default_latency (dep_insn) - 2;
4528 default:
4529 return 0;
4534 /* For other output dependencies, the cost is 0. */
4535 return 0;
4537 default:
4538 gcc_unreachable ();
4542 /* Adjust scheduling priorities. We use this to try and keep addil
4543 and the next use of %r1 close together. */
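/* For example, a store to a writable symbolic address is usually
   emitted as an addil (leaving its result in %r1) followed by a
   dependent store, roughly:

       addil LR'sym-$global$,%r27
       stw %r4,RR'sym-$global$(%r1)

   (illustrative assembly).  Lowering the priority of the LO_SUM
   users keeps such pairs adjacent.  */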
4544 static int
4545 pa_adjust_priority (rtx insn, int priority)
4547 rtx set = single_set (insn);
4548 rtx src, dest;
4549 if (set)
4551 src = SET_SRC (set);
4552 dest = SET_DEST (set);
4553 if (GET_CODE (src) == LO_SUM
4554 && symbolic_operand (XEXP (src, 1), VOIDmode)
4555 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4556 priority >>= 3;
4558 else if (GET_CODE (src) == MEM
4559 && GET_CODE (XEXP (src, 0)) == LO_SUM
4560 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4561 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4562 priority >>= 1;
4564 else if (GET_CODE (dest) == MEM
4565 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4566 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4567 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4568 priority >>= 3;
4570 return priority;
4573 /* The 700 can only issue a single insn at a time.
4574 The 7XXX processors can issue two insns at a time.
4575 The 8000 can issue 4 insns at a time. */
4576 static int
4577 pa_issue_rate (void)
4579 switch (pa_cpu)
4581 case PROCESSOR_700: return 1;
4582 case PROCESSOR_7100: return 2;
4583 case PROCESSOR_7100LC: return 2;
4584 case PROCESSOR_7200: return 2;
4585 case PROCESSOR_7300: return 2;
4586 case PROCESSOR_8000: return 4;
4588 default:
4589 gcc_unreachable ();
4595 /* Return any length adjustment needed by INSN which already has its length
4596 computed as LENGTH. Return zero if no adjustment is necessary.
4598 For the PA: function calls, millicode calls, and backwards short
4599 conditional branches with unfilled delay slots need an adjustment by +1
4600 (to account for the NOP which will be inserted into the instruction stream).
4602 Also compute the length of an inline block move here as it is too
4603 complicated to express as a length attribute in pa.md. */
4604 int
4605 pa_adjust_insn_length (rtx insn, int length)
4607 rtx pat = PATTERN (insn);
4609 /* Jumps inside switch tables which have unfilled delay slots need
4610 adjustment. */
4611 if (GET_CODE (insn) == JUMP_INSN
4612 && GET_CODE (pat) == PARALLEL
4613 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4614 return 4;
4615 /* Millicode insn with an unfilled delay slot. */
4616 else if (GET_CODE (insn) == INSN
4617 && GET_CODE (pat) != SEQUENCE
4618 && GET_CODE (pat) != USE
4619 && GET_CODE (pat) != CLOBBER
4620 && get_attr_type (insn) == TYPE_MILLI)
4621 return 4;
4622 /* Block move pattern. */
4623 else if (GET_CODE (insn) == INSN
4624 && GET_CODE (pat) == PARALLEL
4625 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4626 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4627 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4628 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4629 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4630 return compute_movmem_length (insn) - 4;
4631 /* Block clear pattern. */
4632 else if (GET_CODE (insn) == INSN
4633 && GET_CODE (pat) == PARALLEL
4634 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4635 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4636 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4637 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4638 return compute_clrmem_length (insn) - 4;
4639 /* Conditional branch with an unfilled delay slot. */
4640 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4642 /* Adjust a short backwards conditional with an unfilled delay slot. */
4643 if (GET_CODE (pat) == SET
4644 && length == 4
4645 && ! forward_branch_p (insn))
4646 return 4;
4647 else if (GET_CODE (pat) == PARALLEL
4648 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4649 && length == 4)
4650 return 4;
4651 /* Adjust dbra insn with short backwards conditional branch with
4652 unfilled delay slot -- only for the case where the counter is in a
4653 general register. */
4654 else if (GET_CODE (pat) == PARALLEL
4655 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4656 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4657 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4658 && length == 4
4659 && ! forward_branch_p (insn))
4660 return 4;
4661 else
4662 return 0;
4664 return 0;
4667 /* Print operand X (an rtx) in assembler syntax to file FILE.
4668 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4669 For `%' followed by punctuation, CODE is the punctuation and X is null. */
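/* For instance, in an output template such as "bv%* %%r0(%%r2)"
   (illustrative), '%*' can append ",n" to nullify the delay slot,
   while a plain "%0" reaches the default handling at the bottom and
   prints, e.g., "%r26" for (reg:SI 26).  */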
4671 void
4672 print_operand (FILE *file, rtx x, int code)
4674 switch (code)
4676 case '#':
4677 /* Output a 'nop' if there's nothing for the delay slot. */
4678 if (dbr_sequence_length () == 0)
4679 fputs ("\n\tnop", file);
4680 return;
4681 case '*':
4682 /* Output a nullification completer if there's nothing for the
4683 delay slot or nullification is requested. */
4684 if (dbr_sequence_length () == 0 ||
4685 (final_sequence &&
4686 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4687 fputs (",n", file);
4688 return;
4689 case 'R':
4690 /* Print out the second register name of a register pair.
4691 I.e., R (6) => 7. */
4692 fputs (reg_names[REGNO (x) + 1], file);
4693 return;
4694 case 'r':
4695 /* A register or zero. */
4696 if (x == const0_rtx
4697 || (x == CONST0_RTX (DFmode))
4698 || (x == CONST0_RTX (SFmode)))
4700 fputs ("%r0", file);
4701 return;
4703 else
4704 break;
4705 case 'f':
4706 /* A register or zero (floating point). */
4707 if (x == const0_rtx
4708 || (x == CONST0_RTX (DFmode))
4709 || (x == CONST0_RTX (SFmode)))
4711 fputs ("%fr0", file);
4712 return;
4714 else
4715 break;
4716 case 'A':
4718 rtx xoperands[2];
4720 xoperands[0] = XEXP (XEXP (x, 0), 0);
4721 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4722 output_global_address (file, xoperands[1], 0);
4723 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4724 return;
4727 case 'C': /* Plain (C)ondition */
4728 case 'X':
4729 switch (GET_CODE (x))
4731 case EQ:
4732 fputs ("=", file); break;
4733 case NE:
4734 fputs ("<>", file); break;
4735 case GT:
4736 fputs (">", file); break;
4737 case GE:
4738 fputs (">=", file); break;
4739 case GEU:
4740 fputs (">>=", file); break;
4741 case GTU:
4742 fputs (">>", file); break;
4743 case LT:
4744 fputs ("<", file); break;
4745 case LE:
4746 fputs ("<=", file); break;
4747 case LEU:
4748 fputs ("<<=", file); break;
4749 case LTU:
4750 fputs ("<<", file); break;
4751 default:
4752 gcc_unreachable ();
4754 return;
4755 case 'N': /* Condition, (N)egated */
4756 switch (GET_CODE (x))
4758 case EQ:
4759 fputs ("<>", file); break;
4760 case NE:
4761 fputs ("=", file); break;
4762 case GT:
4763 fputs ("<=", file); break;
4764 case GE:
4765 fputs ("<", file); break;
4766 case GEU:
4767 fputs ("<<", file); break;
4768 case GTU:
4769 fputs ("<<=", file); break;
4770 case LT:
4771 fputs (">=", file); break;
4772 case LE:
4773 fputs (">", file); break;
4774 case LEU:
4775 fputs (">>", file); break;
4776 case LTU:
4777 fputs (">>=", file); break;
4778 default:
4779 gcc_unreachable ();
4781 return;
4782 /* For floating point comparisons. Note that the output
4783 predicates are the complement of the desired mode. The
4784 conditions for GT, GE, LT, LE and LTGT cause an invalid
4785 operation exception if the result is unordered and this
4786 exception is enabled in the floating-point status register. */
4787 case 'Y':
4788 switch (GET_CODE (x))
4790 case EQ:
4791 fputs ("!=", file); break;
4792 case NE:
4793 fputs ("=", file); break;
4794 case GT:
4795 fputs ("!>", file); break;
4796 case GE:
4797 fputs ("!>=", file); break;
4798 case LT:
4799 fputs ("!<", file); break;
4800 case LE:
4801 fputs ("!<=", file); break;
4802 case LTGT:
4803 fputs ("!<>", file); break;
4804 case UNLE:
4805 fputs ("!?<=", file); break;
4806 case UNLT:
4807 fputs ("!?<", file); break;
4808 case UNGE:
4809 fputs ("!?>=", file); break;
4810 case UNGT:
4811 fputs ("!?>", file); break;
4812 case UNEQ:
4813 fputs ("!?=", file); break;
4814 case UNORDERED:
4815 fputs ("!?", file); break;
4816 case ORDERED:
4817 fputs ("?", file); break;
4818 default:
4819 gcc_unreachable ();
4821 return;
4822 case 'S': /* Condition, operands are (S)wapped. */
4823 switch (GET_CODE (x))
4825 case EQ:
4826 fputs ("=", file); break;
4827 case NE:
4828 fputs ("<>", file); break;
4829 case GT:
4830 fputs ("<", file); break;
4831 case GE:
4832 fputs ("<=", file); break;
4833 case GEU:
4834 fputs ("<<=", file); break;
4835 case GTU:
4836 fputs ("<<", file); break;
4837 case LT:
4838 fputs (">", file); break;
4839 case LE:
4840 fputs (">=", file); break;
4841 case LEU:
4842 fputs (">>=", file); break;
4843 case LTU:
4844 fputs (">>", file); break;
4845 default:
4846 gcc_unreachable ();
4848 return;
4849 case 'B': /* Condition, (B)oth swapped and negated. */
4850 switch (GET_CODE (x))
4852 case EQ:
4853 fputs ("<>", file); break;
4854 case NE:
4855 fputs ("=", file); break;
4856 case GT:
4857 fputs (">=", file); break;
4858 case GE:
4859 fputs (">", file); break;
4860 case GEU:
4861 fputs (">>", file); break;
4862 case GTU:
4863 fputs (">>=", file); break;
4864 case LT:
4865 fputs ("<=", file); break;
4866 case LE:
4867 fputs ("<", file); break;
4868 case LEU:
4869 fputs ("<<", file); break;
4870 case LTU:
4871 fputs ("<<=", file); break;
4872 default:
4873 gcc_unreachable ();
4875 return;
4876 case 'k':
4877 gcc_assert (GET_CODE (x) == CONST_INT);
4878 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4879 return;
4880 case 'Q':
4881 gcc_assert (GET_CODE (x) == CONST_INT);
4882 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4883 return;
4884 case 'L':
4885 gcc_assert (GET_CODE (x) == CONST_INT);
4886 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4887 return;
4888 case 'O':
4889 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
4890 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4891 return;
4892 case 'p':
4893 gcc_assert (GET_CODE (x) == CONST_INT);
4894 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4895 return;
4896 case 'P':
4897 gcc_assert (GET_CODE (x) == CONST_INT);
4898 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4899 return;
4900 case 'I':
4901 if (GET_CODE (x) == CONST_INT)
4902 fputs ("i", file);
4903 return;
4904 case 'M':
4905 case 'F':
4906 switch (GET_CODE (XEXP (x, 0)))
4908 case PRE_DEC:
4909 case PRE_INC:
4910 if (ASSEMBLER_DIALECT == 0)
4911 fputs ("s,mb", file);
4912 else
4913 fputs (",mb", file);
4914 break;
4915 case POST_DEC:
4916 case POST_INC:
4917 if (ASSEMBLER_DIALECT == 0)
4918 fputs ("s,ma", file);
4919 else
4920 fputs (",ma", file);
4921 break;
4922 case PLUS:
4923 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
4924 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
4926 if (ASSEMBLER_DIALECT == 0)
4927 fputs ("x", file);
4929 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4930 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4932 if (ASSEMBLER_DIALECT == 0)
4933 fputs ("x,s", file);
4934 else
4935 fputs (",s", file);
4937 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4938 fputs ("s", file);
4939 break;
4940 default:
4941 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4942 fputs ("s", file);
4943 break;
4945 return;
4946 case 'G':
4947 output_global_address (file, x, 0);
4948 return;
4949 case 'H':
4950 output_global_address (file, x, 1);
4951 return;
4952 case 0: /* Don't do anything special */
4953 break;
4954 case 'Z':
4956 unsigned op[3];
4957 compute_zdepwi_operands (INTVAL (x), op);
4958 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4959 return;
4961 case 'z':
4963 unsigned op[3];
4964 compute_zdepdi_operands (INTVAL (x), op);
4965 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4966 return;
4968 case 'c':
4969 /* We can get here from a .vtable_inherit due to our
4970 CONSTANT_ADDRESS_P rejecting perfectly good constant
4971 addresses. */
4972 break;
4973 default:
4974 gcc_unreachable ();
4976 if (GET_CODE (x) == REG)
4978 fputs (reg_names [REGNO (x)], file);
4979 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4981 fputs ("R", file);
4982 return;
4984 if (FP_REG_P (x)
4985 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4986 && (REGNO (x) & 1) == 0)
4987 fputs ("L", file);
4989 else if (GET_CODE (x) == MEM)
4991 int size = GET_MODE_SIZE (GET_MODE (x));
4992 rtx base = NULL_RTX;
4993 switch (GET_CODE (XEXP (x, 0)))
4995 case PRE_DEC:
4996 case POST_DEC:
4997 base = XEXP (XEXP (x, 0), 0);
4998 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4999 break;
5000 case PRE_INC:
5001 case POST_INC:
5002 base = XEXP (XEXP (x, 0), 0);
5003 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5004 break;
5005 case PLUS:
5006 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5007 fprintf (file, "%s(%s)",
5008 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5009 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5010 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5011 fprintf (file, "%s(%s)",
5012 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5013 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5014 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5015 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5017 /* Because the REG_POINTER flag can get lost during reload,
5018 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5019 index and base registers in the combined move patterns. */
5020 rtx base = XEXP (XEXP (x, 0), 1);
5021 rtx index = XEXP (XEXP (x, 0), 0);
5023 fprintf (file, "%s(%s)",
5024 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5026 else
5027 output_address (XEXP (x, 0));
5028 break;
5029 default:
5030 output_address (XEXP (x, 0));
5031 break;
5034 else
5035 output_addr_const (file, x);
5038 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5040 void
5041 output_global_address (FILE *file, rtx x, int round_constant)
5044 /* Imagine (high (const (plus ...))). */
5045 if (GET_CODE (x) == HIGH)
5046 x = XEXP (x, 0);
5048 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5049 output_addr_const (file, x);
5050 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5052 output_addr_const (file, x);
5053 fputs ("-$global$", file);
5055 else if (GET_CODE (x) == CONST)
5057 const char *sep = "";
5058 int offset = 0; /* assembler wants -$global$ at end */
5059 rtx base = NULL_RTX;
5061 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5063 case SYMBOL_REF:
5064 base = XEXP (XEXP (x, 0), 0);
5065 output_addr_const (file, base);
5066 break;
5067 case CONST_INT:
5068 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5069 break;
5070 default:
5071 gcc_unreachable ();
5074 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5076 case SYMBOL_REF:
5077 base = XEXP (XEXP (x, 0), 1);
5078 output_addr_const (file, base);
5079 break;
5080 case CONST_INT:
5081 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5082 break;
5083 default:
5084 gcc_unreachable ();
5087 /* How bogus. The compiler is apparently responsible for
5088 rounding the constant if it uses an LR field selector.
5090 The linker and/or assembler seem a better place since
5091 they have to do this kind of thing already.
5093 If we fail to do this, HP's optimizing linker may eliminate
5094 an addil, but not update the ldw/stw/ldo instruction that
5095 uses the result of the addil. */
5096 if (round_constant)
5097 offset = ((offset + 0x1000) & ~0x1fff);
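/* For example, an offset of 0x1234 becomes
   (0x1234 + 0x1000) & ~0x1fff == 0x2000; i.e., the offset is
   rounded to the nearest 8k (0x2000) boundary.  */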
5099 switch (GET_CODE (XEXP (x, 0)))
5101 case PLUS:
5102 if (offset < 0)
5104 offset = -offset;
5105 sep = "-";
5107 else
5108 sep = "+";
5109 break;
5111 case MINUS:
5112 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5113 sep = "-";
5114 break;
5116 default:
5117 gcc_unreachable ();
5120 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5121 fputs ("-$global$", file);
5122 if (offset)
5123 fprintf (file, "%s%d", sep, offset);
5125 else
5126 output_addr_const (file, x);
5129 /* Output boilerplate text to appear at the beginning of the file.
5130 There are several possible versions. */
5131 #define aputs(x) fputs(x, asm_out_file)
5132 static inline void
5133 pa_file_start_level (void)
5135 if (TARGET_64BIT)
5136 aputs ("\t.LEVEL 2.0w\n");
5137 else if (TARGET_PA_20)
5138 aputs ("\t.LEVEL 2.0\n");
5139 else if (TARGET_PA_11)
5140 aputs ("\t.LEVEL 1.1\n");
5141 else
5142 aputs ("\t.LEVEL 1.0\n");
5145 static inline void
5146 pa_file_start_space (int sortspace)
5148 aputs ("\t.SPACE $PRIVATE$");
5149 if (sortspace)
5150 aputs (",SORT=16");
5151 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5152 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5153 "\n\t.SPACE $TEXT$");
5154 if (sortspace)
5155 aputs (",SORT=8");
5156 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5157 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5160 static inline void
5161 pa_file_start_file (int want_version)
5163 if (write_symbols != NO_DEBUG)
5165 output_file_directive (asm_out_file, main_input_filename);
5166 if (want_version)
5167 aputs ("\t.version\t\"01.01\"\n");
5171 static inline void
5172 pa_file_start_mcount (const char *aswhat)
5174 if (profile_flag)
5175 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5178 static void
5179 pa_elf_file_start (void)
5181 pa_file_start_level ();
5182 pa_file_start_mcount ("ENTRY");
5183 pa_file_start_file (0);
5186 static void
5187 pa_som_file_start (void)
5189 pa_file_start_level ();
5190 pa_file_start_space (0);
5191 aputs ("\t.IMPORT $global$,DATA\n"
5192 "\t.IMPORT $$dyncall,MILLICODE\n");
5193 pa_file_start_mcount ("CODE");
5194 pa_file_start_file (0);
5197 static void
5198 pa_linux_file_start (void)
5200 pa_file_start_file (1);
5201 pa_file_start_level ();
5202 pa_file_start_mcount ("CODE");
5205 static void
5206 pa_hpux64_gas_file_start (void)
5208 pa_file_start_level ();
5209 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5210 if (profile_flag)
5211 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5212 #endif
5213 pa_file_start_file (1);
5216 static void
5217 pa_hpux64_hpas_file_start (void)
5219 pa_file_start_level ();
5220 pa_file_start_space (1);
5221 pa_file_start_mcount ("CODE");
5222 pa_file_start_file (0);
5224 #undef aputs
5226 /* Search the deferred plabel list for SYMBOL and return its internal
5227 label. If an entry for SYMBOL is not found, a new entry is created. */
5229 rtx
5230 get_deferred_plabel (rtx symbol)
5232 const char *fname = XSTR (symbol, 0);
5233 size_t i;
5235 /* See if we have already put this function on the list of deferred
5236 plabels. This list is generally small, so a linear search is not
5237 too ugly. If it proves too slow, replace it with something faster. */
5238 for (i = 0; i < n_deferred_plabels; i++)
5239 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5240 break;
5242 /* If the deferred plabel list is empty, or this entry was not found
5243 on the list, create a new entry on the list. */
5244 if (deferred_plabels == NULL || i == n_deferred_plabels)
5246 tree id;
5248 if (deferred_plabels == 0)
5249 deferred_plabels = (struct deferred_plabel *)
5250 ggc_alloc (sizeof (struct deferred_plabel));
5251 else
5252 deferred_plabels = (struct deferred_plabel *)
5253 ggc_realloc (deferred_plabels,
5254 ((n_deferred_plabels + 1)
5255 * sizeof (struct deferred_plabel)));
5257 i = n_deferred_plabels++;
5258 deferred_plabels[i].internal_label = gen_label_rtx ();
5259 deferred_plabels[i].symbol = symbol;
5261 /* Gross. We have just implicitly taken the address of this
5262 function. Mark it in the same manner as assemble_name. */
5263 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5264 if (id)
5265 mark_referenced (id);
5268 return deferred_plabels[i].internal_label;
5271 static void
5272 output_deferred_plabels (void)
5274 size_t i;
5275 /* If we have deferred plabels, then we need to switch into the data
5276 section and align it appropriately (a 4-byte boundary, or 8-byte
5277 for the 64-bit target) before we output the deferred plabels. */
5278 if (n_deferred_plabels)
5280 data_section ();
5281 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5284 /* Now output the deferred plabels. */
5285 for (i = 0; i < n_deferred_plabels; i++)
5287 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5288 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5289 assemble_integer (deferred_plabels[i].symbol,
5290 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5294 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5295 /* Initialize optabs to point to HPUX long double emulation routines. */
5296 static void
5297 pa_hpux_init_libfuncs (void)
5299 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5300 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5301 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5302 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5303 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5304 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5305 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5306 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5307 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5309 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5310 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5311 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5312 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5313 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5314 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5315 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5317 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5318 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5319 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5320 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5322 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5323 ? "__U_Qfcnvfxt_quad_to_sgl"
5324 : "_U_Qfcnvfxt_quad_to_sgl");
5325 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5326 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5327 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5329 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5330 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5332 #endif
5334 /* HP's millicode routines mean something special to the assembler.
5335 Keep track of which ones we have used. */
5337 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5338 static void import_milli (enum millicodes);
5339 static char imported[(int) end1000];
5340 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5341 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5342 #define MILLI_START 10
5344 static void
5345 import_milli (enum millicodes code)
5347 char str[sizeof (import_string)];
5349 if (!imported[(int) code])
5351 imported[(int) code] = 1;
5352 strcpy (str, import_string);
5353 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5354 output_asm_insn (str, 0);
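/* For example, the first call to import_milli (mulI) outputs
   ".IMPORT $$mulI,MILLICODE"; subsequent calls output nothing.  */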
5358 /* The register constraints have put the operands and return value in
5359 the proper registers. */
5361 const char *
5362 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5364 import_milli (mulI);
5365 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5368 /* Emit the rtl for doing a division by a constant. */
5370 /* Do magic division millicodes exist for this value? */
5371 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
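/* I.e., magic millicodes exist for divisors 3, 5, 6, 7, 9, 10, 12,
   14 and 15.  Division by 1, 2, 4 and 8 presumably reduces to copies
   or shifts elsewhere; 11 and 13 have no special millicode.  */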
5373 /* We'll use an array to keep track of the magic millicodes and
5374 whether or not we've used them already. [n][0] is signed, [n][1] is
5375 unsigned. */
5377 static int div_milli[16][2];
5379 int
5380 emit_hpdiv_const (rtx *operands, int unsignedp)
5382 if (GET_CODE (operands[2]) == CONST_INT
5383 && INTVAL (operands[2]) > 0
5384 && INTVAL (operands[2]) < 16
5385 && magic_milli[INTVAL (operands[2])])
5387 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5389 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5390 emit
5391 (gen_rtx_PARALLEL
5392 (VOIDmode,
5393 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5394 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5395 SImode,
5396 gen_rtx_REG (SImode, 26),
5397 operands[2])),
5398 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5399 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5400 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5401 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5402 gen_rtx_CLOBBER (VOIDmode, ret))));
5403 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5404 return 1;
5406 return 0;
5409 const char *
5410 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5412 int divisor;
5414 /* If the divisor is a constant, try to use one of the special
5415 opcodes. */
5416 if (GET_CODE (operands[0]) == CONST_INT)
5418 static char buf[100];
5419 divisor = INTVAL (operands[0]);
5420 if (!div_milli[divisor][unsignedp])
5422 div_milli[divisor][unsignedp] = 1;
5423 if (unsignedp)
5424 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5425 else
5426 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5428 if (unsignedp)
5430 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5431 INTVAL (operands[0]));
5432 return output_millicode_call (insn,
5433 gen_rtx_SYMBOL_REF (SImode, buf));
5435 else
5437 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5438 INTVAL (operands[0]));
5439 return output_millicode_call (insn,
5440 gen_rtx_SYMBOL_REF (SImode, buf));
5443 /* Divisor isn't a special constant. */
5444 else
5446 if (unsignedp)
5448 import_milli (divU);
5449 return output_millicode_call (insn,
5450 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5452 else
5454 import_milli (divI);
5455 return output_millicode_call (insn,
5456 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5461 /* Output a $$rem millicode to do mod. */
5463 const char *
5464 output_mod_insn (int unsignedp, rtx insn)
5466 if (unsignedp)
5468 import_milli (remU);
5469 return output_millicode_call (insn,
5470 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5472 else
5474 import_milli (remI);
5475 return output_millicode_call (insn,
5476 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
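/* Output the .CALL argument-location descriptor for CALL_INSN.
   For example, a call passing two ints (in %r26 and %r25) gets
   ".CALL ARGW0=GR,ARGW1=GR"; FP arguments are described with
   FR/FU argument words instead.  */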
5480 void
5481 output_arg_descriptor (rtx call_insn)
5483 const char *arg_regs[4];
5484 enum machine_mode arg_mode;
5485 rtx link;
5486 int i, output_flag = 0;
5487 int regno;
5489 /* We neither need nor want argument location descriptors for the
5490 64bit runtime environment or the ELF32 environment. */
5491 if (TARGET_64BIT || TARGET_ELF32)
5492 return;
5494 for (i = 0; i < 4; i++)
5495 arg_regs[i] = 0;
5497 /* Specify explicitly that no argument relocations should take place
5498 if using the portable runtime calling conventions. */
5499 if (TARGET_PORTABLE_RUNTIME)
5501 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5502 asm_out_file);
5503 return;
5506 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5507 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5508 link; link = XEXP (link, 1))
5510 rtx use = XEXP (link, 0);
5512 if (! (GET_CODE (use) == USE
5513 && GET_CODE (XEXP (use, 0)) == REG
5514 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5515 continue;
5517 arg_mode = GET_MODE (XEXP (use, 0));
5518 regno = REGNO (XEXP (use, 0));
5519 if (regno >= 23 && regno <= 26)
5521 arg_regs[26 - regno] = "GR";
5522 if (arg_mode == DImode)
5523 arg_regs[25 - regno] = "GR";
5525 else if (regno >= 32 && regno <= 39)
5527 if (arg_mode == SFmode)
5528 arg_regs[(regno - 32) / 2] = "FR";
5529 else
5531 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5532 arg_regs[(regno - 34) / 2] = "FR";
5533 arg_regs[(regno - 34) / 2 + 1] = "FU";
5534 #else
5535 arg_regs[(regno - 34) / 2] = "FU";
5536 arg_regs[(regno - 34) / 2 + 1] = "FR";
5537 #endif
5541 fputs ("\t.CALL ", asm_out_file);
5542 for (i = 0; i < 4; i++)
5544 if (arg_regs[i])
5546 if (output_flag++)
5547 fputc (',', asm_out_file);
5548 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5551 fputc ('\n', asm_out_file);
5554 /* Return the class of any secondary reload register that is needed to
5555 move IN into a register in class CLASS using mode MODE.
5557 Profiling has shown that this routine and its descendants account for
5558 a significant amount of compile time (~7%). So it has been
5559 optimized to reduce redundant computations and eliminate useless
5560 function calls.
5562 It might be worthwhile to try and make this a leaf function too. */
5564 enum reg_class
5565 secondary_reload_class (enum reg_class class, enum machine_mode mode, rtx in)
5567 int regno, is_symbolic;
5569 /* Trying to load a constant into a FP register during PIC code
5570 generation will require %r1 as a scratch register. */
5571 if (flag_pic
5572 && GET_MODE_CLASS (mode) == MODE_INT
5573 && FP_REG_CLASS_P (class)
5574 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5575 return R1_REGS;
5577 /* Profiling showed the PA port spends about 1.3% of its compilation
5578 time in true_regnum from calls inside secondary_reload_class. */
5580 if (GET_CODE (in) == REG)
5582 regno = REGNO (in);
5583 if (regno >= FIRST_PSEUDO_REGISTER)
5584 regno = true_regnum (in);
5586 else if (GET_CODE (in) == SUBREG)
5587 regno = true_regnum (in);
5588 else
5589 regno = -1;
5591 /* If we have something like (mem (mem (...))), we can safely assume the
5592 inner MEM will end up in a general register after reloading, so there's
5593 no need for a secondary reload. */
5594 if (GET_CODE (in) == MEM
5595 && GET_CODE (XEXP (in, 0)) == MEM)
5596 return NO_REGS;
5598 /* Handle out of range displacement for integer mode loads/stores of
5599 FP registers. */
5600 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5601 && GET_MODE_CLASS (mode) == MODE_INT
5602 && FP_REG_CLASS_P (class))
5603 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5604 return GENERAL_REGS;
5606 /* A SAR<->FP register copy requires a secondary register (GPR) as
5607 well as secondary memory. */
5608 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5609 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5610 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5611 return GENERAL_REGS;
5613 if (GET_CODE (in) == HIGH)
5614 in = XEXP (in, 0);
5616 /* Profiling has shown that GCC spends about 2.6% of its compilation
5617 time in symbolic_operand from calls inside secondary_reload_class.
5619 We use an inline copy and only compute its return value once to avoid
5620 useless work. */
5621 switch (GET_CODE (in))
5623 rtx tmp;
5625 case SYMBOL_REF:
5626 case LABEL_REF:
5627 is_symbolic = 1;
5628 break;
5629 case CONST:
5630 tmp = XEXP (in, 0);
5631 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5632 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5633 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5634 break;
5636 default:
5637 is_symbolic = 0;
5638 break;
5641 if (!flag_pic
5642 && is_symbolic
5643 && read_only_operand (in, VOIDmode))
5644 return NO_REGS;
5646 if (class != R1_REGS && is_symbolic)
5647 return R1_REGS;
5649 return NO_REGS;
5652 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5653 by invisible reference. As a GCC extension, we also pass anything
5654 with a zero or variable size by reference.
5656 The 64-bit runtime does not describe passing any types by invisible
5657 reference. The internals of GCC can't currently handle passing
5658 empty structures, and zero or variable length arrays when they are
5659 not passed entirely on the stack or by reference. Thus, as a GCC
5660 extension, we pass these types by reference. The HP compiler doesn't
5661 support these types, so hopefully there shouldn't be any compatibility
5662 issues. This may have to be revisited when HP releases a C99 compiler
5663 or updates the ABI. */
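/* For example, with the 32-bit runtime a 16-byte struct (size > 8)
   is passed by reference while a 4-byte int is passed by value;
   with the 64-bit runtime only zero- or variable-sized types are
   passed by reference.  */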
5665 static bool
5666 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5667 enum machine_mode mode, tree type,
5668 bool named ATTRIBUTE_UNUSED)
5670 HOST_WIDE_INT size;
5672 if (type)
5673 size = int_size_in_bytes (type);
5674 else
5675 size = GET_MODE_SIZE (mode);
5677 if (TARGET_64BIT)
5678 return size <= 0;
5679 else
5680 return size <= 0 || size > 8;
5683 enum direction
5684 function_arg_padding (enum machine_mode mode, tree type)
5686 if (mode == BLKmode
5687 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5689 /* Return none if justification is not required. */
5690 if (type
5691 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5692 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5693 return none;
5695 /* The directions set here are ignored when a BLKmode argument larger
5696 than a word is placed in a register. Different code is used for
5697 the stack and registers. This makes it difficult to have a
5698 consistent data representation for both the stack and registers.
5699 For both runtimes, the justification and padding for arguments on
5700 the stack and in registers should be identical. */
5701 if (TARGET_64BIT)
5702 /* The 64-bit runtime specifies left justification for aggregates. */
5703 return upward;
5704 else
5705 /* The 32-bit runtime architecture specifies right justification.
5706 When the argument is passed on the stack, the argument is padded
5707 with garbage on the left. The HP compiler pads with zeros. */
5708 return downward;
5711 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5712 return downward;
5713 else
5714 return none;
5718 /* Do what is necessary for `va_start'. We look at the current function
5719 to determine if stdargs or varargs is used and fill in an initial
5720 va_list. A pointer to this constructor is returned. */
5722 static rtx
5723 hppa_builtin_saveregs (void)
5725 rtx offset, dest;
5726 tree fntype = TREE_TYPE (current_function_decl);
5727 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5728 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5729 != void_type_node)))
5730 ? UNITS_PER_WORD : 0);
5732 if (argadj)
5733 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5734 else
5735 offset = current_function_arg_offset_rtx;
5737 if (TARGET_64BIT)
5739 int i, off;
5741 /* Adjust for varargs/stdarg differences. */
5742 if (argadj)
5743 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5744 else
5745 offset = current_function_arg_offset_rtx;
5747 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5748 from the incoming arg pointer and growing to larger addresses. */
5749 for (i = 26, off = -64; i >= 19; i--, off += 8)
5750 emit_move_insn (gen_rtx_MEM (word_mode,
5751 plus_constant (arg_pointer_rtx, off)),
5752 gen_rtx_REG (word_mode, i));
5754 /* The incoming args pointer points just beyond the flushback area;
5755 normally this is not a serious concern. However, when we are doing
5756 varargs/stdargs we want to make the arg pointer point to the start
5757 of the incoming argument area. */
5758 emit_move_insn (virtual_incoming_args_rtx,
5759 plus_constant (arg_pointer_rtx, -64));
5761 /* Now return a pointer to the first anonymous argument. */
5762 return copy_to_reg (expand_binop (Pmode, add_optab,
5763 virtual_incoming_args_rtx,
5764 offset, 0, 0, OPTAB_LIB_WIDEN));
5767 /* Store general registers on the stack. */
5768 dest = gen_rtx_MEM (BLKmode,
5769 plus_constant (current_function_internal_arg_pointer,
5770 -16));
5771 set_mem_alias_set (dest, get_varargs_alias_set ());
5772 set_mem_align (dest, BITS_PER_WORD);
5773 move_block_from_reg (23, dest, 4);
5775 /* move_block_from_reg will emit code to store the argument registers
5776 individually as scalar stores.
5778 However, other insns may later load from the same addresses for
5779 a structure load (passing a struct to a varargs routine).
5781 The alias code assumes that such aliasing can never happen, so we
5782 have to keep memory referencing insns from moving up beyond the
5783 last argument register store. So we emit a blockage insn here. */
5784 emit_insn (gen_blockage ());
5786 return copy_to_reg (expand_binop (Pmode, add_optab,
5787 current_function_internal_arg_pointer,
5788 offset, 0, 0, OPTAB_LIB_WIDEN));
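/* Illustrative layout, not part of the original source: the 64-bit
   varargs save area built above, relative to the incoming arg
   pointer AP, is

       AP-64: %r26   AP-56: %r25   AP-48: %r24   AP-40: %r23
       AP-32: %r22   AP-24: %r21   AP-16: %r20   AP-8:  %r19

   The returned pointer to the first anonymous argument falls inside
   this area for register arguments and beyond it for arguments that
   arrived on the stack.  */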
5791 void
5792 hppa_va_start (tree valist, rtx nextarg)
5794 nextarg = expand_builtin_saveregs ();
5795 std_expand_builtin_va_start (valist, nextarg);
5798 static tree
5799 hppa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p, tree *post_p)
5801 if (TARGET_64BIT)
5803 /* Args grow upward. We can use the generic routines. */
5804 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5806 else /* !TARGET_64BIT */
5808 tree ptr = build_pointer_type (type);
5809 tree valist_type;
5810 tree t, u;
5811 unsigned int size, ofs;
5812 bool indirect;
5814 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
5815 if (indirect)
5817 type = ptr;
5818 ptr = build_pointer_type (type);
5820 size = int_size_in_bytes (type);
5821 valist_type = TREE_TYPE (valist);
5823 /* Args grow down. Not handled by generic routines. */
5825 u = fold_convert (valist_type, size_in_bytes (type));
5826 t = build (MINUS_EXPR, valist_type, valist, u);
5828 /* Copied from va-pa.h, but we probably don't need to align to
5829 word size, since we generate and preserve that invariant. */
5830 u = build_int_cst (valist_type, (size > 4 ? -8 : -4));
5831 t = build (BIT_AND_EXPR, valist_type, t, u);
5833 t = build (MODIFY_EXPR, valist_type, valist, t);
5835 ofs = (8 - size) % 4;
5836 if (ofs != 0)
5838 u = fold_convert (valist_type, size_int (ofs));
5839 t = build (PLUS_EXPR, valist_type, t, u);
5842 t = fold_convert (ptr, t);
5843 t = build_va_arg_indirect_ref (t);
5845 if (indirect)
5846 t = build_va_arg_indirect_ref (t);
5848 return t;
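/* Worked example, illustrative only: on the 32-bit runtime with the
   current valist at AP-12, fetching a double (size 8) computes

       t = valist - 8 = AP-20
       t = t & -8     = AP-24      (8-byte alignment since size > 4)
       valist = t                  (the argument is read at *t)

   For a char, size = 1 gives ofs = (8 - 1) % 4 = 3, so the value is
   read from the rightmost byte of its word slot, matching the 32-bit
   runtime's right justification of small arguments.  */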
5852 /* True if MODE is valid for the target. By "valid", we mean able to
5853 be manipulated in non-trivial ways. In particular, this means all
5854 the arithmetic is supported.
5856 Currently, TImode is not valid as the HP 64-bit runtime documentation
5857 doesn't document the alignment and calling conventions for this type.
5858 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
5859    2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
5861 static bool
5862 pa_scalar_mode_supported_p (enum machine_mode mode)
5864 int precision = GET_MODE_PRECISION (mode);
5866 switch (GET_MODE_CLASS (mode))
5868 case MODE_PARTIAL_INT:
5869 case MODE_INT:
5870 if (precision == CHAR_TYPE_SIZE)
5871 return true;
5872 if (precision == SHORT_TYPE_SIZE)
5873 return true;
5874 if (precision == INT_TYPE_SIZE)
5875 return true;
5876 if (precision == LONG_TYPE_SIZE)
5877 return true;
5878 if (precision == LONG_LONG_TYPE_SIZE)
5879 return true;
5880 return false;
5882 case MODE_FLOAT:
5883 if (precision == FLOAT_TYPE_SIZE)
5884 return true;
5885 if (precision == DOUBLE_TYPE_SIZE)
5886 return true;
5887 if (precision == LONG_DOUBLE_TYPE_SIZE)
5888 return true;
5889 return false;
5891 default:
5892 gcc_unreachable ();
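/* Consequence, for illustration: on the 64-bit port, a 128-bit integer
   mode has PRECISION == 2 * BITS_PER_WORD while LONG_LONG_TYPE_SIZE is
   64, so none of the cases above match and TImode is rejected.  */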
5896 /* This routine handles all the normal conditional branch sequences we
5897 might need to generate. It handles compare immediate vs compare
5898 register, nullification of delay slots, varying length branches,
5899 negated branches, and all combinations of the above. It returns the
5900 output appropriate to emit the branch corresponding to all given
5901 parameters. */
5903 const char *
5904 output_cbranch (rtx *operands, int nullify, int length, int negated, rtx insn)
5906 static char buf[100];
5907 int useskip = 0;
5908 rtx xoperands[5];
5910 /* A conditional branch to the following instruction (e.g. the delay slot)
5911 is asking for a disaster. This can happen when not optimizing and
5912 when jump optimization fails.
5914 While it is usually safe to emit nothing, this can fail if the
5915 preceding instruction is a nullified branch with an empty delay
5916 slot and the same branch target as this branch. We could check
5917 for this but jump optimization should eliminate nop jumps. It
5918 is always safe to emit a nop. */
5919 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5920 return "nop";
5922 /* The doubleword form of the cmpib instruction doesn't have the LEU
5923 and GTU conditions while the cmpb instruction does. Since we accept
5924 zero for cmpb, we must ensure that we use cmpb for the comparison. */
5925 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
5926 operands[2] = gen_rtx_REG (DImode, 0);
5927 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
5928 operands[1] = gen_rtx_REG (DImode, 0);
5930 /* If this is a long branch with its delay slot unfilled, set `nullify'
5931 as it can nullify the delay slot and save a nop. */
5932 if (length == 8 && dbr_sequence_length () == 0)
5933 nullify = 1;
5935 /* If this is a short forward conditional branch which did not get
5936 its delay slot filled, the delay slot can still be nullified. */
5937 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5938 nullify = forward_branch_p (insn);
5940 /* A forward branch over a single nullified insn can be done with a
5941 comclr instruction. This avoids a single cycle penalty due to
5942 mis-predicted branch if we fall through (branch not taken). */
5943 if (length == 4
5944 && next_real_insn (insn) != 0
5945 && get_attr_length (next_real_insn (insn)) == 4
5946 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5947 && nullify)
5948 useskip = 1;
5950 switch (length)
5952 /* All short conditional branches except backwards with an unfilled
5953 delay slot. */
5954 case 4:
5955 if (useskip)
5956 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5957 else
5958 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5959 if (GET_MODE (operands[1]) == DImode)
5960 strcat (buf, "*");
5961 if (negated)
5962 strcat (buf, "%B3");
5963 else
5964 strcat (buf, "%S3");
5965 if (useskip)
5966 strcat (buf, " %2,%r1,%%r0");
5967 else if (nullify)
5968 strcat (buf, ",n %2,%r1,%0");
5969 else
5970 strcat (buf, " %2,%r1,%0");
5971 break;
5973 /* All long conditionals. Note a short backward branch with an
5974 unfilled delay slot is treated just like a long backward branch
5975 with an unfilled delay slot. */
5976 case 8:
5977 /* Handle weird backwards branch with a filled delay slot
5978    which is nullified. */
5979 if (dbr_sequence_length () != 0
5980 && ! forward_branch_p (insn)
5981 && nullify)
5983 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5984 if (GET_MODE (operands[1]) == DImode)
5985 strcat (buf, "*");
5986 if (negated)
5987 strcat (buf, "%S3");
5988 else
5989 strcat (buf, "%B3");
5990 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
5992 /* Handle short backwards branch with an unfilled delay slot.
5993 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
5994 taken and untaken branches. */
5995 else if (dbr_sequence_length () == 0
5996 && ! forward_branch_p (insn)
5997 && INSN_ADDRESSES_SET_P ()
5998 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5999 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6001 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6002 if (GET_MODE (operands[1]) == DImode)
6003 strcat (buf, "*");
6004 if (negated)
6005 strcat (buf, "%B3 %2,%r1,%0%#");
6006 else
6007 strcat (buf, "%S3 %2,%r1,%0%#");
6009 else
6011 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6012 if (GET_MODE (operands[1]) == DImode)
6013 strcat (buf, "*");
6014 if (negated)
6015 strcat (buf, "%S3");
6016 else
6017 strcat (buf, "%B3");
6018 if (nullify)
6019 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6020 else
6021 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6023 break;
6025 case 20:
6026 case 28:
6027 xoperands[0] = operands[0];
6028 xoperands[1] = operands[1];
6029 xoperands[2] = operands[2];
6030 xoperands[3] = operands[3];
6032 /* The reversed conditional branch must branch over one additional
6033 instruction if the delay slot is filled. If the delay slot
6034 is empty, the instruction after the reversed condition branch
6035 must be nullified. */
6036 nullify = dbr_sequence_length () == 0;
6037 xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
6039 /* Create a reversed conditional branch which branches around
6040 the following insns. */
6041 if (GET_MODE (operands[1]) != DImode)
6043 if (nullify)
6045 if (negated)
6046 strcpy (buf,
6047 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6048 else
6049 strcpy (buf,
6050 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6052 else
6054 if (negated)
6055 strcpy (buf,
6056 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6057 else
6058 strcpy (buf,
6059 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6062 else
6064 if (nullify)
6066 if (negated)
6067 strcpy (buf,
6068 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6069 else
6070 strcpy (buf,
6071 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6073 else
6075 if (negated)
6076 strcpy (buf,
6077 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6078 else
6079 strcpy (buf,
6080 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6084 output_asm_insn (buf, xoperands);
6085 return output_lbranch (operands[0], insn);
6087 default:
6088 gcc_unreachable ();
6090 return buf;
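/* Illustrative sketch, not from the original source: for a short
   forward "branch if %r4 equals %r5" with a nullified delay slot, the
   length-4 template built above comes out as

       comb,=,n %r5,%r4,L$0004     (PA 1.x spelling)
       cmpb,=,n %r5,%r4,L$0004     (PA 2.0 spelling)

   The length-8 case instead emits a comclr/cmpclr with the inverted
   condition to conditionally skip an unconditional branch.  */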
6093 /* This routine handles long unconditional branches that exceed the
6094 maximum range of a simple branch instruction. */
6096 const char *
6097 output_lbranch (rtx dest, rtx insn)
6099 rtx xoperands[2];
6101 xoperands[0] = dest;
6103 /* First, free up the delay slot. */
6104 if (dbr_sequence_length () != 0)
6106 /* We can't handle a jump in the delay slot. */
6107 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6109 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6110 optimize, 0, NULL);
6112 /* Now delete the delay insn. */
6113 PUT_CODE (NEXT_INSN (insn), NOTE);
6114 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6115 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6118 /* Output an insn to save %r1. The runtime documentation doesn't
6119    specify whether the "Clean Up" slot in the caller's frame can
6120 be clobbered by the callee. It isn't copied by HP's builtin
6121 alloca, so this suggests that it can be clobbered if necessary.
6122 The "Static Link" location is copied by HP builtin alloca, so
6123 we avoid using it. Using the cleanup slot might be a problem
6124 if we have to interoperate with languages that pass cleanup
6125 information. However, it should be possible to handle these
6126 situations with GCC's asm feature.
6128 The "Current RP" slot is reserved for the called procedure, so
6129 we try to use it when we don't have a frame of our own. It's
6130 rather unlikely that we won't have a frame when we need to emit
6131 a very long branch.
6133    Really the way to go long term is a register scavenger; go to
6134 the target of the jump and find a register which we can use
6135 as a scratch to hold the value in %r1. Then, we wouldn't have
6136 to free up the delay slot or clobber a slot that may be needed
6137 for other purposes. */
6138 if (TARGET_64BIT)
6140 if (actual_fsize == 0 && !regs_ever_live[2])
6141 /* Use the return pointer slot in the frame marker. */
6142 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6143 else
6144 /* Use the slot at -40 in the frame marker since HP builtin
6145 alloca doesn't copy it. */
6146 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6148 else
6150 if (actual_fsize == 0 && !regs_ever_live[2])
6151 /* Use the return pointer slot in the frame marker. */
6152 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6153 else
6154 /* Use the "Clean Up" slot in the frame marker. In GCC,
6155 the only other use of this location is for copying a
6156 floating point double argument from a floating-point
6157 register to two general registers. The copy is done
6158 as an "atomic" operation when outputting a call, so it
6159 won't interfere with our using the location here. */
6160 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6163 if (TARGET_PORTABLE_RUNTIME)
6165 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6166 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6167 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6169 else if (flag_pic)
6171 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6172 if (TARGET_SOM || !TARGET_GAS)
6174 xoperands[1] = gen_label_rtx ();
6175 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6176 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6177 CODE_LABEL_NUMBER (xoperands[1]));
6178 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6180 else
6182 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6183 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6185 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6187 else
6188 /* Now output a very long branch to the original target. */
6189 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6191 /* Now restore the value of %r1 in the delay slot. */
6192 if (TARGET_64BIT)
6194 if (actual_fsize == 0 && !regs_ever_live[2])
6195 return "ldd -16(%%r30),%%r1";
6196 else
6197 return "ldd -40(%%r30),%%r1";
6199 else
6201 if (actual_fsize == 0 && !regs_ever_live[2])
6202 return "ldw -20(%%r30),%%r1";
6203 else
6204 return "ldw -12(%%r30),%%r1";
6208 /* This routine handles all the branch-on-bit conditional branch sequences we
6209 might need to generate. It handles nullification of delay slots,
6210 varying length branches, negated branches and all combinations of the
6211    above. It returns the appropriate output template to emit the branch. */
6213 const char *
6214 output_bb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6215 int negated, rtx insn, int which)
6217 static char buf[100];
6218 int useskip = 0;
6220 /* A conditional branch to the following instruction (e.g. the delay slot) is
6221 asking for a disaster. I do not think this can happen as this pattern
6222 is only used when optimizing; jump optimization should eliminate the
6223 jump. But be prepared just in case. */
6225 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6226 return "nop";
6228 /* If this is a long branch with its delay slot unfilled, set `nullify'
6229 as it can nullify the delay slot and save a nop. */
6230 if (length == 8 && dbr_sequence_length () == 0)
6231 nullify = 1;
6233 /* If this is a short forward conditional branch which did not get
6234 its delay slot filled, the delay slot can still be nullified. */
6235 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6236 nullify = forward_branch_p (insn);
6238 /* A forward branch over a single nullified insn can be done with an
6239 extrs instruction. This avoids a single cycle penalty due to
6240 mis-predicted branch if we fall through (branch not taken). */
6242 if (length == 4
6243 && next_real_insn (insn) != 0
6244 && get_attr_length (next_real_insn (insn)) == 4
6245 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6246 && nullify)
6247 useskip = 1;
6249 switch (length)
6252 /* All short conditional branches except backwards with an unfilled
6253 delay slot. */
6254 case 4:
6255 if (useskip)
6256 strcpy (buf, "{extrs,|extrw,s,}");
6257 else
6258 strcpy (buf, "bb,");
6259 if (useskip && GET_MODE (operands[0]) == DImode)
6260 strcpy (buf, "extrd,s,*");
6261 else if (GET_MODE (operands[0]) == DImode)
6262 strcpy (buf, "bb,*");
6263 if ((which == 0 && negated)
6264 || (which == 1 && ! negated))
6265 strcat (buf, ">=");
6266 else
6267 strcat (buf, "<");
6268 if (useskip)
6269 strcat (buf, " %0,%1,1,%%r0");
6270 else if (nullify && negated)
6271 strcat (buf, ",n %0,%1,%3");
6272 else if (nullify && ! negated)
6273 strcat (buf, ",n %0,%1,%2");
6274 else if (! nullify && negated)
6275 strcat (buf, "%0,%1,%3");
6276 else if (! nullify && ! negated)
6277 strcat (buf, " %0,%1,%2");
6278 break;
6280 /* All long conditionals. Note a short backward branch with an
6281 unfilled delay slot is treated just like a long backward branch
6282 with an unfilled delay slot. */
6283 case 8:
6284       /* Handle weird backwards branch with a filled delay slot
6285          which is nullified. */
6286 if (dbr_sequence_length () != 0
6287 && ! forward_branch_p (insn)
6288 && nullify)
6290 strcpy (buf, "bb,");
6291 if (GET_MODE (operands[0]) == DImode)
6292 strcat (buf, "*");
6293 if ((which == 0 && negated)
6294 || (which == 1 && ! negated))
6295 strcat (buf, "<");
6296 else
6297 strcat (buf, ">=");
6298 if (negated)
6299 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6300 else
6301 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6303 /* Handle short backwards branch with an unfilled delay slot.
6304 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6305 taken and untaken branches. */
6306 else if (dbr_sequence_length () == 0
6307 && ! forward_branch_p (insn)
6308 && INSN_ADDRESSES_SET_P ()
6309 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6310 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6312 strcpy (buf, "bb,");
6313 if (GET_MODE (operands[0]) == DImode)
6314 strcat (buf, "*");
6315 if ((which == 0 && negated)
6316 || (which == 1 && ! negated))
6317 strcat (buf, ">=");
6318 else
6319 strcat (buf, "<");
6320 if (negated)
6321 strcat (buf, " %0,%1,%3%#");
6322 else
6323 strcat (buf, " %0,%1,%2%#");
6325 else
6327 strcpy (buf, "{extrs,|extrw,s,}");
6328 if (GET_MODE (operands[0]) == DImode)
6329 strcpy (buf, "extrd,s,*");
6330 if ((which == 0 && negated)
6331 || (which == 1 && ! negated))
6332 strcat (buf, "<");
6333 else
6334 strcat (buf, ">=");
6335 if (nullify && negated)
6336 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6337 else if (nullify && ! negated)
6338 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6339 else if (negated)
6340 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6341 else
6342 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6344 break;
6346 default:
6347 gcc_unreachable ();
6349 return buf;
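/* Illustrative sketch, not part of the original source: a short
   branch-on-bit with a nullified delay slot comes out as

       bb,<,n %r4,5,L$0007         (branch if bit 5 of %r4 is set)

   while the skip form uses an extract whose condition nullifies the
   following instruction instead:

       extrs,< %r4,5,1,%r0  */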
6352 /* This routine handles all the branch-on-variable-bit conditional branch
6353 sequences we might need to generate. It handles nullification of delay
6354 slots, varying length branches, negated branches and all combinations
6355    of the above. It returns the appropriate output template to emit the
6356 branch. */
6358 const char *
6359 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6360 int negated, rtx insn, int which)
6362 static char buf[100];
6363 int useskip = 0;
6365 /* A conditional branch to the following instruction (e.g. the delay slot) is
6366 asking for a disaster. I do not think this can happen as this pattern
6367 is only used when optimizing; jump optimization should eliminate the
6368 jump. But be prepared just in case. */
6370 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6371 return "nop";
6373 /* If this is a long branch with its delay slot unfilled, set `nullify'
6374 as it can nullify the delay slot and save a nop. */
6375 if (length == 8 && dbr_sequence_length () == 0)
6376 nullify = 1;
6378 /* If this is a short forward conditional branch which did not get
6379 its delay slot filled, the delay slot can still be nullified. */
6380 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6381 nullify = forward_branch_p (insn);
6383 /* A forward branch over a single nullified insn can be done with an
6384 extrs instruction. This avoids a single cycle penalty due to
6385 mis-predicted branch if we fall through (branch not taken). */
6387 if (length == 4
6388 && next_real_insn (insn) != 0
6389 && get_attr_length (next_real_insn (insn)) == 4
6390 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6391 && nullify)
6392 useskip = 1;
6394 switch (length)
6397 /* All short conditional branches except backwards with an unfilled
6398 delay slot. */
6399 case 4:
6400 if (useskip)
6401 strcpy (buf, "{vextrs,|extrw,s,}");
6402 else
6403 strcpy (buf, "{bvb,|bb,}");
6404 if (useskip && GET_MODE (operands[0]) == DImode)
6405 strcpy (buf, "extrd,s,*");
6406 else if (GET_MODE (operands[0]) == DImode)
6407 strcpy (buf, "bb,*");
6408 if ((which == 0 && negated)
6409 || (which == 1 && ! negated))
6410 strcat (buf, ">=");
6411 else
6412 strcat (buf, "<");
6413 if (useskip)
6414 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6415 else if (nullify && negated)
6416 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6417 else if (nullify && ! negated)
6418 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6419 else if (! nullify && negated)
6420 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6421 else if (! nullify && ! negated)
6422 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6423 break;
6425 /* All long conditionals. Note a short backward branch with an
6426 unfilled delay slot is treated just like a long backward branch
6427 with an unfilled delay slot. */
6428 case 8:
6429       /* Handle weird backwards branch with a filled delay slot
6430          which is nullified. */
6431 if (dbr_sequence_length () != 0
6432 && ! forward_branch_p (insn)
6433 && nullify)
6435 strcpy (buf, "{bvb,|bb,}");
6436 if (GET_MODE (operands[0]) == DImode)
6437 strcat (buf, "*");
6438 if ((which == 0 && negated)
6439 || (which == 1 && ! negated))
6440 strcat (buf, "<");
6441 else
6442 strcat (buf, ">=");
6443 if (negated)
6444 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6445 else
6446 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6448 /* Handle short backwards branch with an unfilled delay slot.
6449 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6450 taken and untaken branches. */
6451 else if (dbr_sequence_length () == 0
6452 && ! forward_branch_p (insn)
6453 && INSN_ADDRESSES_SET_P ()
6454 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6455 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6457 strcpy (buf, "{bvb,|bb,}");
6458 if (GET_MODE (operands[0]) == DImode)
6459 strcat (buf, "*");
6460 if ((which == 0 && negated)
6461 || (which == 1 && ! negated))
6462 strcat (buf, ">=");
6463 else
6464 strcat (buf, "<");
6465 if (negated)
6466 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6467 else
6468 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6470 else
6472 strcpy (buf, "{vextrs,|extrw,s,}");
6473 if (GET_MODE (operands[0]) == DImode)
6474 strcpy (buf, "extrd,s,*");
6475 if ((which == 0 && negated)
6476 || (which == 1 && ! negated))
6477 strcat (buf, "<");
6478 else
6479 strcat (buf, ">=");
6480 if (nullify && negated)
6481 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6482 else if (nullify && ! negated)
6483 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6484 else if (negated)
6485 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6486 else
6487 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6489 break;
6491 default:
6492 gcc_unreachable ();
6494 return buf;
6497 /* Return the output template for emitting a dbra type insn.
6499 Note it may perform some output operations on its own before
6500 returning the final output string. */
6501 const char *
6502 output_dbra (rtx *operands, rtx insn, int which_alternative)
6505 /* A conditional branch to the following instruction (e.g. the delay slot) is
6506 asking for a disaster. Be prepared! */
6508 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6510 if (which_alternative == 0)
6511 return "ldo %1(%0),%0";
6512 else if (which_alternative == 1)
6514 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6515 output_asm_insn ("ldw -16(%%r30),%4", operands);
6516 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6517 return "{fldws|fldw} -16(%%r30),%0";
6519 else
6521 output_asm_insn ("ldw %0,%4", operands);
6522 return "ldo %1(%4),%4\n\tstw %4,%0";
6526 if (which_alternative == 0)
6528 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6529 int length = get_attr_length (insn);
6531 /* If this is a long branch with its delay slot unfilled, set `nullify'
6532 as it can nullify the delay slot and save a nop. */
6533 if (length == 8 && dbr_sequence_length () == 0)
6534 nullify = 1;
6536 /* If this is a short forward conditional branch which did not get
6537 its delay slot filled, the delay slot can still be nullified. */
6538 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6539 nullify = forward_branch_p (insn);
6541 switch (length)
6543 case 4:
6544 if (nullify)
6545 return "addib,%C2,n %1,%0,%3";
6546 else
6547 return "addib,%C2 %1,%0,%3";
6549 case 8:
6550       /* Handle weird backwards branch with a filled delay slot
6551 which is nullified. */
6552 if (dbr_sequence_length () != 0
6553 && ! forward_branch_p (insn)
6554 && nullify)
6555 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6556 /* Handle short backwards branch with an unfilled delay slot.
6557          Using an addb;nop rather than addi;bl saves 1 cycle for both
6558 taken and untaken branches. */
6559 else if (dbr_sequence_length () == 0
6560 && ! forward_branch_p (insn)
6561 && INSN_ADDRESSES_SET_P ()
6562 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6563 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6564 return "addib,%C2 %1,%0,%3%#";
6566 /* Handle normal cases. */
6567 if (nullify)
6568 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6569 else
6570 return "addi,%N2 %1,%0,%0\n\tb %3";
6572 default:
6573 gcc_unreachable ();
6577 /* Deal with gross reload from FP register case. */
6578 else if (which_alternative == 1)
6580 /* Move loop counter from FP register to MEM then into a GR,
6581 increment the GR, store the GR into MEM, and finally reload
6582 the FP register from MEM from within the branch's delay slot. */
6583 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6584 operands);
6585 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6586 if (get_attr_length (insn) == 24)
6587 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6588 else
6589 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6591 /* Deal with gross reload from memory case. */
6592 else
6594 /* Reload loop counter from memory, the store back to memory
6595 happens in the branch's delay slot. */
6596 output_asm_insn ("ldw %0,%4", operands);
6597 if (get_attr_length (insn) == 12)
6598 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6599 else
6600 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6604 /* Return the output template for emitting a movb type insn.
6606 Note it may perform some output operations on its own before
6607 returning the final output string. */
6608 const char *
6609 output_movb (rtx *operands, rtx insn, int which_alternative,
6610 int reverse_comparison)
6613 /* A conditional branch to the following instruction (e.g. the delay slot) is
6614 asking for a disaster. Be prepared! */
6616 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6618 if (which_alternative == 0)
6619 return "copy %1,%0";
6620 else if (which_alternative == 1)
6622 output_asm_insn ("stw %1,-16(%%r30)", operands);
6623 return "{fldws|fldw} -16(%%r30),%0";
6625 else if (which_alternative == 2)
6626 return "stw %1,%0";
6627 else
6628 return "mtsar %r1";
6631 /* Support the second variant. */
6632 if (reverse_comparison)
6633 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6635 if (which_alternative == 0)
6637 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6638 int length = get_attr_length (insn);
6640 /* If this is a long branch with its delay slot unfilled, set `nullify'
6641 as it can nullify the delay slot and save a nop. */
6642 if (length == 8 && dbr_sequence_length () == 0)
6643 nullify = 1;
6645 /* If this is a short forward conditional branch which did not get
6646 its delay slot filled, the delay slot can still be nullified. */
6647 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6648 nullify = forward_branch_p (insn);
6650 switch (length)
6652 case 4:
6653 if (nullify)
6654 return "movb,%C2,n %1,%0,%3";
6655 else
6656 return "movb,%C2 %1,%0,%3";
6658 case 8:
6659 /* Handle weird backwards branch with a filled delay slot
6660 which is nullified. */
6661 if (dbr_sequence_length () != 0
6662 && ! forward_branch_p (insn)
6663 && nullify)
6664 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6666 /* Handle short backwards branch with an unfilled delay slot.
6667 Using a movb;nop rather than or;bl saves 1 cycle for both
6668 taken and untaken branches. */
6669 else if (dbr_sequence_length () == 0
6670 && ! forward_branch_p (insn)
6671 && INSN_ADDRESSES_SET_P ()
6672 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6673 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6674 return "movb,%C2 %1,%0,%3%#";
6675 /* Handle normal cases. */
6676 if (nullify)
6677 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6678 else
6679 return "or,%N2 %1,%%r0,%0\n\tb %3";
6681 default:
6682 gcc_unreachable ();
6685 /* Deal with gross reload from FP register case. */
6686 else if (which_alternative == 1)
6688 /* Move loop counter from FP register to MEM then into a GR,
6689 increment the GR, store the GR into MEM, and finally reload
6690 the FP register from MEM from within the branch's delay slot. */
6691 output_asm_insn ("stw %1,-16(%%r30)", operands);
6692 if (get_attr_length (insn) == 12)
6693 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6694 else
6695 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6697 /* Deal with gross reload from memory case. */
6698 else if (which_alternative == 2)
6700 /* Reload loop counter from memory, the store back to memory
6701 happens in the branch's delay slot. */
6702 if (get_attr_length (insn) == 8)
6703 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6704 else
6705 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6707 /* Handle SAR as a destination. */
6708 else
6710 if (get_attr_length (insn) == 8)
6711 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6712 else
6713 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
6717 /* Copy any FP arguments in INSN into integer registers. */
6718 static void
6719 copy_fp_args (rtx insn)
6721 rtx link;
6722 rtx xoperands[2];
6724 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6726 int arg_mode, regno;
6727 rtx use = XEXP (link, 0);
6729 if (! (GET_CODE (use) == USE
6730 && GET_CODE (XEXP (use, 0)) == REG
6731 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6732 continue;
6734 arg_mode = GET_MODE (XEXP (use, 0));
6735 regno = REGNO (XEXP (use, 0));
6737 /* Is it a floating point register? */
6738 if (regno >= 32 && regno <= 39)
6740 /* Copy the FP register into an integer register via memory. */
6741 if (arg_mode == SFmode)
6743 xoperands[0] = XEXP (use, 0);
6744 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6745 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6746 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6748 else
6750 xoperands[0] = XEXP (use, 0);
6751 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6752 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6753 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6754 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6760 /* Compute length of the FP argument copy sequence for INSN. */
6761 static int
6762 length_fp_args (rtx insn)
6764 int length = 0;
6765 rtx link;
6767 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6769 int arg_mode, regno;
6770 rtx use = XEXP (link, 0);
6772 if (! (GET_CODE (use) == USE
6773 && GET_CODE (XEXP (use, 0)) == REG
6774 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6775 continue;
6777 arg_mode = GET_MODE (XEXP (use, 0));
6778 regno = REGNO (XEXP (use, 0));
6780 /* Is it a floating point register? */
6781 if (regno >= 32 && regno <= 39)
6783 if (arg_mode == SFmode)
6784 length += 8;
6785 else
6786 length += 12;
6790 return length;
6793 /* Return the attribute length for the millicode call instruction INSN.
6794 The length must match the code generated by output_millicode_call.
6795 We include the delay slot in the returned length as it is better to
6796    overestimate the length than to underestimate it. */
6798 int
6799 attr_length_millicode_call (rtx insn)
6801 unsigned long distance = -1;
6802 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6804 if (INSN_ADDRESSES_SET_P ())
6806 distance = (total + insn_current_reference_address (insn));
6807 if (distance < total)
6808 distance = -1;
6811 if (TARGET_64BIT)
6813 if (!TARGET_LONG_CALLS && distance < 7600000)
6814 return 8;
6816 return 20;
6818 else if (TARGET_PORTABLE_RUNTIME)
6819 return 24;
6820 else
6822 if (!TARGET_LONG_CALLS && distance < 240000)
6823 return 8;
6825 if (TARGET_LONG_ABS_CALL && !flag_pic)
6826 return 12;
6828 return 24;
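/* Worked example, for illustration only: once insn addresses are known,
   a 32-bit millicode call whose distance estimate is under 240000 bytes
   gets the short 8-byte {bl|b,l} estimate; a non-PIC TARGET_LONG_ABS_CALL
   uses 12 bytes; every remaining case is estimated at 24 bytes.  */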
6832 /* INSN is a function call. It may have an unconditional jump
6833 in its delay slot.
6835 CALL_DEST is the routine we are calling. */
6837 const char *
6838 output_millicode_call (rtx insn, rtx call_dest)
6840 int attr_length = get_attr_length (insn);
6841 int seq_length = dbr_sequence_length ();
6842 int distance;
6843 rtx seq_insn;
6844 rtx xoperands[3];
6846 xoperands[0] = call_dest;
6847 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6849 /* Handle the common case where we are sure that the branch will
6850    reach the beginning of the $CODE$ subspace. The within-reach
6851 form of the $$sh_func_adrs call has a length of 28. Because
6852 it has an attribute type of multi, it never has a nonzero
6853    sequence length. The length of the $$sh_func_adrs call is the
6854    same as that of certain out-of-reach PIC calls to other routines. */
6855 if (!TARGET_LONG_CALLS
6856 && ((seq_length == 0
6857 && (attr_length == 12
6858 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6859 || (seq_length != 0 && attr_length == 8)))
6861 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
6863 else
6865 if (TARGET_64BIT)
6867 /* It might seem that one insn could be saved by accessing
6868 the millicode function using the linkage table. However,
6869 this doesn't work in shared libraries and other dynamically
6870 loaded objects. Using a pc-relative sequence also avoids
6871 problems related to the implicit use of the gp register. */
6872 output_asm_insn ("b,l .+8,%%r1", xoperands);
6874 if (TARGET_GAS)
6876 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
6877 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6879 else
6881 xoperands[1] = gen_label_rtx ();
6882 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6883 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6884 CODE_LABEL_NUMBER (xoperands[1]));
6885 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6888 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6890 else if (TARGET_PORTABLE_RUNTIME)
6892 /* Pure portable runtime doesn't allow be/ble; we also don't
6893 have PIC support in the assembler/linker, so this sequence
6894 is needed. */
6896 /* Get the address of our target into %r1. */
6897 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6898 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6900 /* Get our return address into %r31. */
6901 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
6902 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6904 /* Jump to our target address in %r1. */
6905 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6907 else if (!flag_pic)
6909 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6910 if (TARGET_PA_20)
6911 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6912 else
6913 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6915 else
6917 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6918 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
6920 if (TARGET_SOM || !TARGET_GAS)
6922 /* The HP assembler can generate relocations for the
6923 difference of two symbols. GAS can do this for a
6924 millicode symbol but not an arbitrary external
6925 symbol when generating SOM output. */
6926 xoperands[1] = gen_label_rtx ();
6927 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6928 CODE_LABEL_NUMBER (xoperands[1]));
6929 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6930 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6932 else
6934 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
6935 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
6936 xoperands);
6939 /* Jump to our target address in %r1. */
6940 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6944 if (seq_length == 0)
6945 output_asm_insn ("nop", xoperands);
6947 /* We are done if there isn't a jump in the delay slot. */
6948 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6949 return "";
6951 /* This call has an unconditional jump in its delay slot. */
6952 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6954 /* See if the return address can be adjusted. Use the containing
6955 sequence insn's address. */
6956 if (INSN_ADDRESSES_SET_P ())
6958 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6959 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6960 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
6962 if (VAL_14_BITS_P (distance))
6964 xoperands[1] = gen_label_rtx ();
6965 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
6966 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6967 CODE_LABEL_NUMBER (xoperands[1]));
6969 else
6970 /* ??? This branch may not reach its target. */
6971 output_asm_insn ("nop\n\tb,n %0", xoperands);
6973 else
6974 /* ??? This branch may not reach its target. */
6975 output_asm_insn ("nop\n\tb,n %0", xoperands);
6977 /* Delete the jump. */
6978 PUT_CODE (NEXT_INSN (insn), NOTE);
6979 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6980 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6982 return "";
6985 /* Return the attribute length of the call instruction INSN. The SIBCALL
6986 flag indicates whether INSN is a regular call or a sibling call. The
6987 length returned must be longer than the code actually generated by
6988 output_call. Since branch shortening is done before delay branch
6989 sequencing, there is no way to determine whether or not the delay
6990 slot will be filled during branch shortening. Even when the delay
6991 slot is filled, we may have to add a nop if the delay slot contains
6992 a branch that can't reach its target. Thus, we always have to include
6993 the delay slot in the length estimate. This used to be done in
6994 pa_adjust_insn_length but we do it here now as some sequences always
6995 fill the delay slot and we can save four bytes in the estimate for
6996 these sequences. */
6998 int
6999 attr_length_call (rtx insn, int sibcall)
7001 int local_call;
7002 rtx call_dest;
7003 tree call_decl;
7004 int length = 0;
7005 rtx pat = PATTERN (insn);
7006 unsigned long distance = -1;
7008 if (INSN_ADDRESSES_SET_P ())
7010 unsigned long total;
7012 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7013 distance = (total + insn_current_reference_address (insn));
7014 if (distance < total)
7015 distance = -1;
7018 /* Determine if this is a local call. */
7019 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7020 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7021 else
7022 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7024 call_decl = SYMBOL_REF_DECL (call_dest);
7025 local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7027 /* pc-relative branch. */
7028 if (!TARGET_LONG_CALLS
7029 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7030 || distance < 240000))
7031 length += 8;
7033 /* 64-bit plabel sequence. */
7034 else if (TARGET_64BIT && !local_call)
7035 length += sibcall ? 28 : 24;
7037 /* non-pic long absolute branch sequence. */
7038 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7039 length += 12;
7041 /* long pc-relative branch sequence. */
7042 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7043 || (TARGET_64BIT && !TARGET_GAS)
7044 || (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7046 length += 20;
7048 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7049 length += 8;
7052 /* 32-bit plabel sequence. */
7053 else
7055 length += 32;
7057 if (TARGET_SOM)
7058 length += length_fp_args (insn);
7060 if (flag_pic)
7061 length += 4;
7063 if (!TARGET_PA_20)
7065 if (!sibcall)
7066 length += 8;
7068 if (!TARGET_NO_SPACE_REGS)
7069 length += 8;
7073 return length;
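/* Worked example, for illustration only: in the 32-bit plabel case
   above, a non-PIC, PA 1.x, non-sibling call with space registers
   enabled is estimated at 32 + 8 + 8 bytes, plus length_fp_args (insn)
   when generating SOM code.  */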
7076 /* INSN is a function call. It may have an unconditional jump
7077 in its delay slot.
7079 CALL_DEST is the routine we are calling. */
7081 const char *
7082 output_call (rtx insn, rtx call_dest, int sibcall)
7084 int delay_insn_deleted = 0;
7085 int delay_slot_filled = 0;
7086 int seq_length = dbr_sequence_length ();
7087 tree call_decl = SYMBOL_REF_DECL (call_dest);
7088 int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7089 rtx xoperands[2];
7091 xoperands[0] = call_dest;
7093 /* Handle the common case where we're sure that the branch will reach
7094 the beginning of the "$CODE$" subspace. This is the beginning of
7095 the current function if we are in a named section. */
7096 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7098 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7099 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7101 else
7103 if (TARGET_64BIT && !local_call)
7105 /* ??? As far as I can tell, the HP linker doesn't support the
7106 long pc-relative sequence described in the 64-bit runtime
7107 architecture. So, we use a slightly longer indirect call. */
7108 xoperands[0] = get_deferred_plabel (call_dest);
7109 xoperands[1] = gen_label_rtx ();
7111 /* If this isn't a sibcall, we put the load of %r27 into the
7112 delay slot. We can't do this in a sibcall as we don't
7113 have a second call-clobbered scratch register available. */
7114 if (seq_length != 0
7115 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7116 && !sibcall)
7118 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7119 optimize, 0, NULL);
7121 /* Now delete the delay insn. */
7122 PUT_CODE (NEXT_INSN (insn), NOTE);
7123 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7124 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7125 delay_insn_deleted = 1;
7128 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7129 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7130 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7132 if (sibcall)
7134 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7135 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7136 output_asm_insn ("bve (%%r1)", xoperands);
7138 else
7140 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7141 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7142 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7143 delay_slot_filled = 1;
7146 else
7148 int indirect_call = 0;
7150 /* Emit a long call. There are several different sequences
7151 of increasing length and complexity. In most cases,
7152 they don't allow an instruction in the delay slot. */
7153 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7154 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7155 && !(TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7156 && !TARGET_64BIT)
7157 indirect_call = 1;
7159 if (seq_length != 0
7160 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7161 && !sibcall
7162 && (!TARGET_PA_20 || indirect_call))
7164 /* A non-jump insn in the delay slot. By definition we can
7165 emit this insn before the call (and in fact before argument
7166            relocating). */
7167 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7168 NULL);
7170 /* Now delete the delay insn. */
7171 PUT_CODE (NEXT_INSN (insn), NOTE);
7172 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7173 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7174 delay_insn_deleted = 1;
7177 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7179 /* This is the best sequence for making long calls in
7180 non-pic code. Unfortunately, GNU ld doesn't provide
7181 the stub needed for external calls, and GAS's support
7182 for this with the SOM linker is buggy. It is safe
7183 to use this for local calls. */
7184 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7185 if (sibcall)
7186 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7187 else
7189 if (TARGET_PA_20)
7190 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7191 xoperands);
7192 else
7193 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7195 output_asm_insn ("copy %%r31,%%r2", xoperands);
7196 delay_slot_filled = 1;
7199 else
7201 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7202 || (TARGET_64BIT && !TARGET_GAS))
7204 /* The HP assembler and linker can handle relocations
7205 for the difference of two symbols. GAS and the HP
7206 linker can't do this when one of the symbols is
7207 external. */
7208 xoperands[1] = gen_label_rtx ();
7209 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7210 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7211 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7212 CODE_LABEL_NUMBER (xoperands[1]));
7213 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7215 else if (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7217 /* GAS currently can't generate the relocations that
7218 are needed for the SOM linker under HP-UX using this
7219 sequence. The GNU linker doesn't generate the stubs
7220 that are needed for external calls on TARGET_ELF32
7221 with this sequence. For now, we have to use a
7222 longer plabel sequence when using GAS. */
7223 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7224 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7225 xoperands);
7226 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7227 xoperands);
7229 else
7231 /* Emit a long plabel-based call sequence. This is
7232 essentially an inline implementation of $$dyncall.
7233 We don't actually try to call $$dyncall as this is
7234 as difficult as calling the function itself. */
7235 xoperands[0] = get_deferred_plabel (call_dest);
7236 xoperands[1] = gen_label_rtx ();
7238 /* Since the call is indirect, FP arguments in registers
7239 need to be copied to the general registers. Then, the
7240 argument relocation stub will copy them back. */
7241 if (TARGET_SOM)
7242 copy_fp_args (insn);
7244 if (flag_pic)
7246 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7247 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7248 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7250 else
7252 output_asm_insn ("addil LR'%0-$global$,%%r27",
7253 xoperands);
7254 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7255 xoperands);
7258 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7259 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7260 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7261 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7263 if (!sibcall && !TARGET_PA_20)
7265 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7266 if (TARGET_NO_SPACE_REGS)
7267 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7268 else
7269 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7273 if (TARGET_PA_20)
7275 if (sibcall)
7276 output_asm_insn ("bve (%%r1)", xoperands);
7277 else
7279 if (indirect_call)
7281 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7282 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7283 delay_slot_filled = 1;
7285 else
7286 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7289 else
7291 if (!TARGET_NO_SPACE_REGS)
7292 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7293 xoperands);
7295 if (sibcall)
7297 if (TARGET_NO_SPACE_REGS)
7298 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7299 else
7300 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7302 else
7304 if (TARGET_NO_SPACE_REGS)
7305 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7306 else
7307 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7309 if (indirect_call)
7310 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7311 else
7312 output_asm_insn ("copy %%r31,%%r2", xoperands);
7313 delay_slot_filled = 1;
7320 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7321 output_asm_insn ("nop", xoperands);
7323 /* We are done if there isn't a jump in the delay slot. */
7324 if (seq_length == 0
7325 || delay_insn_deleted
7326 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7327 return "";
7329 /* A sibcall should never have a branch in the delay slot. */
7330 gcc_assert (!sibcall);
7332 /* This call has an unconditional jump in its delay slot. */
7333 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7335 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7337 /* See if the return address can be adjusted. Use the containing
7338 sequence insn's address. */
7339 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7340 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7341 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7343 if (VAL_14_BITS_P (distance))
7345 xoperands[1] = gen_label_rtx ();
7346 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7347 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7348 CODE_LABEL_NUMBER (xoperands[1]));
7350 else
7351 output_asm_insn ("nop\n\tb,n %0", xoperands);
7353 else
7354 output_asm_insn ("b,n %0", xoperands);
7356 /* Delete the jump. */
7357 PUT_CODE (NEXT_INSN (insn), NOTE);
7358 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7359 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7361 return "";
7364 /* Return the attribute length of the indirect call instruction INSN.
7365    The length must match the code generated by output_indirect_call.
7366 The returned length includes the delay slot. Currently, the delay
7367 slot of an indirect call sequence is not exposed and it is used by
7368 the sequence itself. */
7370 int
7371 attr_length_indirect_call (rtx insn)
7373 unsigned long distance = -1;
7374 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7376 if (INSN_ADDRESSES_SET_P ())
7378 distance = (total + insn_current_reference_address (insn));
7379 if (distance < total)
7380 distance = -1;
7383 if (TARGET_64BIT)
7384 return 12;
7386 if (TARGET_FAST_INDIRECT_CALLS
7387 || (!TARGET_PORTABLE_RUNTIME
7388 && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
7389 return 8;
7391 if (flag_pic)
7392 return 24;
7394 if (TARGET_PORTABLE_RUNTIME)
7395 return 20;
7397 /* Out of reach, can use ble. */
7398 return 12;
7401 const char *
7402 output_indirect_call (rtx insn, rtx call_dest)
7404 rtx xoperands[1];
7406 if (TARGET_64BIT)
7408 xoperands[0] = call_dest;
7409 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7410 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7411 return "";
7414 /* First the special case for kernels, level 0 systems, etc. */
7415 if (TARGET_FAST_INDIRECT_CALLS)
7416 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7418 /* Now the normal case -- we can reach $$dyncall directly or
7419 we're sure that we can get there via a long-branch stub.
7421 No need to check target flags as the length uniquely identifies
7422 the remaining cases. */
7423 if (attr_length_indirect_call (insn) == 8)
7425 /* The HP linker substitutes a BLE for millicode calls using
7426 the short PIC PCREL form. Thus, we must use %r31 as the
7427 link register when generating PA 1.x code. */
7428 if (TARGET_PA_20)
7429 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7430 else
7431 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7434 /* Long millicode call, but we are not generating PIC or portable runtime
7435 code. */
7436 if (attr_length_indirect_call (insn) == 12)
7437 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7439 /* Long millicode call for portable runtime. */
7440 if (attr_length_indirect_call (insn) == 20)
7441 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7443 /* We need a long PIC call to $$dyncall. */
7444 xoperands[0] = NULL_RTX;
7445 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7446 if (TARGET_SOM || !TARGET_GAS)
7448 xoperands[0] = gen_label_rtx ();
7449 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7450 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7451 CODE_LABEL_NUMBER (xoperands[0]));
7452 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7454 else
7456 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7457 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7458 xoperands);
7460 output_asm_insn ("blr %%r0,%%r2", xoperands);
7461 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7462 return "";
7465 /* Return the total length of the save and restore instructions needed for
7466 the data linkage table pointer (i.e., the PIC register) across the call
7467 instruction INSN. No-return calls do not require a save and restore.
7468 In addition, we may be able to avoid the save and restore for calls
7469 within the same translation unit. */
7471 int
7472 attr_length_save_restore_dltp (rtx insn)
7474 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7475 return 0;
7477 return 8;
7480 /* In HPUX 8.0's shared library scheme, special relocations are needed
7481 for function labels if they might be passed to a function
7482 in a shared library (because shared libraries don't live in code
7483 space), and special magic is needed to construct their address. */
7485 void
7486 hppa_encode_label (rtx sym)
7488 const char *str = XSTR (sym, 0);
7489 int len = strlen (str) + 1;
7490 char *newstr, *p;
7492 p = newstr = alloca (len + 1);
7493 *p++ = '@';
7494 strcpy (p, str);
7496 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7499 static void
7500 pa_encode_section_info (tree decl, rtx rtl, int first)
7502 default_encode_section_info (decl, rtl, first);
7504 if (first && TEXT_SPACE_P (decl))
7506 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7507 if (TREE_CODE (decl) == FUNCTION_DECL)
7508 hppa_encode_label (XEXP (rtl, 0));
7512 /* This is sort of inverse to pa_encode_section_info. */
7514 static const char *
7515 pa_strip_name_encoding (const char *str)
7517 str += (*str == '@');
7518 str += (*str == '*');
7519 return str;
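/* Example, illustrative only: hppa_encode_label rewrites the symbol
   string "foo" to "@foo" so later passes can recognize function
   labels; pa_strip_name_encoding reverses this, also dropping a
   leading '*' (the no-prefix marker), so "@foo" strips back to "foo".  */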
7522 int
7523 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7525 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7528 /* Returns 1 if OP is a function label involved in a simple addition
7529 with a constant. Used to keep certain patterns from matching
7530 during instruction combination. */
7531 int
7532 is_function_label_plus_const (rtx op)
7534 /* Strip off any CONST. */
7535 if (GET_CODE (op) == CONST)
7536 op = XEXP (op, 0);
7538 return (GET_CODE (op) == PLUS
7539 && function_label_operand (XEXP (op, 0), Pmode)
7540 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7543 /* Output assembly code for a thunk to FUNCTION. */
7545 static void
7546 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7547 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7548 tree function)
7550 static unsigned int current_thunk_number;
7551 int val_14 = VAL_14_BITS_P (delta);
7552 int nbytes = 0;
7553 char label[16];
7554 rtx xoperands[4];
7556 xoperands[0] = XEXP (DECL_RTL (function), 0);
7557 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
7558 xoperands[2] = GEN_INT (delta);
7560 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
7561 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7563 /* Output the thunk. We know that the function is in the same
7564 translation unit (i.e., the same space) as the thunk, and that
7565 thunks are output after their method. Thus, we don't need an
7566 external branch to reach the function. With SOM and GAS,
7567 functions and thunks are effectively in different sections.
7568    Thus, we can always use an IA-relative branch and the linker
7569 will add a long branch stub if necessary.
7571 However, we have to be careful when generating PIC code on the
7572 SOM port to ensure that the sequence does not transfer to an
7573 import stub for the target function as this could clobber the
7574 return value saved at SP-24. This would also apply to the
7575 32-bit linux port if the multi-space model is implemented. */
7576 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7577 && !(flag_pic && TREE_PUBLIC (function))
7578 && (TARGET_GAS || last_address < 262132))
7579 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7580 && ((targetm.have_named_sections
7581 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7582 /* The GNU 64-bit linker has rather poor stub management.
7583 So, we use a long branch from thunks that aren't in
7584 the same section as the target function. */
7585 && ((!TARGET_64BIT
7586 && (DECL_SECTION_NAME (thunk_fndecl)
7587 != DECL_SECTION_NAME (function)))
7588 || ((DECL_SECTION_NAME (thunk_fndecl)
7589 == DECL_SECTION_NAME (function))
7590 && last_address < 262132)))
7591 || (!targetm.have_named_sections && last_address < 262132))))
7593 if (!val_14)
7594 output_asm_insn ("addil L'%2,%%r26", xoperands);
7596 output_asm_insn ("b %0", xoperands);
7598 if (val_14)
7600 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7601 nbytes += 8;
7603 else
7605 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7606 nbytes += 12;
7609 else if (TARGET_64BIT)
7611 /* We only have one call-clobbered scratch register, so we can't
7612 make use of the delay slot if delta doesn't fit in 14 bits. */
7613 if (!val_14)
7615 output_asm_insn ("addil L'%2,%%r26", xoperands);
7616 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7619 output_asm_insn ("b,l .+8,%%r1", xoperands);
7621 if (TARGET_GAS)
7623 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7624 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7626 else
7628 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
7629 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
7630 output_asm_insn ("ldo R'%0-%1-%3(%%r1),%%r1", xoperands);
7632 if (val_14)
7634 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7635 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7636 nbytes += 20;
7638 else
7640 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
7641 nbytes += 24;
7644 else if (TARGET_PORTABLE_RUNTIME)
7646 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7647 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
7649 if (!val_14)
7650 output_asm_insn ("addil L'%2,%%r26", xoperands);
7652 output_asm_insn ("bv %%r0(%%r22)", xoperands);
7654 if (val_14)
7656 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7657 nbytes += 16;
7659 else
7661 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7662 nbytes += 20;
7665 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7667 /* The function is accessible from outside this module. The only
7668 way to avoid an import stub between the thunk and function is to
7669 call the function directly with an indirect sequence similar to
7670 that used by $$dyncall. This is possible because $$dyncall acts
7671 as the import stub in an indirect call. */
7672 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7673 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
7674 output_asm_insn ("addil LT'%3,%%r19", xoperands);
7675 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
7676 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
7677 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
7678 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
7679 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
7680 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
7682 if (!val_14)
7684 output_asm_insn ("addil L'%2,%%r26", xoperands);
7685 nbytes += 4;
7688 if (TARGET_PA_20)
7690 output_asm_insn ("bve (%%r22)", xoperands);
7691 nbytes += 36;
7693 else if (TARGET_NO_SPACE_REGS)
7695 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
7696 nbytes += 36;
7698 else
7700 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
7701 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
7702 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
7703 nbytes += 44;
7706 if (val_14)
7707 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7708 else
7709 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7711 else if (flag_pic)
7713 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7715 if (TARGET_SOM || !TARGET_GAS)
7717 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
7718 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
7720 else
7722 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7723 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
7726 if (!val_14)
7727 output_asm_insn ("addil L'%2,%%r26", xoperands);
7729 output_asm_insn ("bv %%r0(%%r22)", xoperands);
7731 if (val_14)
7733 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7734 nbytes += 20;
7736 else
7738 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7739 nbytes += 24;
7742 else
7744 if (!val_14)
7745 output_asm_insn ("addil L'%2,%%r26", xoperands);
7747 output_asm_insn ("ldil L'%0,%%r22", xoperands);
7748 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
7750 if (val_14)
7752 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7753 nbytes += 12;
7755 else
7757 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7758 nbytes += 16;
7762 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
7764 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7766 data_section ();
7767 output_asm_insn (".align 4", xoperands);
7768 ASM_OUTPUT_LABEL (file, label);
7769 output_asm_insn (".word P'%0", xoperands);
7771 else if (TARGET_SOM && TARGET_GAS)
7772 forget_section ();
7774 current_thunk_number++;
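/* Round the thunk size up to the function alignment so that
   last_address reflects where the next function can start.  */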
7775 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
7776 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
7777 last_address += nbytes;
7778 update_total_code_bytes (nbytes);
7781 /* Only direct calls to static functions are allowed to be sibling (tail)
7782 call optimized.
7784 This restriction is necessary because some linker generated stubs will
7785 store return pointers into "rp" in some cases, which might clobber a
7786 live value already in "rp".
7788 In a sibcall the current function and the target function share stack
7789 space. Thus if the path to the current function and the path to the
7790 target function save a value in "rp", they save the value into the
7791 same stack slot, which has undesirable consequences.
7793 Because of the deferred binding nature of shared libraries, any function
7794 with external scope could be in a different load module and thus require
7795 "rp" to be saved when calling that function. So sibcall optimizations
7796 can only be safe for static functions.
7798 Note that GCC never needs return value relocations, so we don't have to
7799 worry about static calls with return value relocations (which require
7800 saving "rp").
7802 It is safe to perform a sibcall optimization when the target function
7803 will never return. */
7804 static bool
7805 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
7807 if (TARGET_PORTABLE_RUNTIME)
7808 return false;
7810 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
7811 single subspace mode and the call is not indirect. As far as I know,
7812 there is no operating system support for the multiple subspace mode.
7813 It might be possible to support indirect calls if we didn't use
7814 $$dyncall (see the indirect sequence generated in output_call). */
7815 if (TARGET_ELF32)
7816 return (decl != NULL_TREE);
7818 /* Sibcalls are not ok because the arg pointer register is not a fixed
7819 register. This prevents the sibcall optimization from occurring. In
7820 addition, there are problems with stub placement using GNU ld. This
7821 is because a normal sibcall branch uses a 17-bit relocation while
7822 a regular call branch uses a 22-bit relocation. As a result, more
7823 care needs to be taken in the placement of long-branch stubs. */
7824 if (TARGET_64BIT)
7825 return false;
7827 /* Sibcalls are only ok within a translation unit. */
7828 return (decl && !TREE_PUBLIC (decl));
7831 /* ??? Addition is not commutative on the PA due to the weird implicit
7832 space register selection rules for memory addresses. Therefore, we
7833 don't consider a + b == b + a, as this might be inside a MEM. */
7834 static bool
7835 pa_commutative_p (rtx x, int outer_code)
7837 return (COMMUTATIVE_P (x)
7838 && (TARGET_NO_SPACE_REGS
7839 || (outer_code != UNKNOWN && outer_code != MEM)
7840 || GET_CODE (x) != PLUS));
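/* For example, the space register for a memory access is selected
   from the base register of the address, so (mem (plus (reg A)
   (reg B))) need not reference the same space as (mem (plus (reg B)
   (reg A))).  PLUS is therefore only treated as commutative when
   space registers are disabled or the expression is known not to be
   part of a memory address.  */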
7843 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7844 use in fmpyadd instructions. */
7846 fmpyaddoperands (rtx *operands)
7848 enum machine_mode mode = GET_MODE (operands[0]);
7850 /* Must be a floating point mode. */
7851 if (mode != SFmode && mode != DFmode)
7852 return 0;
7854 /* All modes must be the same. */
7855 if (! (mode == GET_MODE (operands[1])
7856 && mode == GET_MODE (operands[2])
7857 && mode == GET_MODE (operands[3])
7858 && mode == GET_MODE (operands[4])
7859 && mode == GET_MODE (operands[5])))
7860 return 0;
7862 /* All operands must be registers. */
7863 if (! (GET_CODE (operands[1]) == REG
7864 && GET_CODE (operands[2]) == REG
7865 && GET_CODE (operands[3]) == REG
7866 && GET_CODE (operands[4]) == REG
7867 && GET_CODE (operands[5]) == REG))
7868 return 0;
7870 /* Only 2 real operands to the addition. One of the input operands must
7871 be the same as the output operand. */
7872 if (! rtx_equal_p (operands[3], operands[4])
7873 && ! rtx_equal_p (operands[3], operands[5]))
7874 return 0;
7876 /* Inout operand of add cannot conflict with any operands from multiply. */
7877 if (rtx_equal_p (operands[3], operands[0])
7878 || rtx_equal_p (operands[3], operands[1])
7879 || rtx_equal_p (operands[3], operands[2]))
7880 return 0;
7882 /* The multiply result cannot feed into the addition operands. */
7883 if (rtx_equal_p (operands[4], operands[0])
7884 || rtx_equal_p (operands[5], operands[0]))
7885 return 0;
7887 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
7888 if (mode == SFmode
7889 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7890 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7891 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7892 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7893 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7894 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7895 return 0;
7897 /* Passed. Operands are suitable for fmpyadd. */
7898 return 1;
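/* In the layout checked above, operands[0] = operands[1] * operands[2]
   is the multiply and operands[3] = operands[4] + operands[5] is the
   add, with the inout operand operands[3] also appearing as one of
   the two add inputs.  */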
7901 #if !defined(USE_COLLECT2)
7902 static void
7903 pa_asm_out_constructor (rtx symbol, int priority)
7905 if (!function_label_operand (symbol, VOIDmode))
7906 hppa_encode_label (symbol);
7908 #ifdef CTORS_SECTION_ASM_OP
7909 default_ctor_section_asm_out_constructor (symbol, priority);
7910 #else
7911 # ifdef TARGET_ASM_NAMED_SECTION
7912 default_named_section_asm_out_constructor (symbol, priority);
7913 # else
7914 default_stabs_asm_out_constructor (symbol, priority);
7915 # endif
7916 #endif
7919 static void
7920 pa_asm_out_destructor (rtx symbol, int priority)
7922 if (!function_label_operand (symbol, VOIDmode))
7923 hppa_encode_label (symbol);
7925 #ifdef DTORS_SECTION_ASM_OP
7926 default_dtor_section_asm_out_destructor (symbol, priority);
7927 #else
7928 # ifdef TARGET_ASM_NAMED_SECTION
7929 default_named_section_asm_out_destructor (symbol, priority);
7930 # else
7931 default_stabs_asm_out_destructor (symbol, priority);
7932 # endif
7933 #endif
7935 #endif
7937 /* This function places uninitialized global data in the bss section.
7938 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
7939 function on the SOM port to prevent uninitialized global data from
7940 being placed in the data section. */
7942 void
7943 pa_asm_output_aligned_bss (FILE *stream,
7944 const char *name,
7945 unsigned HOST_WIDE_INT size,
7946 unsigned int align)
7948 bss_section ();
7949 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
7951 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
7952 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
7953 #endif
7955 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
7956 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
7957 #endif
7959 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
7960 ASM_OUTPUT_LABEL (stream, name);
7961 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
7964 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
7965 that doesn't allow the alignment of global common storage to be directly
7966 specified. The SOM linker aligns common storage based on the rounded
7967 value of the NUM_BYTES parameter in the .comm directive. It's not
7968 possible to use the .align directive as it doesn't affect the alignment
7969 of the label associated with a .comm directive. */
7971 void
7972 pa_asm_output_aligned_common (FILE *stream,
7973 const char *name,
7974 unsigned HOST_WIDE_INT size,
7975 unsigned int align)
7977 unsigned int max_common_align;
7979 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
7980 if (align > max_common_align)
7982 warning (0, "alignment (%u) for %s exceeds maximum alignment "
7983 "for global common data. Using %u",
7984 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
7985 align = max_common_align;
7988 bss_section ();
7990 assemble_name (stream, name);
7991 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
7992 MAX (size, align / BITS_PER_UNIT));
7995 /* We can't use .comm for local common storage as the SOM linker effectively
7996 treats the symbol as universal and uses the same storage for local symbols
7997 with the same name in different object files. The .block directive
7998 reserves an uninitialized block of storage. However, it's not common
7999 storage. Fortunately, GCC never requests common storage with the same
8000 name in any given translation unit. */
8002 void
8003 pa_asm_output_aligned_local (FILE *stream,
8004 const char *name,
8005 unsigned HOST_WIDE_INT size,
8006 unsigned int align)
8008 bss_section ();
8009 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8011 #ifdef LOCAL_ASM_OP
8012 fprintf (stream, "%s", LOCAL_ASM_OP);
8013 assemble_name (stream, name);
8014 fprintf (stream, "\n");
8015 #endif
8017 ASM_OUTPUT_LABEL (stream, name);
8018 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8021 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8022 use in fmpysub instructions. */
8024 fmpysuboperands (rtx *operands)
8026 enum machine_mode mode = GET_MODE (operands[0]);
8028 /* Must be a floating point mode. */
8029 if (mode != SFmode && mode != DFmode)
8030 return 0;
8032 /* All modes must be the same. */
8033 if (! (mode == GET_MODE (operands[1])
8034 && mode == GET_MODE (operands[2])
8035 && mode == GET_MODE (operands[3])
8036 && mode == GET_MODE (operands[4])
8037 && mode == GET_MODE (operands[5])))
8038 return 0;
8040 /* All operands must be registers. */
8041 if (! (GET_CODE (operands[1]) == REG
8042 && GET_CODE (operands[2]) == REG
8043 && GET_CODE (operands[3]) == REG
8044 && GET_CODE (operands[4]) == REG
8045 && GET_CODE (operands[5]) == REG))
8046 return 0;
8048 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8049 operation, so operands[4] must be the same as operands[3]. */
8050 if (! rtx_equal_p (operands[3], operands[4]))
8051 return 0;
8053 /* The multiply result cannot feed into the subtraction. */
8054 if (rtx_equal_p (operands[5], operands[0]))
8055 return 0;
8057 /* Inout operand of sub cannot conflict with any operands from multiply. */
8058 if (rtx_equal_p (operands[3], operands[0])
8059 || rtx_equal_p (operands[3], operands[1])
8060 || rtx_equal_p (operands[3], operands[2]))
8061 return 0;
8063 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8064 if (mode == SFmode
8065 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8066 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8067 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8068 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8069 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8070 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8071 return 0;
8073 /* Passed. Operands are suitable for fmpysub. */
8074 return 1;
8077 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8078 constants for shadd instructions. */
8080 shadd_constant_p (int val)
8082 if (val == 2 || val == 4 || val == 8)
8083 return 1;
8084 else
8085 return 0;
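/* These values correspond to the sh1add, sh2add and sh3add
   instructions, which scale an index register by 2, 4 or 8 before
   adding it to a base register.  */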
8088 /* Return 1 if OP is valid as a base or index register in a
8089 REG+REG address. */
8092 borx_reg_operand (rtx op, enum machine_mode mode)
8094 if (GET_CODE (op) != REG)
8095 return 0;
8097 /* We must reject virtual registers as the only expressions that
8098 can be instantiated are REG and REG+CONST. */
8099 if (op == virtual_incoming_args_rtx
8100 || op == virtual_stack_vars_rtx
8101 || op == virtual_stack_dynamic_rtx
8102 || op == virtual_outgoing_args_rtx
8103 || op == virtual_cfa_rtx)
8104 return 0;
8106 /* While it's always safe to index off the frame pointer, it's not
8107 profitable to do so when the frame pointer is being eliminated. */
8108 if (!reload_completed
8109 && flag_omit_frame_pointer
8110 && !current_function_calls_alloca
8111 && op == frame_pointer_rtx)
8112 return 0;
8114 return register_operand (op, mode);
8117 /* Return 1 if this operand is anything other than a hard register. */
8120 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8122 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8125 /* Return 1 if INSN branches forward. Should be using insn_addresses
8126 to avoid walking through all the insns... */
8127 static int
8128 forward_branch_p (rtx insn)
8130 rtx label = JUMP_LABEL (insn);
8132 while (insn)
8134 if (insn == label)
8135 break;
8136 else
8137 insn = NEXT_INSN (insn);
8140 return (insn == label);
8143 /* Return 1 if OP is an equality comparison, else return 0. */
8145 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8147 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8150 /* Return 1 if INSN is in the delay slot of a call instruction. */
8152 jump_in_call_delay (rtx insn)
8155 if (GET_CODE (insn) != JUMP_INSN)
8156 return 0;
8158 if (PREV_INSN (insn)
8159 && PREV_INSN (PREV_INSN (insn))
8160 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8162 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8164 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8165 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8168 else
8169 return 0;
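/* A filled delay slot is represented as a SEQUENCE whose element 0
   is the branch (here a call) and whose element 1 is the insn in the
   delay slot, hence the XVECEXP test above.  */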
8172 /* Output an unconditional move and branch insn. */
8174 const char *
8175 output_parallel_movb (rtx *operands, int length)
8177 /* These are the cases in which we win. */
8178 if (length == 4)
8179 return "mov%I1b,tr %1,%0,%2";
8181 /* None of these cases wins, but they don't lose either. */
8182 if (dbr_sequence_length () == 0)
8184 /* Nothing in the delay slot, fake it by putting the combined
8185 insn (the copy or add) in the delay slot of a bl. */
8186 if (GET_CODE (operands[1]) == CONST_INT)
8187 return "b %2\n\tldi %1,%0";
8188 else
8189 return "b %2\n\tcopy %1,%0";
8191 else
8193 /* Something in the delay slot, but we've got a long branch. */
8194 if (GET_CODE (operands[1]) == CONST_INT)
8195 return "ldi %1,%0\n\tb %2";
8196 else
8197 return "copy %1,%0\n\tb %2";
8201 /* Output an unconditional add and branch insn. */
8203 const char *
8204 output_parallel_addb (rtx *operands, int length)
8206 /* To make life easy we want operand0 to be the shared input/output
8207 operand and operand1 to be the readonly operand. */
8208 if (operands[0] == operands[1])
8209 operands[1] = operands[2];
8211 /* These are the cases in which we win. */
8212 if (length == 4)
8213 return "add%I1b,tr %1,%0,%3";
8215 /* None of these cases wins, but they don't lose either. */
8216 if (dbr_sequence_length () == 0)
8218 /* Nothing in the delay slot, fake it by putting the combined
8219 insn (the copy or add) in the delay slot of a bl. */
8220 return "b %3\n\tadd%I1 %1,%0,%0";
8222 else
8224 /* Something in the delay slot, but we've got a long branch. */
8225 return "add%I1 %1,%0,%0\n\tb %3";
8229 /* Return nonzero if INSN (a jump insn) immediately follows a call
8230 to a named function. This is used to avoid filling the delay slot
8231 of the jump since it can usually be eliminated by modifying RP in
8232 the delay slot of the call. */
8235 following_call (rtx insn)
8237 if (! TARGET_JUMP_IN_DELAY)
8238 return 0;
8240 /* Find the previous real insn, skipping NOTEs. */
8241 insn = PREV_INSN (insn);
8242 while (insn && GET_CODE (insn) == NOTE)
8243 insn = PREV_INSN (insn);
8245 /* Check for CALL_INSNs and millicode calls. */
8246 if (insn
8247 && ((GET_CODE (insn) == CALL_INSN
8248 && get_attr_type (insn) != TYPE_DYNCALL)
8249 || (GET_CODE (insn) == INSN
8250 && GET_CODE (PATTERN (insn)) != SEQUENCE
8251 && GET_CODE (PATTERN (insn)) != USE
8252 && GET_CODE (PATTERN (insn)) != CLOBBER
8253 && get_attr_type (insn) == TYPE_MILLI)))
8254 return 1;
8256 return 0;
8259 /* We use this hook to perform a PA specific optimization which is difficult
8260 to do in earlier passes.
8262 We want the delay slots of branches within jump tables to be filled.
8263 None of the compiler passes at the moment even has the notion that a
8264 PA jump table doesn't contain addresses, but instead contains actual
8265 instructions!
8267 Because we actually jump into the table, the addresses of each entry
8268 must stay constant in relation to the beginning of the table (which
8269 itself must stay constant relative to the instruction to jump into
8270 it). I don't believe we can guarantee earlier passes of the compiler
8271 will adhere to those rules.
8273 So, late in the compilation process we find all the jump tables, and
8274 expand them into real code -- e.g. each entry in the jump table vector
8275 will get an appropriate label followed by a jump to the final target.
8277 Reorg and the final jump pass can then optimize these branches and
8278 fill their delay slots. We end up with smaller, more efficient code.
8280 The jump instructions within the table are special; we must be able
8281 to identify them during assembly output (if the jumps don't get filled
8282 we need to emit a nop rather than nullifying the delay slot). We
8283 identify jumps in switch tables by using insns with the attribute
8284 type TYPE_BTABLE_BRANCH.
8286 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8287 insns. This serves two purposes. First, it prevents jump.c from
8288 noticing that the last N entries in the table jump to the instruction
8289 immediately after the table and deleting the jumps. Second, those
8290 insns mark where we should emit .begin_brtab and .end_brtab directives
8291 when using GAS (allows for better link time optimizations). */
8293 static void
8294 pa_reorg (void)
8296 rtx insn;
8298 remove_useless_addtr_insns (1);
8300 if (pa_cpu < PROCESSOR_8000)
8301 pa_combine_instructions ();
8304 /* This is fairly cheap, so always run it if optimizing. */
8305 if (optimize > 0 && !TARGET_BIG_SWITCH)
8307 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8308 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8310 rtx pattern, tmp, location, label;
8311 unsigned int length, i;
8313 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8314 if (GET_CODE (insn) != JUMP_INSN
8315 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8316 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8317 continue;
8319 /* Emit marker for the beginning of the branch table. */
8320 emit_insn_before (gen_begin_brtab (), insn);
8322 pattern = PATTERN (insn);
8323 location = PREV_INSN (insn);
8324 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8326 for (i = 0; i < length; i++)
8328 /* Emit a label before each jump to keep jump.c from
8329 removing this code. */
8330 tmp = gen_label_rtx ();
8331 LABEL_NUSES (tmp) = 1;
8332 emit_label_after (tmp, location);
8333 location = NEXT_INSN (location);
8335 if (GET_CODE (pattern) == ADDR_VEC)
8336 label = XEXP (XVECEXP (pattern, 0, i), 0);
8337 else
8338 label = XEXP (XVECEXP (pattern, 1, i), 0);
8340 tmp = gen_short_jump (label);
8342 /* Emit the jump itself. */
8343 tmp = emit_jump_insn_after (tmp, location);
8344 JUMP_LABEL (tmp) = label;
8345 LABEL_NUSES (label)++;
8346 location = NEXT_INSN (location);
8348 /* Emit a BARRIER after the jump. */
8349 emit_barrier_after (location);
8350 location = NEXT_INSN (location);
8353 /* Emit marker for the end of the branch table. */
8354 emit_insn_before (gen_end_brtab (), location);
8355 location = NEXT_INSN (location);
8356 emit_barrier_after (location);
8358 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8359 delete_insn (insn);
8362 else
8364 /* Still need brtab marker insns. FIXME: the presence of these
8365 markers disables output of the branch table to readonly memory,
8366 and any alignment directives that might be needed. Possibly,
8367 the begin_brtab insn should be output before the label for the
8368 table. This doesn't matter at the moment since the tables are
8369 always output in the text section. */
8370 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8372 /* Find an ADDR_VEC insn. */
8373 if (GET_CODE (insn) != JUMP_INSN
8374 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8375 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8376 continue;
8378 /* Now generate markers for the beginning and end of the
8379 branch table. */
8380 emit_insn_before (gen_begin_brtab (), insn);
8381 emit_insn_after (gen_end_brtab (), insn);
8386 /* The PA has a number of odd instructions which can perform multiple
8387 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8388 it may be profitable to combine two instructions into one instruction
8389 with two outputs. It's not profitable on PA2.0 machines because the
8390 two outputs would take two slots in the reorder buffers.
8392 This routine finds instructions which can be combined and combines
8393 them. We only support some of the potential combinations, and we
8394 only try common ways to find suitable instructions.
8396 * addb can add two registers or a register and a small integer
8397 and jump to a nearby (+-8k) location. Normally the jump to the
8398 nearby location is conditional on the result of the add, but by
8399 using the "true" condition we can make the jump unconditional.
8400 Thus addb can perform two independent operations in one insn.
8402 * movb is similar to addb in that it can perform a reg->reg
8403 or small immediate->reg copy and jump to a nearby (+-8k) location.
8405 * fmpyadd and fmpysub can perform a FP multiply and either an
8406 FP add or FP sub if the operands of the multiply and add/sub are
8407 independent (there are other minor restrictions). Note both
8408 the fmpy and fadd/fsub can in theory move to better spots according
8409 to data dependencies, but for now we require the fmpy stay at a
8410 fixed location.
8412 * Many of the memory operations can perform pre & post updates
8413 of index registers. GCC's pre/post increment/decrement addressing
8414 is far too simple to take advantage of all the possibilities. This
8415 pass may not be suitable since those insns may not be independent.
8417 * comclr can compare two ints or an int and a register, nullify
8418 the following instruction and zero some other register. This
8419 is more difficult to use as it's harder to find an insn which
8420 will generate a comclr than finding something like an unconditional
8421 branch. (conditional moves & long branches create comclr insns).
8423 * Most arithmetic operations can conditionally skip the next
8424 instruction. They can be viewed as "perform this operation
8425 and conditionally jump to this nearby location" (where nearby
8426 is an insn away). These are difficult to use due to the
8427 branch length restrictions. */
8429 static void
8430 pa_combine_instructions (void)
8432 rtx anchor, new;
8434 /* This can get expensive since the basic algorithm is on the
8435 order of O(n^2) (or worse). Only do it for -O2 or higher
8436 levels of optimization. */
8437 if (optimize < 2)
8438 return;
8440 /* Walk down the list of insns looking for "anchor" insns which
8441 may be combined with "floating" insns. As the name implies,
8442 "anchor" instructions don't move, while "floating" insns may
8443 move around. */
8444 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8445 new = make_insn_raw (new);
8447 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8449 enum attr_pa_combine_type anchor_attr;
8450 enum attr_pa_combine_type floater_attr;
8452 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8453 Also ignore any special USE insns. */
8454 if ((GET_CODE (anchor) != INSN
8455 && GET_CODE (anchor) != JUMP_INSN
8456 && GET_CODE (anchor) != CALL_INSN)
8457 || GET_CODE (PATTERN (anchor)) == USE
8458 || GET_CODE (PATTERN (anchor)) == CLOBBER
8459 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8460 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8461 continue;
8463 anchor_attr = get_attr_pa_combine_type (anchor);
8464 /* See if anchor is an insn suitable for combination. */
8465 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8466 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8467 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8468 && ! forward_branch_p (anchor)))
8470 rtx floater;
8472 for (floater = PREV_INSN (anchor);
8473 floater;
8474 floater = PREV_INSN (floater))
8476 if (GET_CODE (floater) == NOTE
8477 || (GET_CODE (floater) == INSN
8478 && (GET_CODE (PATTERN (floater)) == USE
8479 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8480 continue;
8482 /* Anything except a regular INSN will stop our search. */
8483 if (GET_CODE (floater) != INSN
8484 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8485 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8487 floater = NULL_RTX;
8488 break;
8491 /* See if FLOATER is suitable for combination with the
8492 anchor. */
8493 floater_attr = get_attr_pa_combine_type (floater);
8494 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8495 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8496 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8497 && floater_attr == PA_COMBINE_TYPE_FMPY))
8499 /* If ANCHOR and FLOATER can be combined, then we're
8500 done with this pass. */
8501 if (pa_can_combine_p (new, anchor, floater, 0,
8502 SET_DEST (PATTERN (floater)),
8503 XEXP (SET_SRC (PATTERN (floater)), 0),
8504 XEXP (SET_SRC (PATTERN (floater)), 1)))
8505 break;
8508 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8509 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8511 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8513 if (pa_can_combine_p (new, anchor, floater, 0,
8514 SET_DEST (PATTERN (floater)),
8515 XEXP (SET_SRC (PATTERN (floater)), 0),
8516 XEXP (SET_SRC (PATTERN (floater)), 1)))
8517 break;
8519 else
8521 if (pa_can_combine_p (new, anchor, floater, 0,
8522 SET_DEST (PATTERN (floater)),
8523 SET_SRC (PATTERN (floater)),
8524 SET_SRC (PATTERN (floater))))
8525 break;
8530 /* If we didn't find anything on the backwards scan try forwards. */
8531 if (!floater
8532 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8533 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8535 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8537 if (GET_CODE (floater) == NOTE
8538 || (GET_CODE (floater) == INSN
8539 && (GET_CODE (PATTERN (floater)) == USE
8540 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8542 continue;
8544 /* Anything except a regular INSN will stop our search. */
8545 if (GET_CODE (floater) != INSN
8546 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8547 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8549 floater = NULL_RTX;
8550 break;
8553 /* See if FLOATER is suitable for combination with the
8554 anchor. */
8555 floater_attr = get_attr_pa_combine_type (floater);
8556 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8557 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8558 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8559 && floater_attr == PA_COMBINE_TYPE_FMPY))
8561 /* If ANCHOR and FLOATER can be combined, then we're
8562 done with this pass. */
8563 if (pa_can_combine_p (new, anchor, floater, 1,
8564 SET_DEST (PATTERN (floater)),
8565 XEXP (SET_SRC (PATTERN (floater)),
8566 0),
8567 XEXP (SET_SRC (PATTERN (floater)),
8568 1)))
8569 break;
8574 /* FLOATER will be nonzero if we found a suitable floating
8575 insn for combination with ANCHOR. */
8576 if (floater
8577 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8578 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8580 /* Emit the new instruction and delete the old anchor. */
8581 emit_insn_before (gen_rtx_PARALLEL
8582 (VOIDmode,
8583 gen_rtvec (2, PATTERN (anchor),
8584 PATTERN (floater))),
8585 anchor);
8587 PUT_CODE (anchor, NOTE);
8588 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8589 NOTE_SOURCE_FILE (anchor) = 0;
8591 /* Emit a special USE insn for FLOATER, then delete
8592 the floating insn. */
8593 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8594 delete_insn (floater);
8596 continue;
8598 else if (floater
8599 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8601 rtx temp;
8602 /* Emit the new_jump instruction and delete the old anchor. */
8603 temp
8604 = emit_jump_insn_before (gen_rtx_PARALLEL
8605 (VOIDmode,
8606 gen_rtvec (2, PATTERN (anchor),
8607 PATTERN (floater))),
8608 anchor);
8610 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8611 PUT_CODE (anchor, NOTE);
8612 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8613 NOTE_SOURCE_FILE (anchor) = 0;
8615 /* Emit a special USE insn for FLOATER, then delete
8616 the floating insn. */
8617 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8618 delete_insn (floater);
8619 continue;
8625 static int
8626 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8627 rtx src1, rtx src2)
8629 int insn_code_number;
8630 rtx start, end;
8632 /* Create a PARALLEL with the patterns of ANCHOR and
8633 FLOATER, try to recognize it, then test constraints
8634 for the resulting pattern.
8636 If the pattern doesn't match or the constraints
8637 aren't met keep searching for a suitable floater
8638 insn. */
8639 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8640 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8641 INSN_CODE (new) = -1;
8642 insn_code_number = recog_memoized (new);
8643 if (insn_code_number < 0
8644 || (extract_insn (new), ! constrain_operands (1)))
8645 return 0;
8647 if (reversed)
8649 start = anchor;
8650 end = floater;
8652 else
8654 start = floater;
8655 end = anchor;
8658 /* There's up to three operands to consider. One
8659 output and two inputs.
8661 The output must not be used between FLOATER & ANCHOR
8662 exclusive. The inputs must not be set between
8663 FLOATER and ANCHOR exclusive. */
8665 if (reg_used_between_p (dest, start, end))
8666 return 0;
8668 if (reg_set_between_p (src1, start, end))
8669 return 0;
8671 if (reg_set_between_p (src2, start, end))
8672 return 0;
8674 /* If we get here, then everything is good. */
8675 return 1;
8678 /* Return nonzero if references for INSN are delayed.
8680 Millicode insns are actually function calls with some special
8681 constraints on arguments and register usage.
8683 Millicode calls always expect their arguments in the integer argument
8684 registers, and always return their result in %r29 (ret1). They
8685 are expected to clobber their arguments, %r1, %r29, and the return
8686 pointer, which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8688 This function tells reorg that the references to arguments and
8689 millicode calls do not appear to happen until after the millicode call.
8690 This allows reorg to put insns which set the argument registers into the
8691 delay slot of the millicode call -- thus they act more like traditional
8692 CALL_INSNs.
8694 Note we cannot consider side effects of the insn to be delayed because
8695 the branch and link insn will clobber the return pointer. If we happened
8696 to use the return pointer in the delay slot of the call, then we lose.
8698 get_attr_type will try to recognize the given insn, so make sure to
8699 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
8700 in particular. */
8702 insn_refs_are_delayed (rtx insn)
8704 return ((GET_CODE (insn) == INSN
8705 && GET_CODE (PATTERN (insn)) != SEQUENCE
8706 && GET_CODE (PATTERN (insn)) != USE
8707 && GET_CODE (PATTERN (insn)) != CLOBBER
8708 && get_attr_type (insn) == TYPE_MILLI));
8711 /* On the HP-PA the value is found in register(s) 28(-29), unless
8712 the mode is SF or DF. Then the value is returned in fr4 (32).
8714 This must perform the same promotions as PROMOTE_MODE, else
8715 TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.
8717 Small structures must be returned in a PARALLEL on PA64 in order
8718 to match the HP Compiler ABI. */
8721 function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
8723 enum machine_mode valmode;
8725 if (AGGREGATE_TYPE_P (valtype))
8727 if (TARGET_64BIT)
8729 /* Aggregates with a size less than or equal to 128 bits are
8730 returned in GR 28(-29). They are left justified. The pad
8731 bits are undefined. Larger aggregates are returned in
8732 memory. */
8733 rtx loc[2];
8734 int i, offset = 0;
8735 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
8737 for (i = 0; i < ub; i++)
8739 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8740 gen_rtx_REG (DImode, 28 + i),
8741 GEN_INT (offset));
8742 offset += 8;
8745 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
8747 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
8749 /* Aggregates 5 to 8 bytes in size are returned in general
8750 registers r28-r29 in the same manner as other non
8751 floating-point objects. The data is right-justified and
8752 zero-extended to 64 bits. This is opposite to the normal
8753 justification used on big endian targets and requires
8754 special treatment. */
8755 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8756 gen_rtx_REG (DImode, 28), const0_rtx);
8757 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
8761 if ((INTEGRAL_TYPE_P (valtype)
8762 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
8763 || POINTER_TYPE_P (valtype))
8764 valmode = word_mode;
8765 else
8766 valmode = TYPE_MODE (valtype);
8768 if (TREE_CODE (valtype) == REAL_TYPE
8769 && !AGGREGATE_TYPE_P (valtype)
8770 && TYPE_MODE (valtype) != TFmode
8771 && !TARGET_SOFT_FLOAT)
8772 return gen_rtx_REG (valmode, 32);
8774 return gen_rtx_REG (valmode, 28);
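/* Thus, for example, an int is returned in %r28 after promotion to
   word_mode, a float or double is returned in %fr4 (register 32),
   and a 16-byte aggregate on PA64 comes back in the register pair
   starting at %r28 via the PARALLEL above.  */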
8777 /* Return the location of a parameter that is passed in a register or NULL
8778 if the parameter has any component that is passed in memory.
8780 This is new code and will be pushed into the net sources after
8781 further testing.
8783 ??? We might want to restructure this so that it looks more like other
8784 ports. */
8786 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
8787 int named ATTRIBUTE_UNUSED)
8789 int max_arg_words = (TARGET_64BIT ? 8 : 4);
8790 int alignment = 0;
8791 int arg_size;
8792 int fpr_reg_base;
8793 int gpr_reg_base;
8794 rtx retval;
8796 if (mode == VOIDmode)
8797 return NULL_RTX;
8799 arg_size = FUNCTION_ARG_SIZE (mode, type);
8801 /* If this arg would be passed partially or totally on the stack, then
8802 this routine should return zero. pa_arg_partial_bytes will
8803 handle arguments which are split between regs and stack slots if
8804 the ABI mandates split arguments. */
8805 if (! TARGET_64BIT)
8807 /* The 32-bit ABI does not split arguments. */
8808 if (cum->words + arg_size > max_arg_words)
8809 return NULL_RTX;
8811 else
8813 if (arg_size > 1)
8814 alignment = cum->words & 1;
8815 if (cum->words + alignment >= max_arg_words)
8816 return NULL_RTX;
8819 /* The 32bit ABIs and the 64bit ABIs are rather different,
8820 particularly in their handling of FP registers. We might
8821 be able to cleverly share code between them, but I'm not
8822 going to bother in the hope that splitting them up results
8823 in code that is more easily understood. */
8825 if (TARGET_64BIT)
8827 /* Advance the base registers to their current locations.
8829 Remember, gprs grow towards smaller register numbers while
8830 fprs grow to higher register numbers. Also remember that
8831 although FP regs are 32-bit addressable, we pretend that
8832 the registers are 64-bits wide. */
8833 gpr_reg_base = 26 - cum->words;
8834 fpr_reg_base = 32 + cum->words;
8836 /* Arguments wider than one word and small aggregates need special
8837 treatment. */
8838 if (arg_size > 1
8839 || mode == BLKmode
8840 || (type && AGGREGATE_TYPE_P (type)))
8842 /* Double-extended precision (80-bit), quad-precision (128-bit)
8843 and aggregates including complex numbers are aligned on
8844 128-bit boundaries. The first eight 64-bit argument slots
8845 are associated one-to-one, with general registers r26
8846 through r19, and also with floating-point registers fr4
8847 through fr11. Arguments larger than one word are always
8848 passed in general registers.
8850 Using a PARALLEL with a word mode register results in left
8851 justified data on a big-endian target. */
8853 rtx loc[8];
8854 int i, offset = 0, ub = arg_size;
8856 /* Align the base register. */
8857 gpr_reg_base -= alignment;
8859 ub = MIN (ub, max_arg_words - cum->words - alignment);
8860 for (i = 0; i < ub; i++)
8862 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8863 gen_rtx_REG (DImode, gpr_reg_base),
8864 GEN_INT (offset));
8865 gpr_reg_base -= 1;
8866 offset += 8;
8869 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
8872 else
8874 /* If the argument is larger than a word, then we know precisely
8875 which registers we must use. */
8876 if (arg_size > 1)
8878 if (cum->words)
8880 gpr_reg_base = 23;
8881 fpr_reg_base = 38;
8883 else
8885 gpr_reg_base = 25;
8886 fpr_reg_base = 34;
8889 /* Structures 5 to 8 bytes in size are passed in the general
8890 registers in the same manner as other non floating-point
8891 objects. The data is right-justified and zero-extended
8892 to 64 bits. This is opposite to the normal justification
8893 used on big endian targets and requires special treatment.
8894 We now define BLOCK_REG_PADDING to pad these objects. */
8895 if (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
8897 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8898 gen_rtx_REG (DImode, gpr_reg_base),
8899 const0_rtx);
8900 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
8903 else
8905 /* We have a single word (32 bits). A simple computation
8906 will get us the register #s we need. */
8907 gpr_reg_base = 26 - cum->words;
8908 fpr_reg_base = 32 + 2 * cum->words;
8912 /* Determine if the argument needs to be passed in both general and
8913 floating point registers. */
8914 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
8915 /* If we are doing soft-float with portable runtime, then there
8916 is no need to worry about FP regs. */
8917 && !TARGET_SOFT_FLOAT
8918 /* The parameter must be some kind of float, else we can just
8919 pass it in integer registers. */
8920 && FLOAT_MODE_P (mode)
8921 /* The target function must not have a prototype. */
8922 && cum->nargs_prototype <= 0
8923 /* libcalls do not need to pass items in both FP and general
8924 registers. */
8925 && type != NULL_TREE
8926 /* All this hair applies to "outgoing" args only. This includes
8927 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
8928 && !cum->incoming)
8929 /* Also pass outgoing floating arguments in both registers in indirect
8930 calls with the 32 bit ABI and the HP assembler since there is no
8931 way to specify the argument locations in static functions.
8932 || (!TARGET_64BIT
8933 && !TARGET_GAS
8934 && !cum->incoming
8935 && cum->indirect
8936 && FLOAT_MODE_P (mode)))
8938 retval
8939 = gen_rtx_PARALLEL
8940 (mode,
8941 gen_rtvec (2,
8942 gen_rtx_EXPR_LIST (VOIDmode,
8943 gen_rtx_REG (mode, fpr_reg_base),
8944 const0_rtx),
8945 gen_rtx_EXPR_LIST (VOIDmode,
8946 gen_rtx_REG (mode, gpr_reg_base),
8947 const0_rtx)));
8949 else
8951 /* See if we should pass this parameter in a general register. */
8952 if (TARGET_SOFT_FLOAT
8953 /* Indirect calls in the normal 32bit ABI require all arguments
8954 to be passed in general registers. */
8955 || (!TARGET_PORTABLE_RUNTIME
8956 && !TARGET_64BIT
8957 && !TARGET_ELF32
8958 && cum->indirect)
8959 /* If the parameter is not a floating point parameter, then
8960 it belongs in GPRs. */
8961 || !FLOAT_MODE_P (mode)
8962 /* Structure with single SFmode field belongs in GPR. */
8963 || (type && AGGREGATE_TYPE_P (type)))
8964 retval = gen_rtx_REG (mode, gpr_reg_base);
8965 else
8966 retval = gen_rtx_REG (mode, fpr_reg_base);
8968 return retval;
8972 /* If this arg would be passed totally in registers or totally on the stack,
8973 then this routine should return zero. */
8975 static int
8976 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8977 tree type, bool named ATTRIBUTE_UNUSED)
8979 unsigned int max_arg_words = 8;
8980 unsigned int offset = 0;
8982 if (!TARGET_64BIT)
8983 return 0;
8985 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
8986 offset = 1;
8988 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
8989 /* Arg fits fully into registers. */
8990 return 0;
8991 else if (cum->words + offset >= max_arg_words)
8992 /* Arg fully on the stack. */
8993 return 0;
8994 else
8995 /* Arg is split. */
8996 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
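/* For example, on the 64-bit port an argument needing three slots
   that starts at slot 6 has two words passed in registers and one
   on the stack, so this returns 2 * UNITS_PER_WORD.  */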
9000 /* Return a string to output before text in the current function.
9002 This function is only used with SOM. Because we don't support
9003 named subspaces, we can only create a new subspace or switch back
9004 to the default text subspace. */
9005 const char *
9006 som_text_section_asm_op (void)
9008 if (!TARGET_SOM)
9009 return "";
9011 if (TARGET_GAS)
9013 if (cfun && !cfun->machine->in_nsubspa)
9015 /* We only want to emit a .nsubspa directive once at the
9016 start of the function. */
9017 cfun->machine->in_nsubspa = 1;
9019 /* Create a new subspace for the text. This provides
9020 better stub placement and one-only functions. */
9021 if (cfun->decl
9022 && DECL_ONE_ONLY (cfun->decl)
9023 && !DECL_WEAK (cfun->decl))
9024 return
9025 "\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,SORT=24,COMDAT";
9027 return "\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$";
9029 else
9031 /* There isn't a current function or the body of the current
9032 function has been completed. So, we are changing to the
9033 text section to output debugging information. Do this in
9034 the default text section. We need to forget that we are
9035 in the text section so that the function text_section in
9036 varasm.c will call us the next time around. */
9037 forget_section ();
9041 return "\t.SPACE $TEXT$\n\t.SUBSPA $CODE$";
9044 /* On hpux10, the linker will give an error if we have a reference
9045 in the read-only data section to a symbol defined in a shared
9046 library. Therefore, expressions that might require a reloc cannot
9047 be placed in the read-only data section. */
9049 static void
9050 pa_select_section (tree exp, int reloc,
9051 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9053 if (TREE_CODE (exp) == VAR_DECL
9054 && TREE_READONLY (exp)
9055 && !TREE_THIS_VOLATILE (exp)
9056 && DECL_INITIAL (exp)
9057 && (DECL_INITIAL (exp) == error_mark_node
9058 || TREE_CONSTANT (DECL_INITIAL (exp)))
9059 && !reloc)
9061 if (TARGET_SOM
9062 && DECL_ONE_ONLY (exp)
9063 && !DECL_WEAK (exp))
9064 som_one_only_readonly_data_section ();
9065 else
9066 readonly_data_section ();
9068 else if (CONSTANT_CLASS_P (exp) && !reloc)
9069 readonly_data_section ();
9070 else if (TARGET_SOM
9071 && TREE_CODE (exp) == VAR_DECL
9072 && DECL_ONE_ONLY (exp)
9073 && !DECL_WEAK (exp))
9074 som_one_only_data_section ();
9075 else
9076 data_section ();
9079 static void
9080 pa_globalize_label (FILE *stream, const char *name)
9082 /* We only handle DATA objects here, functions are globalized in
9083 ASM_DECLARE_FUNCTION_NAME. */
9084 if (! FUNCTION_NAME_P (name))
9086 fputs ("\t.EXPORT ", stream);
9087 assemble_name (stream, name);
9088 fputs (",DATA\n", stream);
9092 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9094 static rtx
9095 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9096 int incoming ATTRIBUTE_UNUSED)
9098 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9101 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9103 bool
9104 pa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
9106 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9107 PA64 ABI says that objects larger than 128 bits are returned in memory.
9108 Note, int_size_in_bytes can return -1 if the size of the object is
9109 variable or larger than the maximum value that can be expressed as
9110 a HOST_WIDE_INT. It can also return zero for an empty type. The
9111 simplest way to handle variable and empty types is to pass them in
9112 memory. This avoids problems in defining the boundaries of argument
9113 slots, allocating registers, etc. */
9114 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9115 || int_size_in_bytes (type) <= 0);
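/* For example, a 12-byte structure is returned in memory on the
   32-bit ports (it exceeds 8 bytes) but in registers on PA64 (it
   does not exceed 16 bytes).  */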
9118 /* Structure to hold declaration and name of external symbols that are
9119 emitted by GCC. We generate a vector of these symbols and output them
9120 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9121 This avoids putting out names that are never really used. */
9123 typedef struct extern_symbol GTY(())
9125 tree decl;
9126 const char *name;
9127 } extern_symbol;
9129 /* Define gc'd vector type for extern_symbol. */
9130 DEF_VEC_O(extern_symbol);
9131 DEF_VEC_ALLOC_O(extern_symbol,gc);
9133 /* Vector of extern_symbol pointers. */
9134 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9136 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9137 /* Mark DECL (name NAME) as an external reference (assembler output
9138 file FILE). This saves the names to output at the end of the file
9139 if actually referenced. */
9141 void
9142 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9144 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9146 gcc_assert (file == asm_out_file);
9147 p->decl = decl;
9148 p->name = name;
9151 /* Output text required at the end of an assembler file.
9152 This includes deferred plabels and .import directives for
9153 all external symbols that were actually referenced. */
9155 static void
9156 pa_hpux_file_end (void)
9158 unsigned int i;
9159 extern_symbol *p;
9161 output_deferred_plabels ();
9163 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9165 tree decl = p->decl;
9167 if (!TREE_ASM_WRITTEN (decl)
9168 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9169 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9172 VEC_free (extern_symbol, gc, extern_symbols);
9174 #endif
9176 #include "gt-pa.h"