2009-07-17 Richard Guenther <rguenther@suse.de>
[official-gcc.git] / gcc / config / pa / pa.c
blobed64d5de2fed2232a6a514c9b9532eecf29f317d
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "tree.h"
36 #include "output.h"
37 #include "except.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "integrate.h"
42 #include "function.h"
43 #include "toplev.h"
44 #include "ggc.h"
45 #include "recog.h"
46 #include "predict.h"
47 #include "tm_p.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "df.h"
52 /* Return nonzero if there is a bypass for the output of
53 OUT_INSN and the fp store IN_INSN. */
54 int
55 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
57 enum machine_mode store_mode;
58 enum machine_mode other_mode;
59 rtx set;
61 if (recog_memoized (in_insn) < 0
62 || (get_attr_type (in_insn) != TYPE_FPSTORE
63 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
64 || recog_memoized (out_insn) < 0)
65 return 0;
67 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
69 set = single_set (out_insn);
70 if (!set)
71 return 0;
73 other_mode = GET_MODE (SET_SRC (set));
75 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
79 #ifndef DO_FRAME_NOTES
80 #ifdef INCOMING_RETURN_ADDR_RTX
81 #define DO_FRAME_NOTES 1
82 #else
83 #define DO_FRAME_NOTES 0
84 #endif
85 #endif
87 static void copy_reg_pointer (rtx, rtx);
88 static void fix_range (const char *);
89 static bool pa_handle_option (size_t, const char *, int);
90 static int hppa_address_cost (rtx, bool);
91 static bool hppa_rtx_costs (rtx, int, int, int *, bool);
92 static inline rtx force_mode (enum machine_mode, rtx);
93 static void pa_reorg (void);
94 static void pa_combine_instructions (void);
95 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
96 static bool forward_branch_p (rtx);
97 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
98 static int compute_movmem_length (rtx);
99 static int compute_clrmem_length (rtx);
100 static bool pa_assemble_integer (rtx, unsigned int, int);
101 static void remove_useless_addtr_insns (int);
102 static void store_reg (int, HOST_WIDE_INT, int);
103 static void store_reg_modify (int, int, HOST_WIDE_INT);
104 static void load_reg (int, HOST_WIDE_INT, int);
105 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
106 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
107 static void update_total_code_bytes (unsigned int);
108 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
109 static int pa_adjust_cost (rtx, rtx, rtx, int);
110 static int pa_adjust_priority (rtx, int);
111 static int pa_issue_rate (void);
112 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
113 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
114 ATTRIBUTE_UNUSED;
115 static void pa_encode_section_info (tree, rtx, int);
116 static const char *pa_strip_name_encoding (const char *);
117 static bool pa_function_ok_for_sibcall (tree, tree);
118 static void pa_globalize_label (FILE *, const char *)
119 ATTRIBUTE_UNUSED;
120 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
121 HOST_WIDE_INT, tree);
122 #if !defined(USE_COLLECT2)
123 static void pa_asm_out_constructor (rtx, int);
124 static void pa_asm_out_destructor (rtx, int);
125 #endif
126 static void pa_init_builtins (void);
127 static rtx hppa_builtin_saveregs (void);
128 static void hppa_va_start (tree, rtx);
129 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
130 static bool pa_scalar_mode_supported_p (enum machine_mode);
131 static bool pa_commutative_p (const_rtx x, int outer_code);
132 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
133 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
134 static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
135 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
136 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
137 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
138 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
139 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
140 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
141 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
142 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
143 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
144 static void output_deferred_plabels (void);
145 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
146 #ifdef ASM_OUTPUT_EXTERNAL_REAL
147 static void pa_hpux_file_end (void);
148 #endif
149 #ifdef HPUX_LONG_DOUBLE_LIBRARY
150 static void pa_hpux_init_libfuncs (void);
151 #endif
152 static rtx pa_struct_value_rtx (tree, int);
153 static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
154 const_tree, bool);
155 static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
156 tree, bool);
157 static struct machine_function * pa_init_machine_status (void);
158 static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
159 enum machine_mode,
160 secondary_reload_info *);
161 static void pa_extra_live_on_entry (bitmap);
163 /* The following extra sections are only used for SOM. */
164 static GTY(()) section *som_readonly_data_section;
165 static GTY(()) section *som_one_only_readonly_data_section;
166 static GTY(()) section *som_one_only_data_section;
168 /* Which cpu we are scheduling for. */
169 enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;
171 /* The UNIX standard to use for predefines and linking. */
172 int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;
174 /* Counts for the number of callee-saved general and floating point
175 registers which were saved by the current function's prologue. */
176 static int gr_saved, fr_saved;
178 /* Boolean indicating whether the return pointer was saved by the
179 current function's prologue. */
180 static bool rp_saved;
182 static rtx find_addr_reg (rtx);
184 /* Keep track of the number of bytes we have output in the CODE subspace
185 during this compilation so we'll know when to emit inline long-calls. */
186 unsigned long total_code_bytes;
188 /* The last address of the previous function plus the number of bytes in
189 associated thunks that have been output. This is used to determine if
190 a thunk can use an IA-relative branch to reach its target function. */
191 static unsigned int last_address;
193 /* Variables to handle plabels that we discover are necessary at assembly
194 output time. They are output after the current function. */
195 struct GTY(()) deferred_plabel
197 rtx internal_label;
198 rtx symbol;
200 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
201 deferred_plabels;
202 static size_t n_deferred_plabels = 0;
205 /* Initialize the GCC target structure. */
207 #undef TARGET_ASM_ALIGNED_HI_OP
208 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
209 #undef TARGET_ASM_ALIGNED_SI_OP
210 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
211 #undef TARGET_ASM_ALIGNED_DI_OP
212 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
213 #undef TARGET_ASM_UNALIGNED_HI_OP
214 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
215 #undef TARGET_ASM_UNALIGNED_SI_OP
216 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
217 #undef TARGET_ASM_UNALIGNED_DI_OP
218 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
219 #undef TARGET_ASM_INTEGER
220 #define TARGET_ASM_INTEGER pa_assemble_integer
222 #undef TARGET_ASM_FUNCTION_PROLOGUE
223 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
224 #undef TARGET_ASM_FUNCTION_EPILOGUE
225 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
227 #undef TARGET_LEGITIMIZE_ADDRESS
228 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
230 #undef TARGET_SCHED_ADJUST_COST
231 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
232 #undef TARGET_SCHED_ADJUST_PRIORITY
233 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
234 #undef TARGET_SCHED_ISSUE_RATE
235 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
237 #undef TARGET_ENCODE_SECTION_INFO
238 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
239 #undef TARGET_STRIP_NAME_ENCODING
240 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
242 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
243 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
245 #undef TARGET_COMMUTATIVE_P
246 #define TARGET_COMMUTATIVE_P pa_commutative_p
248 #undef TARGET_ASM_OUTPUT_MI_THUNK
249 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
250 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
251 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
253 #undef TARGET_ASM_FILE_END
254 #ifdef ASM_OUTPUT_EXTERNAL_REAL
255 #define TARGET_ASM_FILE_END pa_hpux_file_end
256 #else
257 #define TARGET_ASM_FILE_END output_deferred_plabels
258 #endif
260 #if !defined(USE_COLLECT2)
261 #undef TARGET_ASM_CONSTRUCTOR
262 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
263 #undef TARGET_ASM_DESTRUCTOR
264 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
265 #endif
267 #undef TARGET_DEFAULT_TARGET_FLAGS
268 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
269 #undef TARGET_HANDLE_OPTION
270 #define TARGET_HANDLE_OPTION pa_handle_option
272 #undef TARGET_INIT_BUILTINS
273 #define TARGET_INIT_BUILTINS pa_init_builtins
275 #undef TARGET_RTX_COSTS
276 #define TARGET_RTX_COSTS hppa_rtx_costs
277 #undef TARGET_ADDRESS_COST
278 #define TARGET_ADDRESS_COST hppa_address_cost
280 #undef TARGET_MACHINE_DEPENDENT_REORG
281 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
283 #ifdef HPUX_LONG_DOUBLE_LIBRARY
284 #undef TARGET_INIT_LIBFUNCS
285 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
286 #endif
288 #undef TARGET_PROMOTE_FUNCTION_RETURN
289 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
290 #undef TARGET_PROMOTE_PROTOTYPES
291 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
293 #undef TARGET_STRUCT_VALUE_RTX
294 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
295 #undef TARGET_RETURN_IN_MEMORY
296 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
297 #undef TARGET_MUST_PASS_IN_STACK
298 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
299 #undef TARGET_PASS_BY_REFERENCE
300 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
301 #undef TARGET_CALLEE_COPIES
302 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
303 #undef TARGET_ARG_PARTIAL_BYTES
304 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
306 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
307 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
308 #undef TARGET_EXPAND_BUILTIN_VA_START
309 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
310 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
311 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
313 #undef TARGET_SCALAR_MODE_SUPPORTED_P
314 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
316 #undef TARGET_CANNOT_FORCE_CONST_MEM
317 #define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
319 #undef TARGET_SECONDARY_RELOAD
320 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
322 #undef TARGET_EXTRA_LIVE_ON_ENTRY
323 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
325 struct gcc_target targetm = TARGET_INITIALIZER;
327 /* Parse the -mfixed-range= option string. */
329 static void
330 fix_range (const char *const_str)
332 int i, first, last;
333 char *str, *dash, *comma;
335 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
336 REG2 are either register names or register numbers. The effect
337 of this option is to mark the registers in the range from REG1 to
338 REG2 as ``fixed'' so they won't be used by the compiler. This is
339 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
341 i = strlen (const_str);
342 str = (char *) alloca (i + 1);
343 memcpy (str, const_str, i + 1);
345 while (1)
347 dash = strchr (str, '-');
348 if (!dash)
350 warning (0, "value of -mfixed-range must have form REG1-REG2");
351 return;
353 *dash = '\0';
355 comma = strchr (dash + 1, ',');
356 if (comma)
357 *comma = '\0';
359 first = decode_reg_name (str);
360 if (first < 0)
362 warning (0, "unknown register name: %s", str);
363 return;
366 last = decode_reg_name (dash + 1);
367 if (last < 0)
369 warning (0, "unknown register name: %s", dash + 1);
370 return;
373 *dash = '-';
375 if (first > last)
377 warning (0, "%s-%s is an empty range", str, dash + 1);
378 return;
381 for (i = first; i <= last; ++i)
382 fixed_regs[i] = call_used_regs[i] = 1;
384 if (!comma)
385 break;
387 *comma = ',';
388 str = comma + 1;
391 /* Check if all floating point registers have been fixed. */
392 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
393 if (!fixed_regs[i])
394 break;
396 if (i > FP_REG_LAST)
397 target_flags |= MASK_DISABLE_FPREGS;
400 /* Implement TARGET_HANDLE_OPTION. */
402 static bool
403 pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
405 switch (code)
407 case OPT_mnosnake:
408 case OPT_mpa_risc_1_0:
409 case OPT_march_1_0:
410 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
411 return true;
413 case OPT_msnake:
414 case OPT_mpa_risc_1_1:
415 case OPT_march_1_1:
416 target_flags &= ~MASK_PA_20;
417 target_flags |= MASK_PA_11;
418 return true;
420 case OPT_mpa_risc_2_0:
421 case OPT_march_2_0:
422 target_flags |= MASK_PA_11 | MASK_PA_20;
423 return true;
425 case OPT_mschedule_:
426 if (strcmp (arg, "8000") == 0)
427 pa_cpu = PROCESSOR_8000;
428 else if (strcmp (arg, "7100") == 0)
429 pa_cpu = PROCESSOR_7100;
430 else if (strcmp (arg, "700") == 0)
431 pa_cpu = PROCESSOR_700;
432 else if (strcmp (arg, "7100LC") == 0)
433 pa_cpu = PROCESSOR_7100LC;
434 else if (strcmp (arg, "7200") == 0)
435 pa_cpu = PROCESSOR_7200;
436 else if (strcmp (arg, "7300") == 0)
437 pa_cpu = PROCESSOR_7300;
438 else
439 return false;
440 return true;
442 case OPT_mfixed_range_:
443 fix_range (arg);
444 return true;
446 #if TARGET_HPUX
447 case OPT_munix_93:
448 flag_pa_unix = 1993;
449 return true;
450 #endif
452 #if TARGET_HPUX_10_10
453 case OPT_munix_95:
454 flag_pa_unix = 1995;
455 return true;
456 #endif
458 #if TARGET_HPUX_11_11
459 case OPT_munix_98:
460 flag_pa_unix = 1998;
461 return true;
462 #endif
464 default:
465 return true;
469 void
470 override_options (void)
472 /* Unconditional branches in the delay slot are not compatible with dwarf2
473 call frame information. There is no benefit in using this optimization
474 on PA8000 and later processors. */
475 if (pa_cpu >= PROCESSOR_8000
476 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
477 || flag_unwind_tables)
478 target_flags &= ~MASK_JUMP_IN_DELAY;
480 if (flag_pic && TARGET_PORTABLE_RUNTIME)
482 warning (0, "PIC code generation is not supported in the portable runtime model");
485 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
487 warning (0, "PIC code generation is not compatible with fast indirect calls");
490 if (! TARGET_GAS && write_symbols != NO_DEBUG)
492 warning (0, "-g is only supported when using GAS on this processor,");
493 warning (0, "-g option disabled");
494 write_symbols = NO_DEBUG;
497 /* We only support the "big PIC" model now. And we always generate PIC
498 code when in 64bit mode. */
499 if (flag_pic == 1 || TARGET_64BIT)
500 flag_pic = 2;
502 /* We can't guarantee that .dword is available for 32-bit targets. */
503 if (UNITS_PER_WORD == 4)
504 targetm.asm_out.aligned_op.di = NULL;
506 /* The unaligned ops are only available when using GAS. */
507 if (!TARGET_GAS)
509 targetm.asm_out.unaligned_op.hi = NULL;
510 targetm.asm_out.unaligned_op.si = NULL;
511 targetm.asm_out.unaligned_op.di = NULL;
514 init_machine_status = pa_init_machine_status;
/* Implement TARGET_INIT_BUILTINS: adjust builtin declarations for
   quirks of the target C library.  */
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  /* The target library lacks fputc_unlocked; reuse the
     putc_unlocked declarations in its place.  */
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
#if TARGET_HPUX_11
  /* HP-UX 11 provides the finite classification functions under the
     names _Isfinite and _Isfinitef.  */
  if (built_in_decls[BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls[BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls[BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls[BUILT_IN_FINITEF], "_Isfinitef");
#endif
}
534 /* Function to init struct machine_function.
535 This will be called, via a pointer variable,
536 from push_function_context. */
538 static struct machine_function *
539 pa_init_machine_status (void)
541 return GGC_CNEW (machine_function);
544 /* If FROM is a probable pointer register, mark TO as a probable
545 pointer register with the same pointer alignment as FROM. */
547 static void
548 copy_reg_pointer (rtx to, rtx from)
550 if (REG_POINTER (from))
551 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
554 /* Return 1 if X contains a symbolic expression. We know these
555 expressions will have one of a few well defined forms, so
556 we need only check those forms. */
558 symbolic_expression_p (rtx x)
561 /* Strip off any HIGH. */
562 if (GET_CODE (x) == HIGH)
563 x = XEXP (x, 0);
565 return (symbolic_operand (x, VOIDmode));
568 /* Accept any constant that can be moved in one instruction into a
569 general register. */
571 cint_ok_for_move (HOST_WIDE_INT ival)
573 /* OK if ldo, ldil, or zdepi, can be used. */
574 return (VAL_14_BITS_P (ival)
575 || ldil_cint_p (ival)
576 || zdepi_cint_p (ival));
579 /* Return truth value of whether OP can be used as an operand in a
580 adddi3 insn. */
582 adddi3_operand (rtx op, enum machine_mode mode)
584 return (register_operand (op, mode)
585 || (GET_CODE (op) == CONST_INT
586 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
589 /* True iff the operand OP can be used as the destination operand of
590 an integer store. This also implies the operand could be used as
591 the source operand of an integer load. Symbolic, lo_sum and indexed
592 memory operands are not allowed. We accept reloading pseudos and
593 other memory operands. */
595 integer_store_memory_operand (rtx op, enum machine_mode mode)
597 return ((reload_in_progress
598 && REG_P (op)
599 && REGNO (op) >= FIRST_PSEUDO_REGISTER
600 && reg_renumber [REGNO (op)] < 0)
601 || (GET_CODE (op) == MEM
602 && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
603 && !symbolic_memory_operand (op, VOIDmode)
604 && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
605 && !IS_INDEX_ADDR_P (XEXP (op, 0))));
608 /* True iff ldil can be used to load this CONST_INT. The least
609 significant 11 bits of the value must be zero and the value must
610 not change sign when extended from 32 to 64 bits. */
612 ldil_cint_p (HOST_WIDE_INT ival)
614 HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
616 return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
619 /* True iff zdepi can be used to generate this CONST_INT.
620 zdepi first sign extends a 5-bit signed number to a given field
621 length, then places this field anywhere in a zero. */
623 zdepi_cint_p (unsigned HOST_WIDE_INT x)
625 unsigned HOST_WIDE_INT lsb_mask, t;
627 /* This might not be obvious, but it's at least fast.
628 This function is critical; we don't have the time loops would take. */
629 lsb_mask = x & -x;
630 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
631 /* Return true iff t is a power of two. */
632 return ((t & (t - 1)) == 0);
635 /* True iff depi or extru can be used to compute (reg & mask).
636 Accept bit pattern like these:
637 0....01....1
638 1....10....0
639 1..10..01..1 */
641 and_mask_p (unsigned HOST_WIDE_INT mask)
643 mask = ~mask;
644 mask += mask & -mask;
645 return (mask & (mask - 1)) == 0;
648 /* True iff depi can be used to compute (reg | MASK). */
650 ior_mask_p (unsigned HOST_WIDE_INT mask)
652 mask += mask & -mask;
653 return (mask & (mask - 1)) == 0;
656 /* Legitimize PIC addresses. If the address is already
657 position-independent, we return ORIG. Newly generated
658 position-independent addresses go to REG. If we need more
659 than one register, we lose. */
662 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
664 rtx pic_ref = orig;
666 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
668 /* Labels need special handling. */
669 if (pic_label_operand (orig, mode))
671 rtx insn;
673 /* We do not want to go through the movXX expanders here since that
674 would create recursion.
676 Nor do we really want to call a generator for a named pattern
677 since that requires multiple patterns if we want to support
678 multiple word sizes.
680 So instead we just emit the raw set, which avoids the movXX
681 expanders completely. */
682 mark_reg_pointer (reg, BITS_PER_UNIT);
683 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
685 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
686 add_reg_note (insn, REG_EQUAL, orig);
688 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
689 and update LABEL_NUSES because this is not done automatically. */
690 if (reload_in_progress || reload_completed)
692 /* Extract LABEL_REF. */
693 if (GET_CODE (orig) == CONST)
694 orig = XEXP (XEXP (orig, 0), 0);
695 /* Extract CODE_LABEL. */
696 orig = XEXP (orig, 0);
697 add_reg_note (insn, REG_LABEL_OPERAND, orig);
698 LABEL_NUSES (orig)++;
700 crtl->uses_pic_offset_table = 1;
701 return reg;
703 if (GET_CODE (orig) == SYMBOL_REF)
705 rtx insn, tmp_reg;
707 gcc_assert (reg);
709 /* Before reload, allocate a temporary register for the intermediate
710 result. This allows the sequence to be deleted when the final
711 result is unused and the insns are trivially dead. */
712 tmp_reg = ((reload_in_progress || reload_completed)
713 ? reg : gen_reg_rtx (Pmode));
715 if (function_label_operand (orig, mode))
717 /* Force function label into memory in word mode. */
718 orig = XEXP (force_const_mem (word_mode, orig), 0);
719 /* Load plabel address from DLT. */
720 emit_move_insn (tmp_reg,
721 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
722 gen_rtx_HIGH (word_mode, orig)));
723 pic_ref
724 = gen_const_mem (Pmode,
725 gen_rtx_LO_SUM (Pmode, tmp_reg,
726 gen_rtx_UNSPEC (Pmode,
727 gen_rtvec (1, orig),
728 UNSPEC_DLTIND14R)));
729 emit_move_insn (reg, pic_ref);
730 /* Now load address of function descriptor. */
731 pic_ref = gen_rtx_MEM (Pmode, reg);
733 else
735 /* Load symbol reference from DLT. */
736 emit_move_insn (tmp_reg,
737 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
738 gen_rtx_HIGH (word_mode, orig)));
739 pic_ref
740 = gen_const_mem (Pmode,
741 gen_rtx_LO_SUM (Pmode, tmp_reg,
742 gen_rtx_UNSPEC (Pmode,
743 gen_rtvec (1, orig),
744 UNSPEC_DLTIND14R)));
747 crtl->uses_pic_offset_table = 1;
748 mark_reg_pointer (reg, BITS_PER_UNIT);
749 insn = emit_move_insn (reg, pic_ref);
751 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
752 set_unique_reg_note (insn, REG_EQUAL, orig);
754 return reg;
756 else if (GET_CODE (orig) == CONST)
758 rtx base;
760 if (GET_CODE (XEXP (orig, 0)) == PLUS
761 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
762 return orig;
764 gcc_assert (reg);
765 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
767 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
768 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
769 base == reg ? 0 : reg);
771 if (GET_CODE (orig) == CONST_INT)
773 if (INT_14_BITS (orig))
774 return plus_constant (base, INTVAL (orig));
775 orig = force_reg (Pmode, orig);
777 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
778 /* Likewise, should we set special REG_NOTEs here? */
781 return pic_ref;
784 static GTY(()) rtx gen_tls_tga;
786 static rtx
787 gen_tls_get_addr (void)
789 if (!gen_tls_tga)
790 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
791 return gen_tls_tga;
794 static rtx
795 hppa_tls_call (rtx arg)
797 rtx ret;
799 ret = gen_reg_rtx (Pmode);
800 emit_library_call_value (gen_tls_get_addr (), ret,
801 LCT_CONST, Pmode, 1, arg, Pmode);
803 return ret;
806 static rtx
807 legitimize_tls_address (rtx addr)
809 rtx ret, insn, tmp, t1, t2, tp;
810 enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
812 switch (model)
814 case TLS_MODEL_GLOBAL_DYNAMIC:
815 tmp = gen_reg_rtx (Pmode);
816 if (flag_pic)
817 emit_insn (gen_tgd_load_pic (tmp, addr));
818 else
819 emit_insn (gen_tgd_load (tmp, addr));
820 ret = hppa_tls_call (tmp);
821 break;
823 case TLS_MODEL_LOCAL_DYNAMIC:
824 ret = gen_reg_rtx (Pmode);
825 tmp = gen_reg_rtx (Pmode);
826 start_sequence ();
827 if (flag_pic)
828 emit_insn (gen_tld_load_pic (tmp, addr));
829 else
830 emit_insn (gen_tld_load (tmp, addr));
831 t1 = hppa_tls_call (tmp);
832 insn = get_insns ();
833 end_sequence ();
834 t2 = gen_reg_rtx (Pmode);
835 emit_libcall_block (insn, t2, t1,
836 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
837 UNSPEC_TLSLDBASE));
838 emit_insn (gen_tld_offset_load (ret, addr, t2));
839 break;
841 case TLS_MODEL_INITIAL_EXEC:
842 tp = gen_reg_rtx (Pmode);
843 tmp = gen_reg_rtx (Pmode);
844 ret = gen_reg_rtx (Pmode);
845 emit_insn (gen_tp_load (tp));
846 if (flag_pic)
847 emit_insn (gen_tie_load_pic (tmp, addr));
848 else
849 emit_insn (gen_tie_load (tmp, addr));
850 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
851 break;
853 case TLS_MODEL_LOCAL_EXEC:
854 tp = gen_reg_rtx (Pmode);
855 ret = gen_reg_rtx (Pmode);
856 emit_insn (gen_tp_load (tp));
857 emit_insn (gen_tle_load (ret, addr, tp));
858 break;
860 default:
861 gcc_unreachable ();
864 return ret;
867 /* Try machine-dependent ways of modifying an illegitimate address
868 to be legitimate. If we find one, return the new, valid address.
869 This macro is used in only one place: `memory_address' in explow.c.
871 OLDX is the address as it was before break_out_memory_refs was called.
872 In some cases it is useful to look at this to decide what needs to be done.
874 It is always safe for this macro to do nothing. It exists to recognize
875 opportunities to optimize the output.
877 For the PA, transform:
879 memory(X + <large int>)
881 into:
883 if (<large int> & mask) >= 16
884 Y = (<large int> & ~mask) + mask + 1 Round up.
885 else
886 Y = (<large int> & ~mask) Round down.
887 Z = X + Y
888 memory (Z + (<large int> - Y));
890 This is for CSE to find several similar references, and only use one Z.
892 X can either be a SYMBOL_REF or REG, but because combine cannot
893 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
894 D will not fit in 14 bits.
896 MODE_FLOAT references allow displacements which fit in 5 bits, so use
897 0x1f as the mask.
899 MODE_INT references allow displacements which fit in 14 bits, so use
900 0x3fff as the mask.
902 This relies on the fact that most mode MODE_FLOAT references will use FP
903 registers and most mode MODE_INT references will use integer registers.
904 (In the rare case of an FP register used in an integer MODE, we depend
905 on secondary reloads to clean things up.)
908 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
909 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
910 addressing modes to be used).
912 Put X and Z into registers. Then put the entire expression into
913 a register. */
916 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
917 enum machine_mode mode)
919 rtx orig = x;
921 /* We need to canonicalize the order of operands in unscaled indexed
922 addresses since the code that checks if an address is valid doesn't
923 always try both orders. */
924 if (!TARGET_NO_SPACE_REGS
925 && GET_CODE (x) == PLUS
926 && GET_MODE (x) == Pmode
927 && REG_P (XEXP (x, 0))
928 && REG_P (XEXP (x, 1))
929 && REG_POINTER (XEXP (x, 0))
930 && !REG_POINTER (XEXP (x, 1)))
931 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
933 if (PA_SYMBOL_REF_TLS_P (x))
934 return legitimize_tls_address (x);
935 else if (flag_pic)
936 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
938 /* Strip off CONST. */
939 if (GET_CODE (x) == CONST)
940 x = XEXP (x, 0);
942 /* Special case. Get the SYMBOL_REF into a register and use indexing.
943 That should always be safe. */
944 if (GET_CODE (x) == PLUS
945 && GET_CODE (XEXP (x, 0)) == REG
946 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
948 rtx reg = force_reg (Pmode, XEXP (x, 1));
949 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
952 /* Note we must reject symbols which represent function addresses
953 since the assembler/linker can't handle arithmetic on plabels. */
954 if (GET_CODE (x) == PLUS
955 && GET_CODE (XEXP (x, 1)) == CONST_INT
956 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
957 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
958 || GET_CODE (XEXP (x, 0)) == REG))
960 rtx int_part, ptr_reg;
961 int newoffset;
962 int offset = INTVAL (XEXP (x, 1));
963 int mask;
965 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
966 ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);
968 /* Choose which way to round the offset. Round up if we
969 are >= halfway to the next boundary. */
970 if ((offset & mask) >= ((mask + 1) / 2))
971 newoffset = (offset & ~ mask) + mask + 1;
972 else
973 newoffset = (offset & ~ mask);
975 /* If the newoffset will not fit in 14 bits (ldo), then
976 handling this would take 4 or 5 instructions (2 to load
977 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
978 add the new offset and the SYMBOL_REF.) Combine can
979 not handle 4->2 or 5->2 combinations, so do not create
980 them. */
981 if (! VAL_14_BITS_P (newoffset)
982 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
984 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
985 rtx tmp_reg
986 = force_reg (Pmode,
987 gen_rtx_HIGH (Pmode, const_part));
988 ptr_reg
989 = force_reg (Pmode,
990 gen_rtx_LO_SUM (Pmode,
991 tmp_reg, const_part));
993 else
995 if (! VAL_14_BITS_P (newoffset))
996 int_part = force_reg (Pmode, GEN_INT (newoffset));
997 else
998 int_part = GEN_INT (newoffset);
1000 ptr_reg = force_reg (Pmode,
1001 gen_rtx_PLUS (Pmode,
1002 force_reg (Pmode, XEXP (x, 0)),
1003 int_part));
1005 return plus_constant (ptr_reg, offset - newoffset);
1008 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1010 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1011 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1012 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1013 && (OBJECT_P (XEXP (x, 1))
1014 || GET_CODE (XEXP (x, 1)) == SUBREG)
1015 && GET_CODE (XEXP (x, 1)) != CONST)
1017 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1018 rtx reg1, reg2;
1020 reg1 = XEXP (x, 1);
1021 if (GET_CODE (reg1) != REG)
1022 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1024 reg2 = XEXP (XEXP (x, 0), 0);
1025 if (GET_CODE (reg2) != REG)
1026 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1028 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1029 gen_rtx_MULT (Pmode,
1030 reg2,
1031 GEN_INT (val)),
1032 reg1));
1035 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1037 Only do so for floating point modes since this is more speculative
1038 and we lose if it's an integer store. */
1039 if (GET_CODE (x) == PLUS
1040 && GET_CODE (XEXP (x, 0)) == PLUS
1041 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1042 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1043 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1044 && (mode == SFmode || mode == DFmode))
1047 /* First, try and figure out what to use as a base register. */
1048 rtx reg1, reg2, base, idx, orig_base;
1050 reg1 = XEXP (XEXP (x, 0), 1);
1051 reg2 = XEXP (x, 1);
1052 base = NULL_RTX;
1053 idx = NULL_RTX;
1055 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1056 then emit_move_sequence will turn on REG_POINTER so we'll know
1057 it's a base register below. */
1058 if (GET_CODE (reg1) != REG)
1059 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1061 if (GET_CODE (reg2) != REG)
1062 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1064 /* Figure out what the base and index are. */
1066 if (GET_CODE (reg1) == REG
1067 && REG_POINTER (reg1))
1069 base = reg1;
1070 orig_base = XEXP (XEXP (x, 0), 1);
1071 idx = gen_rtx_PLUS (Pmode,
1072 gen_rtx_MULT (Pmode,
1073 XEXP (XEXP (XEXP (x, 0), 0), 0),
1074 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1075 XEXP (x, 1));
1077 else if (GET_CODE (reg2) == REG
1078 && REG_POINTER (reg2))
1080 base = reg2;
1081 orig_base = XEXP (x, 1);
1082 idx = XEXP (x, 0);
1085 if (base == 0)
1086 return orig;
1088 /* If the index adds a large constant, try to scale the
1089 constant so that it can be loaded with only one insn. */
1090 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1091 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1092 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1093 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1095 /* Divide the CONST_INT by the scale factor, then add it to A. */
1096 int val = INTVAL (XEXP (idx, 1));
1098 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1099 reg1 = XEXP (XEXP (idx, 0), 0);
1100 if (GET_CODE (reg1) != REG)
1101 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1103 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1105 /* We can now generate a simple scaled indexed address. */
1106 return
1107 force_reg
1108 (Pmode, gen_rtx_PLUS (Pmode,
1109 gen_rtx_MULT (Pmode, reg1,
1110 XEXP (XEXP (idx, 0), 1)),
1111 base));
1114 /* If B + C is still a valid base register, then add them. */
1115 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1116 && INTVAL (XEXP (idx, 1)) <= 4096
1117 && INTVAL (XEXP (idx, 1)) >= -4096)
1119 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1120 rtx reg1, reg2;
1122 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1124 reg2 = XEXP (XEXP (idx, 0), 0);
1125 if (GET_CODE (reg2) != CONST_INT)
1126 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1128 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1129 gen_rtx_MULT (Pmode,
1130 reg2,
1131 GEN_INT (val)),
1132 reg1));
1135 /* Get the index into a register, then add the base + index and
1136 return a register holding the result. */
1138 /* First get A into a register. */
1139 reg1 = XEXP (XEXP (idx, 0), 0);
1140 if (GET_CODE (reg1) != REG)
1141 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1143 /* And get B into a register. */
1144 reg2 = XEXP (idx, 1);
1145 if (GET_CODE (reg2) != REG)
1146 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1148 reg1 = force_reg (Pmode,
1149 gen_rtx_PLUS (Pmode,
1150 gen_rtx_MULT (Pmode, reg1,
1151 XEXP (XEXP (idx, 0), 1)),
1152 reg2));
1154 /* Add the result to our base register and return. */
1155 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1159 /* Uh-oh. We might have an address for x[n-100000]. This needs
1160 special handling to avoid creating an indexed memory address
1161 with x-100000 as the base.
1163 If the constant part is small enough, then it's still safe because
1164 there is a guard page at the beginning and end of the data segment.
1166 Scaled references are common enough that we want to try and rearrange the
1167 terms so that we can use indexing for these addresses too. Only
1168 do the optimization for floatint point modes. */
1170 if (GET_CODE (x) == PLUS
1171 && symbolic_expression_p (XEXP (x, 1)))
1173 /* Ugly. We modify things here so that the address offset specified
1174 by the index expression is computed first, then added to x to form
1175 the entire address. */
1177 rtx regx1, regx2, regy1, regy2, y;
1179 /* Strip off any CONST. */
1180 y = XEXP (x, 1);
1181 if (GET_CODE (y) == CONST)
1182 y = XEXP (y, 0);
1184 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1186 /* See if this looks like
1187 (plus (mult (reg) (shadd_const))
1188 (const (plus (symbol_ref) (const_int))))
1190 Where const_int is small. In that case the const
1191 expression is a valid pointer for indexing.
1193 If const_int is big, but can be divided evenly by shadd_const
1194 and added to (reg). This allows more scaled indexed addresses. */
1195 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1196 && GET_CODE (XEXP (x, 0)) == MULT
1197 && GET_CODE (XEXP (y, 1)) == CONST_INT
1198 && INTVAL (XEXP (y, 1)) >= -4096
1199 && INTVAL (XEXP (y, 1)) <= 4095
1200 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1201 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1203 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1204 rtx reg1, reg2;
1206 reg1 = XEXP (x, 1);
1207 if (GET_CODE (reg1) != REG)
1208 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1210 reg2 = XEXP (XEXP (x, 0), 0);
1211 if (GET_CODE (reg2) != REG)
1212 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1214 return force_reg (Pmode,
1215 gen_rtx_PLUS (Pmode,
1216 gen_rtx_MULT (Pmode,
1217 reg2,
1218 GEN_INT (val)),
1219 reg1));
1221 else if ((mode == DFmode || mode == SFmode)
1222 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1223 && GET_CODE (XEXP (x, 0)) == MULT
1224 && GET_CODE (XEXP (y, 1)) == CONST_INT
1225 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1226 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1227 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1229 regx1
1230 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1231 / INTVAL (XEXP (XEXP (x, 0), 1))));
1232 regx2 = XEXP (XEXP (x, 0), 0);
1233 if (GET_CODE (regx2) != REG)
1234 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1235 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1236 regx2, regx1));
1237 return
1238 force_reg (Pmode,
1239 gen_rtx_PLUS (Pmode,
1240 gen_rtx_MULT (Pmode, regx2,
1241 XEXP (XEXP (x, 0), 1)),
1242 force_reg (Pmode, XEXP (y, 0))));
1244 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1245 && INTVAL (XEXP (y, 1)) >= -4096
1246 && INTVAL (XEXP (y, 1)) <= 4095)
1248 /* This is safe because of the guard page at the
1249 beginning and end of the data space. Just
1250 return the original address. */
1251 return orig;
1253 else
1255 /* Doesn't look like one we can optimize. */
1256 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1257 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1258 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1259 regx1 = force_reg (Pmode,
1260 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1261 regx1, regy2));
1262 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1267 return orig;
1270 /* For the HPPA, REG and REG+CONST is cost 0
1271 and addresses involving symbolic constants are cost 2.
1273 PIC addresses are very expensive.
1275 It is no coincidence that this has the same structure
1276 as GO_IF_LEGITIMATE_ADDRESS. */
1278 static int
1279 hppa_address_cost (rtx X,
1280 bool speed ATTRIBUTE_UNUSED)
1282 switch (GET_CODE (X))
1284 case REG:
1285 case PLUS:
1286 case LO_SUM:
1287 return 1;
1288 case HIGH:
1289 return 2;
1290 default:
1291 return 4;
1295 /* Compute a (partial) cost for rtx X. Return true if the complete
1296 cost has been computed, and false if subexpressions should be
1297 scanned. In either case, *TOTAL contains the cost result. */
1299 static bool
1300 hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
1301 bool speed ATTRIBUTE_UNUSED)
1303 switch (code)
1305 case CONST_INT:
1306 if (INTVAL (x) == 0)
1307 *total = 0;
1308 else if (INT_14_BITS (x))
1309 *total = 1;
1310 else
1311 *total = 2;
1312 return true;
1314 case HIGH:
1315 *total = 2;
1316 return true;
1318 case CONST:
1319 case LABEL_REF:
1320 case SYMBOL_REF:
1321 *total = 4;
1322 return true;
1324 case CONST_DOUBLE:
1325 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1326 && outer_code != SET)
1327 *total = 0;
1328 else
1329 *total = 8;
1330 return true;
1332 case MULT:
1333 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1334 *total = COSTS_N_INSNS (3);
1335 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1336 *total = COSTS_N_INSNS (8);
1337 else
1338 *total = COSTS_N_INSNS (20);
1339 return true;
1341 case DIV:
1342 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1344 *total = COSTS_N_INSNS (14);
1345 return true;
1347 /* FALLTHRU */
1349 case UDIV:
1350 case MOD:
1351 case UMOD:
1352 *total = COSTS_N_INSNS (60);
1353 return true;
1355 case PLUS: /* this includes shNadd insns */
1356 case MINUS:
1357 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1358 *total = COSTS_N_INSNS (3);
1359 else
1360 *total = COSTS_N_INSNS (1);
1361 return true;
1363 case ASHIFT:
1364 case ASHIFTRT:
1365 case LSHIFTRT:
1366 *total = COSTS_N_INSNS (1);
1367 return true;
1369 default:
1370 return false;
1374 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1375 new rtx with the correct mode. */
1376 static inline rtx
1377 force_mode (enum machine_mode mode, rtx orig)
1379 if (mode == GET_MODE (orig))
1380 return orig;
1382 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1384 return gen_rtx_REG (mode, REGNO (orig));
1387 /* Return 1 if *X is a thread-local symbol. */
1389 static int
1390 pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1392 return PA_SYMBOL_REF_TLS_P (*x);
1395 /* Return 1 if X contains a thread-local symbol. */
1397 bool
1398 pa_tls_referenced_p (rtx x)
1400 if (!TARGET_HAVE_TLS)
1401 return false;
1403 return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we loose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  /* During reload, replace pseudos that did not get hard registers by
     their recorded memory equivalents so the actual address can be
     handled below.  */
  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  /* Likewise for the source operand.  */
  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  /* Pick up any reload replacement recorded for either address.  */
  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
				 XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p ((GET_MODE_SIZE (mode) == 4
				      ? SFmode : DFmode),
				     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
	   && fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& !memory_address_p ((GET_MODE_SIZE (mode) == 4
				       ? SFmode : DFmode),
				      XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
					  ? SFmode : DFmode),
					 XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand0, 0),
							     0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
			      replace_equiv_address (operand0, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.   Fix this for 2.5.  */
  else if (scratch_reg
	   && CONSTANT_P (operand1)
	   && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      /* Recursive call; no scratch register is needed for this sub-move.  */
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (scratch_reg
	   && GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0),
							     0),
						       scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg,
			  replace_equiv_address (operand1, scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Various sets are created during RTL generation which don't
	     have the REG_POINTER flag correctly set.  After the CSE pass,
	     instruction recognition can fail if we don't consistently
	     set this flag when performing register copies.  This should
	     also improve the opportunities for creating insns that use
	     unscaled indexing.  */
	  if (REG_P (operand0) && REG_P (operand1))
	    {
	      if (REG_POINTER (operand1)
		  && !REG_POINTER (operand0)
		  && !HARD_REGISTER_P (operand0))
		copy_reg_pointer (operand0, operand1);
	      else if (REG_POINTER (operand0)
		       && !REG_POINTER (operand1)
		       && !HARD_REGISTER_P (operand1))
		copy_reg_pointer (operand1, operand0);
	    }

	  /* When MEMs are broken out, the REG_POINTER flag doesn't
	     get set.  In some cases, we can set the REG_POINTER flag
	     from the declaration for the MEM.  */
	  if (REG_P (operand0)
	      && GET_CODE (operand1) == MEM
	      && !REG_POINTER (operand0))
	    {
	      tree decl = MEM_EXPR (operand1);

	      /* Set the register pointer flag and register alignment
		 if the declaration for this memory reference is a
		 pointer type.  Fortran indirect argument references
		 are ignored.  */
	      if (decl
		  && !(flag_argument_noalias > 1
		       && TREE_CODE (decl) == INDIRECT_REF
		       && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
		{
		  tree type;

		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
		     tree operand 1.  */
		  if (TREE_CODE (decl) == COMPONENT_REF)
		    decl = TREE_OPERAND (decl, 1);

		  type = TREE_TYPE (decl);
		  type = strip_array_types (type);

		  if (POINTER_TYPE_P (type))
		    {
		      int align;

		      type = TREE_TYPE (type);
		      /* Using TYPE_ALIGN_OK is rather conservative as
			 only the ada frontend actually sets it.  */
		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
			       : BITS_PER_UNIT);
		      mark_reg_pointer (operand0, align);
		    }
		}
	    }

	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  /* Store DFmode zero through a temporary register so a single
	     FP store can be used.  */
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
	  && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
	{
	  ishighonly = 1;
	  operand1 = XEXP (operand1, 0);
	}
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
	      || function_label_operand (operand1, mode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = force_mode (word_mode, scratch_reg);
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  gcc_assert (GET_CODE (const_part) == CONST_INT);

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}

	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }

	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  rtx const_mem = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
							mode, temp);
		  operands[1] = replace_equiv_address (const_mem, operands[1]);
		  emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  if (REG_P (operand0) && REG_P (operands[1]))
		    copy_reg_pointer (operand0, operands[1]);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
	  /* On the HPPA, references to data space are supposed to use dp,
	     register 27, but showing it in the RTL inhibits various cse
	     and loop optimizations.  */
	  else
	    {
	      rtx temp, set;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (mode);

	      /* Loading a SYMBOL_REF into a register makes that register
		 safe to be used as the base in an indexed address.

		 Don't mark hard registers though.  That loses.  */
	      if (GET_CODE (operand0) == REG
		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (operand0, BITS_PER_UNIT);
	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (temp, BITS_PER_UNIT);
	      if (ishighonly)
		set = gen_rtx_SET (mode, operand0, temp);
	      else
		set = gen_rtx_SET (VOIDmode,
				   operand0,
				   gen_rtx_LO_SUM (mode, temp, operand1));

	      emit_insn (gen_rtx_SET (VOIDmode,
				      temp,
				      gen_rtx_HIGH (mode, operand1)));
	      emit_insn (set);

	    }
	  return 1;
	}
      else if (pa_tls_referenced_p (operand1))
	{
	  rtx tmp = operand1;
	  rtx addend = NULL;

	  /* Split (const (plus (symbol) (addend))) into symbol + addend.  */
	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
	    {
	      addend = XEXP (XEXP (tmp, 0), 1);
	      tmp = XEXP (XEXP (tmp, 0), 0);
	    }

	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
	  tmp = legitimize_tls_address (tmp);
	  if (addend)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, addend);
	      tmp = force_operand (tmp, operands[0]);
	    }
	  operands[1] = tmp;
	}
      else if (GET_CODE (operand1) != CONST_INT
	       || !cint_ok_for_move (INTVAL (operand1)))
	{
	  rtx insn, temp;
	  rtx op1 = operand1;
	  HOST_WIDE_INT value = 0;
	  HOST_WIDE_INT insv = 0;
	  int insert = 0;

	  if (GET_CODE (operand1) == CONST_INT)
	    value = INTVAL (operand1);

	  if (TARGET_64BIT
	      && GET_CODE (operand1) == CONST_INT
	      && HOST_BITS_PER_WIDE_INT > 32
	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
	    {
	      HOST_WIDE_INT nval;

	      /* Extract the low order 32 bits of the value and sign extend.
		 If the new value is the same as the original value, we can
		 can use the original value as-is.  If the new value is
		 different, we use it and insert the most-significant 32-bits
		 of the original value into the final result.  */
	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
	      if (value != nval)
		{
#if HOST_BITS_PER_WIDE_INT > 32
		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
#endif
		  insert = 1;
		  value = nval;
		  operand1 = GEN_INT (nval);
		}
	    }

	  if (reload_in_progress || reload_completed)
	    temp = scratch_reg ? scratch_reg : operand0;
	  else
	    temp = gen_reg_rtx (mode);

	  /* We don't directly split DImode constants on 32-bit targets
	     because PLUS uses an 11-bit immediate and the insn sequence
	     generated is not as efficient as the one using HIGH/LO_SUM.  */
	  if (GET_CODE (operand1) == CONST_INT
	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
	      && !insert)
	    {
	      /* Directly break constant into high and low parts.  This
		 provides better optimization opportunities because various
		 passes recognize constants split with PLUS but not LO_SUM.
		 We use a 14-bit signed low part except when the addition
		 of 0x4000 to the high part might change the sign of the
		 high part.  */
	      HOST_WIDE_INT low = value & 0x3fff;
	      HOST_WIDE_INT high = value & ~ 0x3fff;

	      if (low >= 0x2000)
		{
		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
		    high += 0x2000;
		  else
		    high += 0x4000;
		}

	      low = value - high;

	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
	    }
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, temp,
				      gen_rtx_HIGH (mode, operand1)));
	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
	    }

	  insn = emit_move_insn (operands[0], operands[1]);

	  /* Now insert the most significant 32 bits of the value
	     into the register.  When we don't have a second register
	     available, it could take up to nine instructions to load
	     a 64-bit integer constant.  Prior to reload, we force
	     constants that would take more than three instructions
	     to load to the constant pool.  During and after reload,
	     we have to handle all possible values.  */
	  if (insert)
	    {
	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
		 register and the value to be inserted is outside the
		 range that can be loaded with three depdi instructions.  */
	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
		{
		  operand1 = GEN_INT (insv);

		  emit_insn (gen_rtx_SET (VOIDmode, temp,
					  gen_rtx_HIGH (mode, operand1)));
		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
		  emit_insn (gen_insv (operand0, GEN_INT (32),
				       const0_rtx, temp));
		}
	      else
		{
		  int len = 5, pos = 27;

		  /* Insert the bits using the depdi instruction.  */
		  while (pos >= 0)
		    {
		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
		      HOST_WIDE_INT sign = v5 < 0;

		      /* Left extend the insertion.  */
		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
		      while (pos > 0 && (insv & 1) == sign)
			{
			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
			  len += 1;
			  pos -= 1;
			}

		      emit_insn (gen_insv (operand0, GEN_INT (len),
					   GEN_INT (pos), GEN_INT (v5)));

		      len = pos > 0 && pos < 5 ? pos : 5;
		      pos -= len;
		    }
		}
	    }

	  /* Record the constant the split sequence computes.  */
	  set_unique_reg_note (insn, REG_EQUAL, op1);

	  return 1;
	}
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}
2083 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2084 it will need a link/runtime reloc). */
2087 reloc_needed (tree exp)
2089 int reloc = 0;
2091 switch (TREE_CODE (exp))
2093 case ADDR_EXPR:
2094 return 1;
2096 case POINTER_PLUS_EXPR:
2097 case PLUS_EXPR:
2098 case MINUS_EXPR:
2099 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2100 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2101 break;
2103 CASE_CONVERT:
2104 case NON_LVALUE_EXPR:
2105 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2106 break;
2108 case CONSTRUCTOR:
2110 tree value;
2111 unsigned HOST_WIDE_INT ix;
2113 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2114 if (value)
2115 reloc |= reloc_needed (value);
2117 break;
2119 case ERROR_MARK:
2120 break;
2122 default:
2123 break;
2125 return reloc;
2128 /* Does operand (which is a symbolic_operand) live in text space?
2129 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2130 will be true. */
2133 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2135 if (GET_CODE (operand) == CONST)
2136 operand = XEXP (XEXP (operand, 0), 0);
2137 if (flag_pic)
2139 if (GET_CODE (operand) == SYMBOL_REF)
2140 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2142 else
2144 if (GET_CODE (operand) == SYMBOL_REF)
2145 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2147 return 1;
2151 /* Return the best assembler insn template
2152 for moving operands[1] into operands[0] as a fullword. */
2153 const char *
2154 singlemove_string (rtx *operands)
2156 HOST_WIDE_INT intval;
2158 if (GET_CODE (operands[0]) == MEM)
2159 return "stw %r1,%0";
2160 if (GET_CODE (operands[1]) == MEM)
2161 return "ldw %1,%0";
2162 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2164 long i;
2165 REAL_VALUE_TYPE d;
2167 gcc_assert (GET_MODE (operands[1]) == SFmode);
2169 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2170 bit pattern. */
2171 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2172 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2174 operands[1] = GEN_INT (i);
2175 /* Fall through to CONST_INT case. */
2177 if (GET_CODE (operands[1]) == CONST_INT)
2179 intval = INTVAL (operands[1]);
2181 if (VAL_14_BITS_P (intval))
2182 return "ldi %1,%0";
2183 else if ((intval & 0x7ff) == 0)
2184 return "ldil L'%1,%0";
2185 else if (zdepi_cint_p (intval))
2186 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2187 else
2188 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2190 return "copy %1,%0";
2194 /* Compute position (in OP[1]) and width (in OP[2])
2195 useful for copying IMM to a register using the zdepi
2196 instructions. Store the immediate value to insert in OP[0]. */
2197 static void
2198 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2200 int lsb, len;
2202 /* Find the least significant set bit in IMM. */
2203 for (lsb = 0; lsb < 32; lsb++)
2205 if ((imm & 1) != 0)
2206 break;
2207 imm >>= 1;
2210 /* Choose variants based on *sign* of the 5-bit field. */
2211 if ((imm & 0x10) == 0)
2212 len = (lsb <= 28) ? 4 : 32 - lsb;
2213 else
2215 /* Find the width of the bitstring in IMM. */
2216 for (len = 5; len < 32; len++)
2218 if ((imm & (1 << len)) == 0)
2219 break;
2222 /* Sign extend IMM as a 5-bit value. */
2223 imm = (imm & 0xf) - 0x10;
2226 op[0] = imm;
2227 op[1] = 31 - lsb;
2228 op[2] = len;
2231 /* Compute position (in OP[1]) and width (in OP[2])
2232 useful for copying IMM to a register using the depdi,z
2233 instructions. Store the immediate value to insert in OP[0]. */
2234 void
2235 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2237 HOST_WIDE_INT lsb, len;
2239 /* Find the least significant set bit in IMM. */
2240 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2242 if ((imm & 1) != 0)
2243 break;
2244 imm >>= 1;
2247 /* Choose variants based on *sign* of the 5-bit field. */
2248 if ((imm & 0x10) == 0)
2249 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2250 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2251 else
2253 /* Find the width of the bitstring in IMM. */
2254 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2256 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2257 break;
2260 /* Sign extend IMM as a 5-bit value. */
2261 imm = (imm & 0xf) - 0x10;
2264 op[0] = imm;
2265 op[1] = 63 - lsb;
2266 op[2] = len;
/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  The returned string is the template for
   the last (or only) insn of the sequence; any earlier insns are
   emitted here via output_asm_insn.  OPERANDS may be rewritten in
   place (e.g. for auto-inc addresses and split constants).  */

const char *
output_move_double (rtx *operands)
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  */
  gcc_assert (optype0 == REGOP || optype1 == REGOP);

  /* Handle copies between general and floating registers.  */

  if (optype0 == REGOP && optype1 == REGOP
      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
    {
      /* There is no direct GR<->FR doubleword copy, so bounce the
	 value through memory just below the stack pointer.
	 NOTE(review): assumes -16(%sp) is usable scratch space --
	 confirm against the runtime ABI.  */
      if (FP_REG_P (operands[0]))
	{
	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
	  return "{fldds|fldd} -16(%%sp),%0";
	}
      else
	{
	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
	  return "{ldws|ldw} -12(%%sp),%R0";
	}
    }

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do it better when we learn
     this port about the general inc/dec addressing of PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));

	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to
	     save a register file writeback)  */
	  if (GET_CODE (addr) == POST_INC)
	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to save a
	     register file writeback)  */
	  if (GET_CODE (addr) == PRE_INC)
	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
	}
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == POST_INC)
		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == POST_INC)
		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == PRE_INC)
		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == PRE_INC)
		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PLUS
	       && GET_CODE (XEXP (addr, 0)) == MULT)
	{
	  rtx xoperands[4];
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* Materialize base + scaled index into the high half of
		 the destination, then load both words through it.  */
	      xoperands[0] = high_reg;
	      xoperands[1] = XEXP (addr, 1);
	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
	      output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
			       xoperands);
	      return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
	    }
	  else
	    {
	      /* The high half overlaps the address, so form the
		 address in the low half instead and load in the
		 other order.  */
	      xoperands[0] = high_reg;
	      xoperands[1] = XEXP (addr, 1);
	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
	      output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
			       xoperands);
	      return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
	    }
	}
    }

  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adjust_address (operands[0], SImode, 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adjust_address (operands[1], SImode, 4);
  else if (optype1 == CNSTOP)
    split_double (operands[1], &operands[1], &latehalf[1]);
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

	mem -> register where the first half of the destination register
	is the same register used in the memory's address.  Reload
	can create such insns.

	mem in this case will be either register indirect or register
	indirect plus a valid offset.

	register -> register move where REGNO(dst) == REGNO(src + 1)
	someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
			    operands[1], 0))
    {
      /* Do the late half first.  */
      if (addreg1)
	output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
	output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      /* High-numbered word first so the low move doesn't clobber
	 its own source.  */
      output_asm_insn (singlemove_string (latehalf), latehalf);
      return singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do that word.  */
  output_asm_insn (singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}
2553 const char *
2554 output_fp_move_double (rtx *operands)
2556 if (FP_REG_P (operands[0]))
2558 if (FP_REG_P (operands[1])
2559 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2560 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2561 else
2562 output_asm_insn ("fldd%F1 %1,%0", operands);
2564 else if (FP_REG_P (operands[1]))
2566 output_asm_insn ("fstd%F0 %1,%0", operands);
2568 else
2570 rtx xoperands[2];
2572 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2574 /* This is a pain. You have to be prepared to deal with an
2575 arbitrary address here including pre/post increment/decrement.
2577 so avoid this in the MD. */
2578 gcc_assert (GET_CODE (operands[0]) == REG);
2580 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2581 xoperands[0] = operands[0];
2582 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2584 return "";
2587 /* Return a REG that occurs in ADDR with coefficient 1.
2588 ADDR can be effectively incremented by incrementing REG. */
2590 static rtx
2591 find_addr_reg (rtx addr)
2593 while (GET_CODE (addr) == PLUS)
2595 if (GET_CODE (XEXP (addr, 0)) == REG)
2596 addr = XEXP (addr, 0);
2597 else if (GET_CODE (XEXP (addr, 1)) == REG)
2598 addr = XEXP (addr, 1);
2599 else if (CONSTANT_P (XEXP (addr, 0)))
2600 addr = XEXP (addr, 1);
2601 else if (CONSTANT_P (XEXP (addr, 1)))
2602 addr = XEXP (addr, 0);
2603 else
2604 gcc_unreachable ();
2606 gcc_assert (GET_CODE (addr) == REG);
2607 return addr;
/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.

   All insns are emitted via output_asm_insn; the returned template is
   always the empty string.  */

const char *
output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than a word at a time because the PA
     has no longer integer move insns.  (Could use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
    case 8:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 16);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  NOTE(review): addib,>= loops back while the
	 counter stays non-negative; the store following it appears to
	 occupy the branch delay slot -- confirm against PA branch
	 semantics.  */
      output_asm_insn ("ldd,ma 8(%1),%3", operands);
      output_asm_insn ("ldd,ma 8(%1),%6", operands);
      output_asm_insn ("std,ma %3,8(%0)", operands);
      output_asm_insn ("addib,>= -16,%2,.-12", operands);
      output_asm_insn ("std,ma %6,8(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
	 residual to copy!  */
      if (n_bytes % 16 != 0)
	{
	  /* %4 now holds the residual byte count for the final stdby.  */
	  operands[4] = GEN_INT (n_bytes % 8);
	  if (n_bytes % 16 >= 8)
	    output_asm_insn ("ldd,ma 8(%1),%3", operands);
	  if (n_bytes % 8 != 0)
	    output_asm_insn ("ldd 0(%1),%6", operands);
	  if (n_bytes % 16 >= 8)
	    output_asm_insn ("std,ma %3,8(%0)", operands);
	  if (n_bytes % 8 != 0)
	    output_asm_insn ("stdby,e %6,%4(%0)", operands);
	}
      return "";

    case 4:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 8);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
      output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
      output_asm_insn ("addib,>= -8,%2,.-12", operands);
      output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
	 residual to copy!  */
      if (n_bytes % 8 != 0)
	{
	  operands[4] = GEN_INT (n_bytes % 4);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	  if (n_bytes % 4 != 0)
	    output_asm_insn ("ldw 0(%1),%6", operands);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	  if (n_bytes % 4 != 0)
	    output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
	}
      return "";

    case 2:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 4);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
      output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
      output_asm_insn ("addib,>= -4,%2,.-12", operands);
      output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 4 != 0)
	{
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("ldb 0(%1),%6", operands);
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("stb %6,0(%0)", operands);
	}
      return "";

    case 1:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 2);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
      output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
      output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
      output_asm_insn ("addib,>= -2,%2,.-12", operands);
      output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 2 != 0)
	{
	  output_asm_insn ("ldb 0(%1),%3", operands);
	  output_asm_insn ("stb %3,0(%0)", operands);
	}
      return "";

    default:
      gcc_unreachable ();
    }
}
2742 /* Count the number of insns necessary to handle this block move.
2744 Basic structure is the same as emit_block_move, except that we
2745 count insns rather than emit them. */
2747 static int
2748 compute_movmem_length (rtx insn)
2750 rtx pat = PATTERN (insn);
2751 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2752 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2753 unsigned int n_insns = 0;
2755 /* We can't move more than four bytes at a time because the PA
2756 has no longer integer move insns. (Could use fp mem ops?) */
2757 if (align > (TARGET_64BIT ? 8 : 4))
2758 align = (TARGET_64BIT ? 8 : 4);
2760 /* The basic copying loop. */
2761 n_insns = 6;
2763 /* Residuals. */
2764 if (n_bytes % (2 * align) != 0)
2766 if ((n_bytes % (2 * align)) >= align)
2767 n_insns += 2;
2769 if ((n_bytes % align) != 0)
2770 n_insns += 2;
2773 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2774 return n_insns * 4;
/* Emit code to perform a block clear.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is a register for temporary storage.
   OPERANDS[2] is the size as a CONST_INT
   OPERANDS[3] is the alignment safe to use, as a CONST_INT.

   All insns are emitted via output_asm_insn; the returned template is
   always the empty string.  */

const char *
output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[3]);
  unsigned long n_bytes = INTVAL (operands[2]);

  /* We can't clear more than a word at a time because the PA
     has no longer integer move insns.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
    case 8:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 16);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop.  NOTE(review): addib,>= loops back while the counter is
	 non-negative; the store after it appears to occupy the branch
	 delay slot -- confirm against PA branch semantics.  */
      output_asm_insn ("std,ma %%r0,8(%0)", operands);
      output_asm_insn ("addib,>= -16,%1,.-4", operands);
      output_asm_insn ("std,ma %%r0,8(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
	 residual to copy!  */
      if (n_bytes % 16 != 0)
	{
	  /* %2 now holds the residual byte count for the final stdby.  */
	  operands[2] = GEN_INT (n_bytes % 8);
	  if (n_bytes % 16 >= 8)
	    output_asm_insn ("std,ma %%r0,8(%0)", operands);
	  if (n_bytes % 8 != 0)
	    output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
	}
      return "";

    case 4:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 8);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop.  */
      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
      output_asm_insn ("addib,>= -8,%1,.-4", operands);
      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
	 residual to copy!  */
      if (n_bytes % 8 != 0)
	{
	  operands[2] = GEN_INT (n_bytes % 4);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	  if (n_bytes % 4 != 0)
	    output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
	}
      return "";

    case 2:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 4);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop.  */
      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
      output_asm_insn ("addib,>= -4,%1,.-4", operands);
      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 4 != 0)
	{
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("stb %%r0,0(%0)", operands);
	}
      return "";

    case 1:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 2);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop.  */
      output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
      output_asm_insn ("addib,>= -2,%1,.-4", operands);
      output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 2 != 0)
	output_asm_insn ("stb %%r0,0(%0)", operands);

      return "";

    default:
      gcc_unreachable ();
    }
}
2884 /* Count the number of insns necessary to handle this block move.
2886 Basic structure is the same as emit_block_move, except that we
2887 count insns rather than emit them. */
2889 static int
2890 compute_clrmem_length (rtx insn)
2892 rtx pat = PATTERN (insn);
2893 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2894 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2895 unsigned int n_insns = 0;
2897 /* We can't clear more than a word at a time because the PA
2898 has no longer integer move insns. */
2899 if (align > (TARGET_64BIT ? 8 : 4))
2900 align = (TARGET_64BIT ? 8 : 4);
2902 /* The basic loop. */
2903 n_insns = 4;
2905 /* Residuals. */
2906 if (n_bytes % (2 * align) != 0)
2908 if ((n_bytes % (2 * align)) >= align)
2909 n_insns++;
2911 if ((n_bytes % align) != 0)
2912 n_insns++;
2915 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2916 return n_insns * 4;
2920 const char *
2921 output_and (rtx *operands)
2923 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2925 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2926 int ls0, ls1, ms0, p, len;
2928 for (ls0 = 0; ls0 < 32; ls0++)
2929 if ((mask & (1 << ls0)) == 0)
2930 break;
2932 for (ls1 = ls0; ls1 < 32; ls1++)
2933 if ((mask & (1 << ls1)) != 0)
2934 break;
2936 for (ms0 = ls1; ms0 < 32; ms0++)
2937 if ((mask & (1 << ms0)) == 0)
2938 break;
2940 gcc_assert (ms0 == 32);
2942 if (ls1 == 32)
2944 len = ls0;
2946 gcc_assert (len);
2948 operands[2] = GEN_INT (len);
2949 return "{extru|extrw,u} %1,31,%2,%0";
2951 else
2953 /* We could use this `depi' for the case above as well, but `depi'
2954 requires one more register file access than an `extru'. */
2956 p = 31 - ls0;
2957 len = ls1 - ls0;
2959 operands[2] = GEN_INT (p);
2960 operands[3] = GEN_INT (len);
2961 return "{depi|depwi} 0,%2,%3,%0";
2964 else
2965 return "and %1,%2,%0";
2968 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2969 storing the result in operands[0]. */
2970 const char *
2971 output_64bit_and (rtx *operands)
2973 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2975 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2976 int ls0, ls1, ms0, p, len;
2978 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2979 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2980 break;
2982 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2983 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2984 break;
2986 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2987 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2988 break;
2990 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
2992 if (ls1 == HOST_BITS_PER_WIDE_INT)
2994 len = ls0;
2996 gcc_assert (len);
2998 operands[2] = GEN_INT (len);
2999 return "extrd,u %1,63,%2,%0";
3001 else
3003 /* We could use this `depi' for the case above as well, but `depi'
3004 requires one more register file access than an `extru'. */
3006 p = 63 - ls0;
3007 len = ls1 - ls0;
3009 operands[2] = GEN_INT (p);
3010 operands[3] = GEN_INT (len);
3011 return "depdi 0,%2,%3,%0";
3014 else
3015 return "and %1,%2,%0";
3018 const char *
3019 output_ior (rtx *operands)
3021 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3022 int bs0, bs1, p, len;
3024 if (INTVAL (operands[2]) == 0)
3025 return "copy %1,%0";
3027 for (bs0 = 0; bs0 < 32; bs0++)
3028 if ((mask & (1 << bs0)) != 0)
3029 break;
3031 for (bs1 = bs0; bs1 < 32; bs1++)
3032 if ((mask & (1 << bs1)) == 0)
3033 break;
3035 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3037 p = 31 - bs0;
3038 len = bs1 - bs0;
3040 operands[2] = GEN_INT (p);
3041 operands[3] = GEN_INT (len);
3042 return "{depi|depwi} -1,%2,%3,%0";
/* Return a string to perform a bitwise inclusive-or of operands[1]
   with operands[2], storing the result in operands[0].  (The previous
   comment said "bitwise-and"; this is the 64-bit IOR emitter.)  The
   mask in operands[2] must be zero or one contiguous field of one
   bits; a `depdi -1' sets that field.  */
const char *
output_64bit_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  /* IOR with zero is a plain register copy.  */
  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  /* Find the first set bit (BS0) and the first clear bit above it (BS1).  */
  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;

  /* Verify the set bits form one contiguous field.  */
  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  /* Deposit LEN one bits ending at position P.  */
  p = 63 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "depdi -1,%2,%3,%0";
}
3075 /* Target hook for assembling integer objects. This code handles
3076 aligned SI and DI integers specially since function references
3077 must be preceded by P%. */
3079 static bool
3080 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3082 if (size == UNITS_PER_WORD
3083 && aligned_p
3084 && function_label_operand (x, VOIDmode))
3086 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3087 output_addr_const (asm_out_file, x);
3088 fputc ('\n', asm_out_file);
3089 return true;
3091 return default_assemble_integer (x, size, aligned_p);
/* Output an ascii string.  */
void
output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int line_chars;
  unsigned char group[16];	/* Worst case: 4 chars at 4 bytes each.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  line_chars = 0;
  for (i = 0; i < size; i += 4)
    {
      int limit = (size - i < 4) ? size - i : 4;
      int out = 0;
      int j;

      /* Translate up to four source characters into their escaped
	 assembler spelling.  */
      for (j = 0; j < limit; j++)
	{
	  unsigned int c = (unsigned char) p[i + j];

	  if (c == '\"' || c == '\\')
	    group[out++] = '\\';

	  if (c >= ' ' && c < 0177)
	    group[out++] = c;
	  else
	    {
	      /* Non-printable: emit a \xNN hex escape.  */
	      static const char hexdigit[] = "0123456789abcdef";

	      group[out++] = '\\';
	      group[out++] = 'x';
	      group[out++] = hexdigit[c / 16];
	      group[out++] = hexdigit[c % 16];
	    }
	}

      /* Start a fresh .STRING directive before the line gets too
	 long for the HP assembler.  */
      if (line_chars + out > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  line_chars = 0;
	}

      fwrite (group, 1, (size_t) out, file);
      line_chars += out;
    }

  fputs ("\"\n", file);
}
3150 /* Try to rewrite floating point comparisons & branches to avoid
3151 useless add,tr insns.
3153 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3154 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3155 first attempt to remove useless add,tr insns. It is zero
3156 for the second pass as reorg sometimes leaves bogus REG_DEAD
3157 notes lying around.
3159 When CHECK_NOTES is zero we can only eliminate add,tr insns
3160 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3161 instructions. */
3162 static void
3163 remove_useless_addtr_insns (int check_notes)
3165 rtx insn;
3166 static int pass = 0;
3168 /* This is fairly cheap, so always run it when optimizing. */
3169 if (optimize > 0)
3171 int fcmp_count = 0;
3172 int fbranch_count = 0;
3174 /* Walk all the insns in this function looking for fcmp & fbranch
3175 instructions. Keep track of how many of each we find. */
3176 for (insn = get_insns (); insn; insn = next_insn (insn))
3178 rtx tmp;
3180 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3181 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3182 continue;
3184 tmp = PATTERN (insn);
3186 /* It must be a set. */
3187 if (GET_CODE (tmp) != SET)
3188 continue;
3190 /* If the destination is CCFP, then we've found an fcmp insn. */
3191 tmp = SET_DEST (tmp);
3192 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3194 fcmp_count++;
3195 continue;
3198 tmp = PATTERN (insn);
3199 /* If this is an fbranch instruction, bump the fbranch counter. */
3200 if (GET_CODE (tmp) == SET
3201 && SET_DEST (tmp) == pc_rtx
3202 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3203 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3204 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3205 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3207 fbranch_count++;
3208 continue;
3213 /* Find all floating point compare + branch insns. If possible,
3214 reverse the comparison & the branch to avoid add,tr insns. */
3215 for (insn = get_insns (); insn; insn = next_insn (insn))
3217 rtx tmp, next;
3219 /* Ignore anything that isn't an INSN. */
3220 if (GET_CODE (insn) != INSN)
3221 continue;
3223 tmp = PATTERN (insn);
3225 /* It must be a set. */
3226 if (GET_CODE (tmp) != SET)
3227 continue;
3229 /* The destination must be CCFP, which is register zero. */
3230 tmp = SET_DEST (tmp);
3231 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3232 continue;
3234 /* INSN should be a set of CCFP.
3236 See if the result of this insn is used in a reversed FP
3237 conditional branch. If so, reverse our condition and
3238 the branch. Doing so avoids useless add,tr insns. */
3239 next = next_insn (insn);
3240 while (next)
3242 /* Jumps, calls and labels stop our search. */
3243 if (GET_CODE (next) == JUMP_INSN
3244 || GET_CODE (next) == CALL_INSN
3245 || GET_CODE (next) == CODE_LABEL)
3246 break;
3248 /* As does another fcmp insn. */
3249 if (GET_CODE (next) == INSN
3250 && GET_CODE (PATTERN (next)) == SET
3251 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3252 && REGNO (SET_DEST (PATTERN (next))) == 0)
3253 break;
3255 next = next_insn (next);
3258 /* Is NEXT_INSN a branch? */
3259 if (next
3260 && GET_CODE (next) == JUMP_INSN)
3262 rtx pattern = PATTERN (next);
3264 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3265 and CCFP dies, then reverse our conditional and the branch
3266 to avoid the add,tr. */
3267 if (GET_CODE (pattern) == SET
3268 && SET_DEST (pattern) == pc_rtx
3269 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3270 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3271 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3272 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3273 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3274 && (fcmp_count == fbranch_count
3275 || (check_notes
3276 && find_regno_note (next, REG_DEAD, 0))))
3278 /* Reverse the branch. */
3279 tmp = XEXP (SET_SRC (pattern), 1);
3280 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3281 XEXP (SET_SRC (pattern), 2) = tmp;
3282 INSN_CODE (next) = -1;
3284 /* Reverse our condition. */
3285 tmp = PATTERN (insn);
3286 PUT_CODE (XEXP (tmp, 1),
3287 (reverse_condition_maybe_unordered
3288 (GET_CODE (XEXP (tmp, 1)))));
3294 pass = !pass;
/* You may have trouble believing this, but this is the 32 bit HP-PA
   stack layout.  Wow.

   Offset		Contents

   Variable arguments (optional; any number may be allocated)

   SP-(4*(N+9))		arg word N
	:		    :
   SP-56		arg word 5
   SP-52		arg word 4

   Fixed arguments (must be allocated; may remain unused)

   SP-48		arg word 3
   SP-44		arg word 2
   SP-40		arg word 1
   SP-36		arg word 0

   Frame Marker

   SP-32		External Data Pointer (DP)
   SP-28		External sr4
   SP-24		External/stub RP (RP')
   SP-20		Current RP
   SP-16		Static Link
   SP-12		Clean up
   SP-8			Calling Stub RP (RP'')
   SP-4			Previous SP

   Top of Frame

   SP-0			Stack Pointer (points to next available address)

*/

/* This function saves registers as follows.  Registers marked with ' are
   this function's registers (as opposed to the previous function's).
   If a frame_pointer isn't needed, r4 is saved as a general register;
   the space for the frame pointer is still allocated, though, to keep
   things simple.

   Top of Frame

       SP (FP')		Previous FP
       SP + 4		Alignment filler (sigh)
       SP + 8		Space for locals reserved here.
       .
       .
       .
       SP + n		All call saved register used.
       .
       .
       .
       SP + o		All call saved fp registers used.
       .
       .
       .
       SP + p (SP')	points to next available address.
*/
3361 /* Global variables set by output_function_prologue(). */
3362 /* Size of frame. Need to know this to emit return insns from
3363 leaf procedures. */
3364 static HOST_WIDE_INT actual_fsize, local_fsize;
3365 static int save_fregs;
3367 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3368 Handle case where DISP > 8k by using the add_high_const patterns.
3370 Note in DISP > 8k case, we will leave the high part of the address
3371 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3373 static void
3374 store_reg (int reg, HOST_WIDE_INT disp, int base)
3376 rtx insn, dest, src, basereg;
3378 src = gen_rtx_REG (word_mode, reg);
3379 basereg = gen_rtx_REG (Pmode, base);
3380 if (VAL_14_BITS_P (disp))
3382 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3383 insn = emit_move_insn (dest, src);
3385 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3387 rtx delta = GEN_INT (disp);
3388 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3390 emit_move_insn (tmpreg, delta);
3391 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3392 if (DO_FRAME_NOTES)
3394 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3395 gen_rtx_SET (VOIDmode, tmpreg,
3396 gen_rtx_PLUS (Pmode, basereg, delta)));
3397 RTX_FRAME_RELATED_P (insn) = 1;
3399 dest = gen_rtx_MEM (word_mode, tmpreg);
3400 insn = emit_move_insn (dest, src);
3402 else
3404 rtx delta = GEN_INT (disp);
3405 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3406 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3408 emit_move_insn (tmpreg, high);
3409 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3410 insn = emit_move_insn (dest, src);
3411 if (DO_FRAME_NOTES)
3412 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3413 gen_rtx_SET (VOIDmode,
3414 gen_rtx_MEM (word_mode,
3415 gen_rtx_PLUS (word_mode,
3416 basereg,
3417 delta)),
3418 src));
3421 if (DO_FRAME_NOTES)
3422 RTX_FRAME_RELATED_P (insn) = 1;
3425 /* Emit RTL to store REG at the memory location specified by BASE and then
3426 add MOD to BASE. MOD must be <= 8k. */
3428 static void
3429 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3431 rtx insn, basereg, srcreg, delta;
3433 gcc_assert (VAL_14_BITS_P (mod));
3435 basereg = gen_rtx_REG (Pmode, base);
3436 srcreg = gen_rtx_REG (word_mode, reg);
3437 delta = GEN_INT (mod);
3439 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3440 if (DO_FRAME_NOTES)
3442 RTX_FRAME_RELATED_P (insn) = 1;
3444 /* RTX_FRAME_RELATED_P must be set on each frame related set
3445 in a parallel with more than one element. */
3446 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3447 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3451 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3452 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3453 whether to add a frame note or not.
3455 In the DISP > 8k case, we leave the high part of the address in %r1.
3456 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3458 static void
3459 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3461 rtx insn;
3463 if (VAL_14_BITS_P (disp))
3465 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3466 plus_constant (gen_rtx_REG (Pmode, base), disp));
3468 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3470 rtx basereg = gen_rtx_REG (Pmode, base);
3471 rtx delta = GEN_INT (disp);
3472 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3474 emit_move_insn (tmpreg, delta);
3475 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3476 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3477 if (DO_FRAME_NOTES)
3478 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3479 gen_rtx_SET (VOIDmode, tmpreg,
3480 gen_rtx_PLUS (Pmode, basereg, delta)));
3482 else
3484 rtx basereg = gen_rtx_REG (Pmode, base);
3485 rtx delta = GEN_INT (disp);
3486 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3488 emit_move_insn (tmpreg,
3489 gen_rtx_PLUS (Pmode, basereg,
3490 gen_rtx_HIGH (Pmode, delta)));
3491 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3492 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3495 if (DO_FRAME_NOTES && note)
3496 RTX_FRAME_RELATED_P (insn) = 1;
3499 HOST_WIDE_INT
3500 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3502 int freg_saved = 0;
3503 int i, j;
3505 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3506 be consistent with the rounding and size calculation done here.
3507 Change them at the same time. */
3509 /* We do our own stack alignment. First, round the size of the
3510 stack locals up to a word boundary. */
3511 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3513 /* Space for previous frame pointer + filler. If any frame is
3514 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3515 waste some space here for the sake of HP compatibility. The
3516 first slot is only used when the frame pointer is needed. */
3517 if (size || frame_pointer_needed)
3518 size += STARTING_FRAME_OFFSET;
3520 /* If the current function calls __builtin_eh_return, then we need
3521 to allocate stack space for registers that will hold data for
3522 the exception handler. */
3523 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3525 unsigned int i;
3527 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3528 continue;
3529 size += i * UNITS_PER_WORD;
3532 /* Account for space used by the callee general register saves. */
3533 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3534 if (df_regs_ever_live_p (i))
3535 size += UNITS_PER_WORD;
3537 /* Account for space used by the callee floating point register saves. */
3538 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3539 if (df_regs_ever_live_p (i)
3540 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3542 freg_saved = 1;
3544 /* We always save both halves of the FP register, so always
3545 increment the frame size by 8 bytes. */
3546 size += 8;
3549 /* If any of the floating registers are saved, account for the
3550 alignment needed for the floating point register save block. */
3551 if (freg_saved)
3553 size = (size + 7) & ~7;
3554 if (fregs_live)
3555 *fregs_live = 1;
3558 /* The various ABIs include space for the outgoing parameters in the
3559 size of the current function's stack frame. We don't need to align
3560 for the outgoing arguments as their alignment is set by the final
3561 rounding for the frame as a whole. */
3562 size += crtl->outgoing_args_size;
3564 /* Allocate space for the fixed frame marker. This space must be
3565 allocated for any function that makes calls or allocates
3566 stack space. */
3567 if (!current_function_is_leaf || size)
3568 size += TARGET_64BIT ? 48 : 32;
3570 /* Finally, round to the preferred stack boundary. */
3571 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3572 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3575 /* Generate the assembly code for function entry. FILE is a stdio
3576 stream to output the code to. SIZE is an int: how many units of
3577 temporary storage to allocate.
3579 Refer to the array `regs_ever_live' to determine which registers to
3580 save; `regs_ever_live[I]' is nonzero if register number I is ever
3581 used in the function. This function is responsible for knowing
3582 which registers should not be saved even if used. */
3584 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3585 of memory. If any fpu reg is used in the function, we allocate
3586 such a block here, at the bottom of the frame, just in case it's needed.
3588 If this function is a leaf procedure, then we may choose not
3589 to do a "save" insn. The decision about whether or not
3590 to do this is made in regclass.c. */
3592 static void
3593 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3595 /* The function's label and associated .PROC must never be
3596 separated and must be output *after* any profiling declarations
3597 to avoid changing spaces/subspaces within a procedure. */
3598 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3599 fputs ("\t.PROC\n", file);
3601 /* hppa_expand_prologue does the dirty work now. We just need
3602 to output the assembler directives which denote the start
3603 of a function. */
3604 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3605 if (current_function_is_leaf)
3606 fputs (",NO_CALLS", file);
3607 else
3608 fputs (",CALLS", file);
3609 if (rp_saved)
3610 fputs (",SAVE_RP", file);
3612 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3613 at the beginning of the frame and that it is used as the frame
3614 pointer for the frame. We do this because our current frame
3615 layout doesn't conform to that specified in the HP runtime
3616 documentation and we need a way to indicate to programs such as
3617 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3618 isn't used by HP compilers but is supported by the assembler.
3619 However, SAVE_SP is supposed to indicate that the previous stack
3620 pointer has been saved in the frame marker. */
3621 if (frame_pointer_needed)
3622 fputs (",SAVE_SP", file);
3624 /* Pass on information about the number of callee register saves
3625 performed in the prologue.
3627 The compiler is supposed to pass the highest register number
3628 saved, the assembler then has to adjust that number before
3629 entering it into the unwind descriptor (to account for any
3630 caller saved registers with lower register numbers than the
3631 first callee saved register). */
3632 if (gr_saved)
3633 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3635 if (fr_saved)
3636 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3638 fputs ("\n\t.ENTRY\n", file);
3640 remove_useless_addtr_insns (0);
3643 void
3644 hppa_expand_prologue (void)
3646 int merge_sp_adjust_with_store = 0;
3647 HOST_WIDE_INT size = get_frame_size ();
3648 HOST_WIDE_INT offset;
3649 int i;
3650 rtx insn, tmpreg;
3652 gr_saved = 0;
3653 fr_saved = 0;
3654 save_fregs = 0;
3656 /* Compute total size for frame pointer, filler, locals and rounding to
3657 the next word boundary. Similar code appears in compute_frame_size
3658 and must be changed in tandem with this code. */
3659 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3660 if (local_fsize || frame_pointer_needed)
3661 local_fsize += STARTING_FRAME_OFFSET;
3663 actual_fsize = compute_frame_size (size, &save_fregs);
3665 /* Compute a few things we will use often. */
3666 tmpreg = gen_rtx_REG (word_mode, 1);
3668 /* Save RP first. The calling conventions manual states RP will
3669 always be stored into the caller's frame at sp - 20 or sp - 16
3670 depending on which ABI is in use. */
3671 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3673 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3674 rp_saved = true;
3676 else
3677 rp_saved = false;
3679 /* Allocate the local frame and set up the frame pointer if needed. */
3680 if (actual_fsize != 0)
3682 if (frame_pointer_needed)
3684 /* Copy the old frame pointer temporarily into %r1. Set up the
3685 new stack pointer, then store away the saved old frame pointer
3686 into the stack at sp and at the same time update the stack
3687 pointer by actual_fsize bytes. Two versions, first
3688 handles small (<8k) frames. The second handles large (>=8k)
3689 frames. */
3690 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3691 if (DO_FRAME_NOTES)
3692 RTX_FRAME_RELATED_P (insn) = 1;
3694 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3695 if (DO_FRAME_NOTES)
3696 RTX_FRAME_RELATED_P (insn) = 1;
3698 if (VAL_14_BITS_P (actual_fsize))
3699 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3700 else
3702 /* It is incorrect to store the saved frame pointer at *sp,
3703 then increment sp (writes beyond the current stack boundary).
3705 So instead use stwm to store at *sp and post-increment the
3706 stack pointer as an atomic operation. Then increment sp to
3707 finish allocating the new frame. */
3708 HOST_WIDE_INT adjust1 = 8192 - 64;
3709 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3711 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3712 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3713 adjust2, 1);
3716 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3717 we need to store the previous stack pointer (frame pointer)
3718 into the frame marker on targets that use the HP unwind
3719 library. This allows the HP unwind library to be used to
3720 unwind GCC frames. However, we are not fully compatible
3721 with the HP library because our frame layout differs from
3722 that specified in the HP runtime specification.
3724 We don't want a frame note on this instruction as the frame
3725 marker moves during dynamic stack allocation.
3727 This instruction also serves as a blockage to prevent
3728 register spills from being scheduled before the stack
3729 pointer is raised. This is necessary as we store
3730 registers using the frame pointer as a base register,
3731 and the frame pointer is set before sp is raised. */
3732 if (TARGET_HPUX_UNWIND_LIBRARY)
3734 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3735 GEN_INT (TARGET_64BIT ? -8 : -4));
3737 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3738 frame_pointer_rtx);
3740 else
3741 emit_insn (gen_blockage ());
3743 /* no frame pointer needed. */
3744 else
3746 /* In some cases we can perform the first callee register save
3747 and allocating the stack frame at the same time. If so, just
3748 make a note of it and defer allocating the frame until saving
3749 the callee registers. */
3750 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3751 merge_sp_adjust_with_store = 1;
3752 /* Can not optimize. Adjust the stack frame by actual_fsize
3753 bytes. */
3754 else
3755 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3756 actual_fsize, 1);
3760 /* Normal register save.
3762 Do not save the frame pointer in the frame_pointer_needed case. It
3763 was done earlier. */
3764 if (frame_pointer_needed)
3766 offset = local_fsize;
3768 /* Saving the EH return data registers in the frame is the simplest
3769 way to get the frame unwind information emitted. We put them
3770 just before the general registers. */
3771 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3773 unsigned int i, regno;
3775 for (i = 0; ; ++i)
3777 regno = EH_RETURN_DATA_REGNO (i);
3778 if (regno == INVALID_REGNUM)
3779 break;
3781 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3782 offset += UNITS_PER_WORD;
3786 for (i = 18; i >= 4; i--)
3787 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3789 store_reg (i, offset, FRAME_POINTER_REGNUM);
3790 offset += UNITS_PER_WORD;
3791 gr_saved++;
3793 /* Account for %r3 which is saved in a special place. */
3794 gr_saved++;
3796 /* No frame pointer needed. */
3797 else
3799 offset = local_fsize - actual_fsize;
3801 /* Saving the EH return data registers in the frame is the simplest
3802 way to get the frame unwind information emitted. */
3803 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3805 unsigned int i, regno;
3807 for (i = 0; ; ++i)
3809 regno = EH_RETURN_DATA_REGNO (i);
3810 if (regno == INVALID_REGNUM)
3811 break;
3813 /* If merge_sp_adjust_with_store is nonzero, then we can
3814 optimize the first save. */
3815 if (merge_sp_adjust_with_store)
3817 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3818 merge_sp_adjust_with_store = 0;
3820 else
3821 store_reg (regno, offset, STACK_POINTER_REGNUM);
3822 offset += UNITS_PER_WORD;
3826 for (i = 18; i >= 3; i--)
3827 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3829 /* If merge_sp_adjust_with_store is nonzero, then we can
3830 optimize the first GR save. */
3831 if (merge_sp_adjust_with_store)
3833 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3834 merge_sp_adjust_with_store = 0;
3836 else
3837 store_reg (i, offset, STACK_POINTER_REGNUM);
3838 offset += UNITS_PER_WORD;
3839 gr_saved++;
3842 /* If we wanted to merge the SP adjustment with a GR save, but we never
3843 did any GR saves, then just emit the adjustment here. */
3844 if (merge_sp_adjust_with_store)
3845 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3846 actual_fsize, 1);
3849 /* The hppa calling conventions say that %r19, the pic offset
3850 register, is saved at sp - 32 (in this function's frame)
3851 when generating PIC code. FIXME: What is the correct thing
3852 to do for functions which make no calls and allocate no
3853 frame? Do we need to allocate a frame, or can we just omit
3854 the save? For now we'll just omit the save.
3856 We don't want a note on this insn as the frame marker can
3857 move if there is a dynamic stack allocation. */
3858 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3860 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3862 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3866 /* Align pointer properly (doubleword boundary). */
3867 offset = (offset + 7) & ~7;
3869 /* Floating point register store. */
3870 if (save_fregs)
3872 rtx base;
3874 /* First get the frame or stack pointer to the start of the FP register
3875 save area. */
3876 if (frame_pointer_needed)
3878 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3879 base = frame_pointer_rtx;
3881 else
3883 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3884 base = stack_pointer_rtx;
3887 /* Now actually save the FP registers. */
3888 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3890 if (df_regs_ever_live_p (i)
3891 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3893 rtx addr, insn, reg;
3894 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3895 reg = gen_rtx_REG (DFmode, i);
3896 insn = emit_move_insn (addr, reg);
3897 if (DO_FRAME_NOTES)
3899 RTX_FRAME_RELATED_P (insn) = 1;
3900 if (TARGET_64BIT)
3902 rtx mem = gen_rtx_MEM (DFmode,
3903 plus_constant (base, offset));
3904 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3905 gen_rtx_SET (VOIDmode, mem, reg));
3907 else
3909 rtx meml = gen_rtx_MEM (SFmode,
3910 plus_constant (base, offset));
3911 rtx memr = gen_rtx_MEM (SFmode,
3912 plus_constant (base, offset + 4));
3913 rtx regl = gen_rtx_REG (SFmode, i);
3914 rtx regr = gen_rtx_REG (SFmode, i + 1);
3915 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3916 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3917 rtvec vec;
3919 RTX_FRAME_RELATED_P (setl) = 1;
3920 RTX_FRAME_RELATED_P (setr) = 1;
3921 vec = gen_rtvec (2, setl, setr);
3922 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3923 gen_rtx_SEQUENCE (VOIDmode, vec));
3926 offset += GET_MODE_SIZE (DFmode);
3927 fr_saved++;
3933 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3934 Handle case where DISP > 8k by using the add_high_const patterns. */
3936 static void
3937 load_reg (int reg, HOST_WIDE_INT disp, int base)
3939 rtx dest = gen_rtx_REG (word_mode, reg);
3940 rtx basereg = gen_rtx_REG (Pmode, base);
3941 rtx src;
3943 if (VAL_14_BITS_P (disp))
3944 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3945 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3947 rtx delta = GEN_INT (disp);
3948 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3950 emit_move_insn (tmpreg, delta);
3951 if (TARGET_DISABLE_INDEXING)
3953 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3954 src = gen_rtx_MEM (word_mode, tmpreg);
3956 else
3957 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3959 else
3961 rtx delta = GEN_INT (disp);
3962 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3963 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3965 emit_move_insn (tmpreg, high);
3966 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3969 emit_move_insn (dest, src);
3972 /* Update the total code bytes output to the text section. */
3974 static void
3975 update_total_code_bytes (unsigned int nbytes)
3977 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3978 && !IN_NAMED_SECTION_P (cfun->decl))
3980 unsigned int old_total = total_code_bytes;
3982 total_code_bytes += nbytes;
3984 /* Be prepared to handle overflows. */
3985 if (old_total > total_code_bytes)
3986 total_code_bytes = UINT_MAX;
3990 /* This function generates the assembly code for function exit.
3991 Args are as for output_function_prologue ().
3993 The function epilogue should not depend on the current stack
3994 pointer! It should use the frame pointer only. This is mandatory
3995 because of alloca; we also take advantage of it to omit stack
3996 adjustments before returning. */
3998 static void
3999 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4001 rtx insn = get_last_insn ();
4003 last_address = 0;
4005 /* hppa_expand_epilogue does the dirty work now. We just need
4006 to output the assembler directives which denote the end
4007 of a function.
4009 To make debuggers happy, emit a nop if the epilogue was completely
4010 eliminated due to a volatile call as the last insn in the
4011 current function. That way the return address (in %r2) will
4012 always point to a valid instruction in the current function. */
4014 /* Get the last real insn. */
4015 if (GET_CODE (insn) == NOTE)
4016 insn = prev_real_insn (insn);
4018 /* If it is a sequence, then look inside. */
4019 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4020 insn = XVECEXP (PATTERN (insn), 0, 0);
4022 /* If insn is a CALL_INSN, then it must be a call to a volatile
4023 function (otherwise there would be epilogue insns). */
4024 if (insn && GET_CODE (insn) == CALL_INSN)
4026 fputs ("\tnop\n", file);
4027 last_address += 4;
4030 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4032 if (TARGET_SOM && TARGET_GAS)
4034 /* We done with this subspace except possibly for some additional
4035 debug information. Forget that we are in this subspace to ensure
4036 that the next function is output in its own subspace. */
4037 in_section = NULL;
4038 cfun->machine->in_nsubspa = 2;
4041 if (INSN_ADDRESSES_SET_P ())
4043 insn = get_last_nonnote_insn ();
4044 last_address += INSN_ADDRESSES (INSN_UID (insn));
4045 if (INSN_P (insn))
4046 last_address += insn_default_length (insn);
4047 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4048 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4050 else
4051 last_address = UINT_MAX;
4053 /* Finally, update the total number of code bytes output so far. */
4054 update_total_code_bytes (last_address);
4057 void
4058 hppa_expand_epilogue (void)
4060 rtx tmpreg;
4061 HOST_WIDE_INT offset;
4062 HOST_WIDE_INT ret_off = 0;
4063 int i;
4064 int merge_sp_adjust_with_load = 0;
4066 /* We will use this often. */
4067 tmpreg = gen_rtx_REG (word_mode, 1);
4069 /* Try to restore RP early to avoid load/use interlocks when
4070 RP gets used in the return (bv) instruction. This appears to still
4071 be necessary even when we schedule the prologue and epilogue. */
4072 if (rp_saved)
4074 ret_off = TARGET_64BIT ? -16 : -20;
4075 if (frame_pointer_needed)
4077 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4078 ret_off = 0;
4080 else
4082 /* No frame pointer, and stack is smaller than 8k. */
4083 if (VAL_14_BITS_P (ret_off - actual_fsize))
4085 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4086 ret_off = 0;
4091 /* General register restores. */
4092 if (frame_pointer_needed)
4094 offset = local_fsize;
4096 /* If the current function calls __builtin_eh_return, then we need
4097 to restore the saved EH data registers. */
4098 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4100 unsigned int i, regno;
4102 for (i = 0; ; ++i)
4104 regno = EH_RETURN_DATA_REGNO (i);
4105 if (regno == INVALID_REGNUM)
4106 break;
4108 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4109 offset += UNITS_PER_WORD;
4113 for (i = 18; i >= 4; i--)
4114 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4116 load_reg (i, offset, FRAME_POINTER_REGNUM);
4117 offset += UNITS_PER_WORD;
4120 else
4122 offset = local_fsize - actual_fsize;
4124 /* If the current function calls __builtin_eh_return, then we need
4125 to restore the saved EH data registers. */
4126 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4128 unsigned int i, regno;
4130 for (i = 0; ; ++i)
4132 regno = EH_RETURN_DATA_REGNO (i);
4133 if (regno == INVALID_REGNUM)
4134 break;
4136 /* Only for the first load.
4137 merge_sp_adjust_with_load holds the register load
4138 with which we will merge the sp adjustment. */
4139 if (merge_sp_adjust_with_load == 0
4140 && local_fsize == 0
4141 && VAL_14_BITS_P (-actual_fsize))
4142 merge_sp_adjust_with_load = regno;
4143 else
4144 load_reg (regno, offset, STACK_POINTER_REGNUM);
4145 offset += UNITS_PER_WORD;
4149 for (i = 18; i >= 3; i--)
4151 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4153 /* Only for the first load.
4154 merge_sp_adjust_with_load holds the register load
4155 with which we will merge the sp adjustment. */
4156 if (merge_sp_adjust_with_load == 0
4157 && local_fsize == 0
4158 && VAL_14_BITS_P (-actual_fsize))
4159 merge_sp_adjust_with_load = i;
4160 else
4161 load_reg (i, offset, STACK_POINTER_REGNUM);
4162 offset += UNITS_PER_WORD;
4167 /* Align pointer properly (doubleword boundary). */
4168 offset = (offset + 7) & ~7;
4170 /* FP register restores. */
4171 if (save_fregs)
4173 /* Adjust the register to index off of. */
4174 if (frame_pointer_needed)
4175 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4176 else
4177 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4179 /* Actually do the restores now. */
4180 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4181 if (df_regs_ever_live_p (i)
4182 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4184 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4185 rtx dest = gen_rtx_REG (DFmode, i);
4186 emit_move_insn (dest, src);
4190 /* Emit a blockage insn here to keep these insns from being moved to
4191 an earlier spot in the epilogue, or into the main instruction stream.
4193 This is necessary as we must not cut the stack back before all the
4194 restores are finished. */
4195 emit_insn (gen_blockage ());
4197 /* Reset stack pointer (and possibly frame pointer). The stack
4198 pointer is initially set to fp + 64 to avoid a race condition. */
4199 if (frame_pointer_needed)
4201 rtx delta = GEN_INT (-64);
4203 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4204 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4206 /* If we were deferring a callee register restore, do it now. */
4207 else if (merge_sp_adjust_with_load)
4209 rtx delta = GEN_INT (-actual_fsize);
4210 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4212 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4214 else if (actual_fsize != 0)
4215 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4216 - actual_fsize, 0);
4218 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4219 frame greater than 8k), do so now. */
4220 if (ret_off != 0)
4221 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4223 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4225 rtx sa = EH_RETURN_STACKADJ_RTX;
4227 emit_insn (gen_blockage ());
4228 emit_insn (TARGET_64BIT
4229 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4230 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4235 hppa_pic_save_rtx (void)
4237 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4240 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4241 #define NO_DEFERRED_PROFILE_COUNTERS 0
4242 #endif
4245 /* Vector of funcdef numbers. */
4246 static VEC(int,heap) *funcdef_nos;
4248 /* Output deferred profile counters. */
4249 static void
4250 output_deferred_profile_counters (void)
4252 unsigned int i;
4253 int align, n;
4255 if (VEC_empty (int, funcdef_nos))
4256 return;
4258 switch_to_section (data_section);
4259 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4260 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4262 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4264 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4265 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4268 VEC_free (int, heap, funcdef_nos);
/* Emit the RTL for the _mcount profiling call at the start of function
   number LABEL_NO.  Loads the function's begin-prologue address into
   %r25, the saved return pointer into %r26, and (unless deferred
   counters are disabled) the address of the LP<label_no> counter into
   %r24, then calls _mcount.  */
void
hppa_profile_hook (int label_no)
{
  /* We use SImode for the address of the function in both 32 and
     64-bit code to avoid having to provide DImode versions of the
     lcla2 and load_offset_label_address insn patterns.  */
  rtx reg = gen_reg_rtx (SImode);
  rtx label_rtx = gen_label_rtx ();
  rtx begin_label_rtx, call_insn;
  char begin_label_name[16];

  ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
			       label_no);
  begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));

  /* NOTE(review): on 64-bit the argument pointer is presumably expected
     to be outgoing-args + 64 by the _mcount stub -- confirm against the
     64-bit runtime architecture document.  */
  if (TARGET_64BIT)
    emit_move_insn (arg_pointer_rtx,
		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
				  GEN_INT (64)));

  /* Pass the saved return pointer (%r2) to _mcount in %r26.  */
  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));

  /* The address of the function is loaded into %r25 with an instruction-
     relative sequence that avoids the use of relocations.  The sequence
     is split so that the load_offset_label_address instruction can
     occupy the delay slot of the call to _mcount.  */
  if (TARGET_PA_20)
    emit_insn (gen_lcla2 (reg, label_rtx));
  else
    emit_insn (gen_lcla1 (reg, label_rtx));

  emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
					    reg, begin_label_rtx, label_rtx));

#if !NO_DEFERRED_PROFILE_COUNTERS
  {
    rtx count_label_rtx, addr, r24;
    char count_label_name[16];

    /* Record this function so output_deferred_profile_counters will
       later emit the LP<label_no> counter word.  */
    VEC_safe_push (int, heap, funcdef_nos, label_no);
    ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
    count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));

    addr = force_reg (Pmode, count_label_rtx);
    r24 = gen_rtx_REG (Pmode, 24);
    emit_move_insn (r24, addr);

    call_insn =
      emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
					     gen_rtx_SYMBOL_REF (Pmode,
								 "_mcount")),
				GEN_INT (TARGET_64BIT ? 24 : 12)));

    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
  }
#else
  {
    call_insn =
      emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
					     gen_rtx_SYMBOL_REF (Pmode,
								 "_mcount")),
				GEN_INT (TARGET_64BIT ? 16 : 8)));
  }
#endif

  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));

  /* Indicate the _mcount call cannot throw, nor will it execute a
     non-local goto.  */
  add_reg_note (call_insn, REG_EH_REGION, constm1_rtx);
}
4344 /* Fetch the return address for the frame COUNT steps up from
4345 the current frame, after the prologue. FRAMEADDR is the
4346 frame pointer of the COUNT frame.
4348 We want to ignore any export stub remnants here. To handle this,
4349 we examine the code at the return address, and if it is an export
4350 stub, we return a memory rtx for the stub return address stored
4351 at frame-24.
4353 The value returned is used in two different ways:
4355 1. To find a function's caller.
4357 2. To change the return address for a function.
4359 This function handles most instances of case 1; however, it will
4360 fail if there are two levels of stubs to execute on the return
4361 path. The only way I believe that can happen is if the return value
4362 needs a parameter relocation, which never happens for C code.
4364 This function handles most instances of case 2; however, it will
4365 fail if we did not originally have stub code on the return path
4366 but will need stub code on the new return path. This can happen if
4367 the caller & callee are both in the main program, but the new
4368 return location is in a shared library. */
rtx
return_addr_rtx (int count, rtx frameaddr)
{
  rtx label;
  rtx rp;
  rtx saved_rp;
  rtx ins;

  /* Instruction stream at the normal return address for the export stub:

	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
	0x00011820 | stub+16:  mtsp r1,sr0
	0xe0400002 | stub+20:  be,n 0(sr0,rp)

     0xe0400002 must be specified as -532676606 so that it won't be
     rejected as an invalid immediate operand on 64-bit hosts.  */

  HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
  int i;

  /* Only the immediate caller's return address is supported.  */
  if (count != 0)
    return NULL_RTX;

  rp = get_hard_reg_initial_val (Pmode, 2);

  /* Export stubs (and the space-register masking below) only exist in
     32-bit code with space registers enabled.  */
  if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
    return rp;

  /* If there is no export stub then just use the value saved from
     the return pointer register.  */

  saved_rp = gen_reg_rtx (Pmode);
  emit_move_insn (saved_rp, rp);

  /* Get pointer to the instruction stream.  We have to mask out the
     privilege level from the two low order bits of the return address
     pointer here so that ins will point to the start of the first
     instruction that would have been executed if we returned.  */
  ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
  label = gen_label_rtx ();

  /* Check the instruction stream at the normal return address for the
     export stub.  If it is an export stub, than our return address is
     really in -24[frameaddr].  */

  /* Only the first three words are compared; the final be,n is implied
     by the preceding three matching.  */
  for (i = 0; i < 3; i++)
    {
      rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
      rtx op1 = GEN_INT (insns[i]);
      emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
    }

  /* Here we know that our return address points to an export
     stub.  We don't want to return the address of the export stub,
     but rather the return address of the export stub.  That return
     address is stored at -24[frameaddr].  */

  emit_move_insn (saved_rp,
		  gen_rtx_MEM (Pmode,
			       memory_address (Pmode,
					       plus_constant (frameaddr,
							      -24))));

  emit_label (label);

  return saved_rp;
}
4439 void
4440 emit_bcond_fp (rtx operands[])
4442 enum rtx_code code = GET_CODE (operands[0]);
4443 rtx operand0 = operands[1];
4444 rtx operand1 = operands[2];
4445 rtx label = operands[3];
4447 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4448 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4450 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4451 gen_rtx_IF_THEN_ELSE (VOIDmode,
4452 gen_rtx_fmt_ee (NE,
4453 VOIDmode,
4454 gen_rtx_REG (CCFPmode, 0),
4455 const0_rtx),
4456 gen_rtx_LABEL_REF (VOIDmode, label),
4457 pc_rtx)));
4461 /* Adjust the cost of a scheduling dependency. Return the new cost of
4462 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
static int
pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type attr_type;

  /* Don't adjust costs for a pa8000 chip, also do not adjust any
     true dependencies as they are described with bypasses now.  */
  if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
    return cost;

  if (! recog_memoized (insn))
    return 0;

  attr_type = get_attr_type (insn);

  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      /* Anti dependency; DEP_INSN reads a register that INSN writes some
	 cycles later.  */

      if (attr_type == TYPE_FPLOAD)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPALU:
		case TYPE_FPMULSGL:
		case TYPE_FPMULDBL:
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* A fpload can't be issued until one cycle before a
		     preceding arithmetic operation has finished if
		     the target of the fpload is any of the sources
		     (or destination) of the arithmetic operation.  */
		  return insn_default_latency (dep_insn) - 1;

		default:
		  return 0;
		}
	    }
	}
      else if (attr_type == TYPE_FPALU)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* An ALU flop can't be issued until two cycles before a
		     preceding divide or sqrt operation has finished if
		     the target of the ALU flop is any of the sources
		     (or destination) of the divide or sqrt operation.  */
		  return insn_default_latency (dep_insn) - 2;

		default:
		  return 0;
		}
	    }
	}

      /* For other anti dependencies, the cost is 0.  */
      return 0;

    case REG_DEP_OUTPUT:
      /* Output dependency; DEP_INSN writes a register that INSN writes some
	 cycles later.  */
      if (attr_type == TYPE_FPLOAD)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  /* Output dependence: compare destination against destination,
	     unlike the anti-dependence cases above which compare against
	     the dependent insn's sources.  */
	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPALU:
		case TYPE_FPMULSGL:
		case TYPE_FPMULDBL:
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* A fpload can't be issued until one cycle before a
		     preceding arithmetic operation has finished if
		     the target of the fpload is the destination of the
		     arithmetic operation.

		     Exception: For PA7100LC, PA7200 and PA7300, the cost
		     is 3 cycles, unless they bundle together.   We also
		     pay the penalty if the second insn is a fpload.  */
		  return insn_default_latency (dep_insn) - 1;

		default:
		  return 0;
		}
	    }
	}
      else if (attr_type == TYPE_FPALU)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* An ALU flop can't be issued until two cycles before a
		     preceding divide or sqrt operation has finished if
		     the target of the ALU flop is also the target of
		     the divide or sqrt operation.  */
		  return insn_default_latency (dep_insn) - 2;

		default:
		  return 0;
		}
	    }
	}

      /* For other output dependencies, the cost is 0.  */
      return 0;

    default:
      gcc_unreachable ();
    }
}
4651 /* Adjust scheduling priorities. We use this to try and keep addil
4652 and the next use of %r1 close together. */
4653 static int
4654 pa_adjust_priority (rtx insn, int priority)
4656 rtx set = single_set (insn);
4657 rtx src, dest;
4658 if (set)
4660 src = SET_SRC (set);
4661 dest = SET_DEST (set);
4662 if (GET_CODE (src) == LO_SUM
4663 && symbolic_operand (XEXP (src, 1), VOIDmode)
4664 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4665 priority >>= 3;
4667 else if (GET_CODE (src) == MEM
4668 && GET_CODE (XEXP (src, 0)) == LO_SUM
4669 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4670 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4671 priority >>= 1;
4673 else if (GET_CODE (dest) == MEM
4674 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4675 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4676 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4677 priority >>= 3;
4679 return priority;
4682 /* The 700 can only issue a single insn at a time.
4683 The 7XXX processors can issue two insns at a time.
4684 The 8000 can issue 4 insns at a time. */
4685 static int
4686 pa_issue_rate (void)
4688 switch (pa_cpu)
4690 case PROCESSOR_700: return 1;
4691 case PROCESSOR_7100: return 2;
4692 case PROCESSOR_7100LC: return 2;
4693 case PROCESSOR_7200: return 2;
4694 case PROCESSOR_7300: return 2;
4695 case PROCESSOR_8000: return 4;
4697 default:
4698 gcc_unreachable ();
4704 /* Return any length adjustment needed by INSN which already has its length
4705 computed as LENGTH. Return zero if no adjustment is necessary.
4707 For the PA: function calls, millicode calls, and backwards short
4708 conditional branches with unfilled delay slots need an adjustment by +1
4709 (to account for the NOP which will be inserted into the instruction stream).
4711 Also compute the length of an inline block move here as it is too
4712 complicated to express as a length attribute in pa.md. */
int
pa_adjust_insn_length (rtx insn, int length)
{
  rtx pat = PATTERN (insn);

  /* Jumps inside switch tables which have unfilled delay slots need
     adjustment.  */
  if (GET_CODE (insn) == JUMP_INSN
      && GET_CODE (pat) == PARALLEL
      && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
    return 4;
  /* Millicode insn with an unfilled delay slot.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) != SEQUENCE
	   && GET_CODE (pat) != USE
	   && GET_CODE (pat) != CLOBBER
	   && get_attr_type (insn) == TYPE_MILLI)
    return 4;
  /* Block move pattern: a PARALLEL whose first element sets one BLKmode
     MEM from another.  The length is computed from the insn itself.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) == PARALLEL
	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
    return compute_movmem_length (insn) - 4;
  /* Block clear pattern: like the above but storing const0 into a
     BLKmode MEM.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) == PARALLEL
	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
    return compute_clrmem_length (insn) - 4;
  /* Conditional branch with an unfilled delay slot.  */
  else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
    {
      /* Adjust a short backwards conditional with an unfilled delay slot.  */
      if (GET_CODE (pat) == SET
	  && length == 4
	  && JUMP_LABEL (insn) != NULL_RTX
	  && ! forward_branch_p (insn))
	return 4;
      else if (GET_CODE (pat) == PARALLEL
	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
	       && length == 4)
	return 4;
      /* Adjust dbra insn with short backwards conditional branch with
	 unfilled delay slot -- only for case where counter is in a
	 general register register.  */
      else if (GET_CODE (pat) == PARALLEL
	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
	       && length == 4
	       && ! forward_branch_p (insn))
	return 4;
      else
	return 0;
    }
  return 0;
}
4777 /* Print operand X (an rtx) in assembler syntax to file FILE.
4778 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4779 For `%' followed by punctuation, CODE is the punctuation and X is null. */
void
print_operand (FILE *file, rtx x, int code)
{
  switch (code)
    {
    case '#':
      /* Output a 'nop' if there's nothing for the delay slot.  */
      if (dbr_sequence_length () == 0)
	fputs ("\n\tnop", file);
      return;
    case '*':
      /* Output a nullification completer if there's nothing for the */
      /* delay slot or nullification is requested.  */
      if (dbr_sequence_length () == 0 ||
	  (final_sequence &&
	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
	fputs (",n", file);
      return;
    case 'R':
      /* Print out the second register name of a register pair.
	 I.e., R (6) => 7.  */
      fputs (reg_names[REGNO (x) + 1], file);
      return;
    case 'r':
      /* A register or zero.  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%r0", file);
	  return;
	}
      else
	break;
    case 'f':
      /* A register or zero (floating point).  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%fr0", file);
	  return;
	}
      else
	break;
    case 'A':
      /* Print "symbol(basereg)" for the PLUS of a base register and an
	 UNSPEC wrapping a global address.  */
      {
	rtx xoperands[2];

	xoperands[0] = XEXP (XEXP (x, 0), 0);
	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
	output_global_address (file, xoperands[1], 0);
	fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
	return;
      }

    case 'C':			/* Plain (C)ondition */
    case 'X':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs (">", file);  break;
	case GE:
	  fputs (">=", file);  break;
	case GEU:
	  fputs (">>=", file);  break;
	case GTU:
	  fputs (">>", file);  break;
	case LT:
	  fputs ("<", file);  break;
	case LE:
	  fputs ("<=", file);  break;
	case LEU:
	  fputs ("<<=", file);  break;
	case LTU:
	  fputs ("<<", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'N':			/* Condition, (N)egated */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("<=", file);  break;
	case GE:
	  fputs ("<", file);  break;
	case GEU:
	  fputs ("<<", file);  break;
	case GTU:
	  fputs ("<<=", file);  break;
	case LT:
	  fputs (">=", file);  break;
	case LE:
	  fputs (">", file);  break;
	case LEU:
	  fputs (">>", file);  break;
	case LTU:
	  fputs (">>=", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    /* For floating point comparisons.  Note that the output
       predicates are the complement of the desired mode.  The
       conditions for GT, GE, LT, LE and LTGT cause an invalid
       operation exception if the result is unordered and this
       exception is enabled in the floating-point status register.  */
    case 'Y':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("!=", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("!>", file);  break;
	case GE:
	  fputs ("!>=", file);  break;
	case LT:
	  fputs ("!<", file);  break;
	case LE:
	  fputs ("!<=", file);  break;
	case LTGT:
	  fputs ("!<>", file);  break;
	case UNLE:
	  fputs ("!?<=", file);  break;
	case UNLT:
	  fputs ("!?<", file);  break;
	case UNGE:
	  fputs ("!?>=", file);  break;
	case UNGT:
	  fputs ("!?>", file);  break;
	case UNEQ:
	  fputs ("!?=", file);  break;
	case UNORDERED:
	  fputs ("!?", file);  break;
	case ORDERED:
	  fputs ("?", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'S':			/* Condition, operands are (S)wapped.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs ("<", file);  break;
	case GE:
	  fputs ("<=", file);  break;
	case GEU:
	  fputs ("<<=", file);  break;
	case GTU:
	  fputs ("<<", file);  break;
	case LT:
	  fputs (">", file);  break;
	case LE:
	  fputs (">=", file);  break;
	case LEU:
	  fputs (">>=", file);  break;
	case LTU:
	  fputs (">>", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'B':			/* Condition, (B)oth swapped and negate.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs (">=", file);  break;
	case GE:
	  fputs (">", file);  break;
	case GEU:
	  fputs (">>", file);  break;
	case GTU:
	  fputs (">>=", file);  break;
	case LT:
	  fputs ("<=", file);  break;
	case LE:
	  fputs ("<", file);  break;
	case LEU:
	  fputs ("<<", file);  break;
	case LTU:
	  fputs ("<<=", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'k':
      /* One's complement of a CONST_INT.  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
      return;
    case 'Q':
      /* 64 minus the low 6 bits of a CONST_INT (64-bit shift complement).  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
      return;
    case 'L':
      /* 32 minus the low 5 bits of a CONST_INT (32-bit shift complement).  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
      return;
    case 'O':
      /* log2 of a power-of-two CONST_INT.  */
      gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;
    case 'p':
      /* 63 minus the low 6 bits of a CONST_INT (64-bit bit position).  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
      return;
    case 'P':
      /* 31 minus the low 5 bits of a CONST_INT (32-bit bit position).  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
      return;
    case 'I':
      /* Emit an "i" completer when the operand is an immediate.  */
      if (GET_CODE (x) == CONST_INT)
	fputs ("i", file);
      return;
    case 'M':
    case 'F':
      /* Memory-access completer ('F' also selects the floating-point
	 "s" single form in dialect 0).  */
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case PRE_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,mb", file);
	  else
	    fputs (",mb", file);
	  break;
	case POST_DEC:
	case POST_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,ma", file);
	  else
	    fputs (",ma", file);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		fputs ("x", file);
	    }
	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		fputs ("x,s", file);
	      else
		fputs (",s", file);
	    }
	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	default:
	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	}
      return;
    case 'G':
      /* Global address, unrounded.  */
      output_global_address (file, x, 0);
      return;
    case 'H':
      /* Global address with the constant part rounded for LR selectors.  */
      output_global_address (file, x, 1);
      return;
    case 0:			/* Don't do anything special */
      break;
    case 'Z':
      /* zdepwi operand triple computed from a CONST_INT.  */
      {
	unsigned op[3];
	compute_zdepwi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'z':
      /* zdepdi operand triple computed from a CONST_INT.  */
      {
	unsigned op[3];
	compute_zdepdi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'c':
      /* We can get here from a .vtable_inherit due to our
	 CONSTANT_ADDRESS_P rejecting perfectly good constant
	 addresses.  */
      break;
    default:
      gcc_unreachable ();
    }
  if (GET_CODE (x) == REG)
    {
      fputs (reg_names [REGNO (x)], file);
      if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
	{
	  fputs ("R", file);
	  return;
	}
      if (FP_REG_P (x)
	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
	  && (REGNO (x) & 1) == 0)
	fputs ("L", file);
    }
  else if (GET_CODE (x) == MEM)
    {
      int size = GET_MODE_SIZE (GET_MODE (x));
      rtx base = NULL_RTX;
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case POST_DEC:
	  base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
	  break;
	case PRE_INC:
	case POST_INC:
	  base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
	    {
	      /* Because the REG_POINTER flag can get lost during reload,
		 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
		 index and base registers in the combined move patterns.  */
	      rtx base = XEXP (XEXP (x, 0), 1);
	      rtx index = XEXP (XEXP (x, 0), 0);

	      fprintf (file, "%s(%s)",
		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
	    }
	  else
	    output_address (XEXP (x, 0));
	  break;
	default:
	  output_address (XEXP (x, 0));
	  break;
	}
    }
  else
    output_addr_const (file, x);
}
5148 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
void
output_global_address (FILE *file, rtx x, int round_constant)
{

  /* Imagine  (high (const (plus ...))).  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
    output_addr_const (file, x);
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
    {
      /* Non-PIC data references are emitted relative to $global$.  */
      output_addr_const (file, x);
      fputs ("-$global$", file);
    }
  else if (GET_CODE (x) == CONST)
    {
      const char *sep = "";
      int offset = 0;		/* assembler wants -$global$ at end */
      rtx base = NULL_RTX;

      /* The CONST wraps a PLUS or MINUS whose two operands are a
	 SYMBOL_REF and/or a CONST_INT; pick out the symbol (BASE)
	 and the integer offset from whichever side holds them.  */
      switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
	{
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 0);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
	  break;
	default:
	  gcc_unreachable ();
	}

      switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
	{
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 1);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* How bogus.  The compiler is apparently responsible for
	 rounding the constant if it uses an LR field selector.

	 The linker and/or assembler seem a better place since
	 they have to do this kind of thing already.

	 If we fail to do this, HP's optimizing linker may eliminate
	 an addil, but not update the ldw/stw/ldo instruction that
	 uses the result of the addil.  */
      if (round_constant)
	offset = ((offset + 0x1000) & ~0x1fff);

      switch (GET_CODE (XEXP (x, 0)))
	{
	case PLUS:
	  if (offset < 0)
	    {
	      offset = -offset;
	      sep = "-";
	    }
	  else
	    sep = "+";
	  break;

	case MINUS:
	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
	  sep = "-";
	  break;

	default:
	  gcc_unreachable ();
	}

      if (!read_only_operand (base, VOIDmode) && !flag_pic)
	fputs ("-$global$", file);
      if (offset)
	fprintf (file, "%s%d", sep, offset);
    }
  else
    output_addr_const (file, x);
}
5239 /* Output boilerplate text to appear at the beginning of the file.
5240 There are several possible versions. */
5241 #define aputs(x) fputs(x, asm_out_file)
5242 static inline void
5243 pa_file_start_level (void)
5245 if (TARGET_64BIT)
5246 aputs ("\t.LEVEL 2.0w\n");
5247 else if (TARGET_PA_20)
5248 aputs ("\t.LEVEL 2.0\n");
5249 else if (TARGET_PA_11)
5250 aputs ("\t.LEVEL 1.1\n");
5251 else
5252 aputs ("\t.LEVEL 1.0\n");
5255 static inline void
5256 pa_file_start_space (int sortspace)
5258 aputs ("\t.SPACE $PRIVATE$");
5259 if (sortspace)
5260 aputs (",SORT=16");
5261 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5262 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5263 "\n\t.SPACE $TEXT$");
5264 if (sortspace)
5265 aputs (",SORT=8");
5266 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5267 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5270 static inline void
5271 pa_file_start_file (int want_version)
5273 if (write_symbols != NO_DEBUG)
5275 output_file_directive (asm_out_file, main_input_filename);
5276 if (want_version)
5277 aputs ("\t.version\t\"01.01\"\n");
5281 static inline void
5282 pa_file_start_mcount (const char *aswhat)
5284 if (profile_flag)
5285 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
/* File-start boilerplate for ELF targets: architecture level, an
   _mcount import when profiling, and the .file directive.  */
static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}
/* File-start boilerplate for SOM targets: level, space directives,
   imports of $global$ and the $$dyncall millicode routine, the
   profiling import, and the .file directive.  */
static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n"
	 "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
/* File-start boilerplate for Linux targets; the .file directive
   (with version) comes before the level directive here.  */
static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}
/* File-start boilerplate for 64-bit HP-UX with GNU as: level, an
   optional type directive for _mcount when profiling, then .file.  */
static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}
/* File-start boilerplate for 64-bit HP-UX with the HP assembler:
   level, sorted space directives, profiling import, and .file.  */
static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5334 #undef aputs
5336 /* Search the deferred plabel list for SYMBOL and return its internal
5337 label. If an entry for SYMBOL is not found, a new entry is created. */
static rtx
get_deferred_plabel (rtx symbol)
{
  const char *fname = XSTR (symbol, 0);
  size_t i;

  /* See if we have already put this function on the list of deferred
     plabels.  This list is generally small, so a linear search is not
     too ugly.  If it proves too slow replace it with something faster.  */
  for (i = 0; i < n_deferred_plabels; i++)
    if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
      break;

  /* If the deferred plabel list is empty, or this entry was not found
     on the list, create a new entry on the list.  */
  if (deferred_plabels == NULL || i == n_deferred_plabels)
    {
      tree id;

      /* The array is GC-allocated and grown one entry at a time.  */
      if (deferred_plabels == 0)
	deferred_plabels = (struct deferred_plabel *)
	  ggc_alloc (sizeof (struct deferred_plabel));
      else
	deferred_plabels = (struct deferred_plabel *)
	  ggc_realloc (deferred_plabels,
		       ((n_deferred_plabels + 1)
			* sizeof (struct deferred_plabel)));

      i = n_deferred_plabels++;
      deferred_plabels[i].internal_label = gen_label_rtx ();
      deferred_plabels[i].symbol = symbol;

      /* Gross.  We have just implicitly taken the address of this
	 function.  Mark it in the same manner as assemble_name.  */
      id = maybe_get_identifier (targetm.strip_name_encoding (fname));
      if (id)
	mark_referenced (id);
    }

  return deferred_plabels[i].internal_label;
}
5381 static void
5382 output_deferred_plabels (void)
5384 size_t i;
5386 /* If we have some deferred plabels, then we need to switch into the
5387 data or readonly data section, and align it to a 4 byte boundary
5388 before outputting the deferred plabels. */
5389 if (n_deferred_plabels)
5391 switch_to_section (flag_pic ? data_section : readonly_data_section);
5392 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5395 /* Now output the deferred plabels. */
5396 for (i = 0; i < n_deferred_plabels; i++)
5398 targetm.asm_out.internal_label (asm_out_file, "L",
5399 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5400 assemble_integer (deferred_plabels[i].symbol,
5401 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
#ifdef HPUX_LONG_DOUBLE_LIBRARY
/* Initialize optabs to point to HPUX long double emulation routines.
   On HP-UX, TFmode (quad precision) arithmetic, comparisons and
   conversions are provided by the _U_Q* library entry points rather
   than compiler-generated code.  */
static void
pa_hpux_init_libfuncs (void)
{
  /* Basic TFmode arithmetic.  */
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  /* NOTE(review): "_U_Qmin" lacks the 'f' that every other name here
     has (compare "_U_Qfmax" just below).  Presumably this matches the
     actual HP-UX library export -- verify against the HP-UX quad-float
     emulation library before "fixing" it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  /* TFmode comparisons.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
  set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");

  /* Widening/narrowing conversions between TFmode and SFmode/DFmode.  */
  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");

  /* Float -> integer conversions.  The 64-bit runtime uses a
     double-underscore entry point for the signed SImode case.  */
  set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
		    ? "__U_Qfcnvfxt_quad_to_sgl"
		    : "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");

  /* Integer -> float conversions.  */
  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
}
#endif
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

/* The millicode entry points we may call; end1000 is a sentinel giving
   the table size.  */
enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
/* Nonzero once the .IMPORT directive for the routine has been emitted.  */
static char imported[(int) end1000];
/* Routine names indexed by enum millicodes; each is exactly four
   characters, matching the "...." placeholder in import_string.  */
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
/* Directive template; the "...." at offset MILLI_START is overwritten
   with the routine name by import_milli.  */
static const char import_string[] = ".IMPORT $$....,MILLICODE";
/* Offset of the "...." placeholder within import_string.  */
#define MILLI_START 10
5457 static void
5458 import_milli (enum millicodes code)
5460 char str[sizeof (import_string)];
5462 if (!imported[(int) code])
5464 imported[(int) code] = 1;
5465 strcpy (str, import_string);
5466 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5467 output_asm_insn (str, 0);
5471 /* The register constraints have put the operands and return value in
5472 the proper registers. */
5474 const char *
5475 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5477 import_milli (mulI);
5478 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value?  Indexed by the
   divisor; only divisors 0..15 are representable in this table, and
   only those with a nonzero entry have $$divI_n/$$divU_n routines.  */
const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */
static int div_milli[16][2];
/* Emit RTL for a division by constant using a magic millicode routine,
   when one exists for the divisor.  OPERANDS is the expander operand
   array: 0 is the destination, 1 the dividend, 2 the divisor, and 3/4
   scratch registers clobbered by the call.  UNSIGNEDP selects unsigned
   ($$divU) vs signed ($$divI) semantics.  Returns 1 when the division
   was emitted here, 0 when the caller must handle it otherwise.  */
int
emit_hpdiv_const (rtx *operands, int unsignedp)
{
  if (GET_CODE (operands[2]) == CONST_INT
      && INTVAL (operands[2]) > 0
      && INTVAL (operands[2]) < 16
      && magic_milli[INTVAL (operands[2])])
    {
      /* Millicode return-pointer register: %r2 (64-bit) or %r31.  */
      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);

      /* Millicode convention: dividend in %r26, result in %r29.  */
      emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
      emit
	(gen_rtx_PARALLEL
	 (VOIDmode,
	  gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
						     SImode,
						     gen_rtx_REG (SImode, 26),
						     operands[2])),
		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
		     gen_rtx_CLOBBER (VOIDmode, ret))));
      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
      return 1;
    }
  return 0;
}
5522 const char *
5523 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5525 int divisor;
5527 /* If the divisor is a constant, try to use one of the special
5528 opcodes .*/
5529 if (GET_CODE (operands[0]) == CONST_INT)
5531 static char buf[100];
5532 divisor = INTVAL (operands[0]);
5533 if (!div_milli[divisor][unsignedp])
5535 div_milli[divisor][unsignedp] = 1;
5536 if (unsignedp)
5537 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5538 else
5539 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5541 if (unsignedp)
5543 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5544 INTVAL (operands[0]));
5545 return output_millicode_call (insn,
5546 gen_rtx_SYMBOL_REF (SImode, buf));
5548 else
5550 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5551 INTVAL (operands[0]));
5552 return output_millicode_call (insn,
5553 gen_rtx_SYMBOL_REF (SImode, buf));
5556 /* Divisor isn't a special constant. */
5557 else
5559 if (unsignedp)
5561 import_milli (divU);
5562 return output_millicode_call (insn,
5563 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5565 else
5567 import_milli (divI);
5568 return output_millicode_call (insn,
5569 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5574 /* Output a $$rem millicode to do mod. */
5576 const char *
5577 output_mod_insn (int unsignedp, rtx insn)
5579 if (unsignedp)
5581 import_milli (remU);
5582 return output_millicode_call (insn,
5583 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5585 else
5587 import_milli (remI);
5588 return output_millicode_call (insn,
5589 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
/* Output the .CALL argument-location descriptor directive for
   CALL_INSN, describing which argument words are passed in general
   (GR) and floating-point (FR/FU) registers.  The HP SOM linker uses
   this information to insert argument relocation stubs when caller
   and callee disagree about argument locations.  */
void
output_arg_descriptor (rtx call_insn)
{
  /* Descriptor strings for the four argument words ARGW0..ARGW3.  */
  const char *arg_regs[4];
  enum machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  /* We neither need nor want argument location descriptors for the
     64bit runtime environment or the ELF32 environment.  */
  if (TARGET_64BIT || TARGET_ELF32)
    return;

  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
	     asm_out_file);
      return;
    }

  /* Walk the USEs recorded for the call to find the argument
     registers actually used.  */
  gcc_assert (GET_CODE (call_insn) == CALL_INSN);
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
       link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      if (regno >= 23 && regno <= 26)
	{
	  /* General argument registers %r26..%r23 map to ARGW0..ARGW3;
	     a DImode argument occupies this word and the next.  */
	  arg_regs[26 - regno] = "GR";
	  if (arg_mode == DImode)
	    arg_regs[25 - regno] = "GR";
	}
      else if (regno >= 32 && regno <= 39)
	{
	  /* Floating-point argument registers.  A single-precision
	     value occupies one word; a double occupies a word pair
	     (FR plus FU, whose order is OS-convention dependent).  */
	  if (arg_mode == SFmode)
	    arg_regs[(regno - 32) / 2] = "FR";
	  else
	    {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
	      arg_regs[(regno - 34) / 2] = "FR";
	      arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
	      arg_regs[(regno - 34) / 2] = "FU";
	      arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
	    }
	}
    }

  /* Emit the directive, listing only the argument words in use.  */
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
	{
	  if (output_flag++)
	    fputc (',', asm_out_file);
	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
	}
    }
  fputc ('\n', asm_out_file);
}
/* Implement TARGET_SECONDARY_RELOAD.  Decide whether moving X in MODE
   into (IN_P) or out of a register of class RCLASS needs a scratch
   register or a special reload pattern, recording the pattern in SRI.
   Returns the class of an intermediate register when one is required,
   otherwise NO_REGS.  */
static enum reg_class
pa_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
		     enum machine_mode mode, secondary_reload_info *sri)
{
  int is_symbolic, regno;

  /* Handle the easy stuff first.  */
  if (rclass == R1_REGS)
    return NO_REGS;

  if (REG_P (x))
    {
      regno = REGNO (x);
      if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
	return NO_REGS;
    }
  else
    regno = -1;

  /* If we have something like (mem (mem (...)), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
    return NO_REGS;

  /* Trying to load a constant into a FP register during PIC code
     generation requires %r1 as a scratch register.  */
  if (flag_pic
      && (mode == SImode || mode == DImode)
      && FP_REG_CLASS_P (rclass)
      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
    {
      sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
		    : CODE_FOR_reload_indi_r1);
      return NO_REGS;
    }

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside pa_secondary_reload_class.  */
  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  /* In order to allow 14-bit displacements in integer loads and stores,
     we need to prevent reload from generating out of range integer mode
     loads and stores to the floating point registers.  Previously, we
     used to call for a secondary reload and have emit_move_sequence()
     fix the instruction sequence.  However, reload occasionally wouldn't
     generate the reload and we would end up with an invalid REG+D memory
     address.  So, now we use an intermediate general register for most
     memory loads and stores.  */
  if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
      && GET_MODE_CLASS (mode) == MODE_INT
      && FP_REG_CLASS_P (rclass))
    {
      /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
	 the secondary reload needed for a pseudo.  It never passes a
	 REG+D address.  */
      if (GET_CODE (x) == MEM)
	{
	  x = XEXP (x, 0);

	  /* We don't need an intermediate for indexed and LO_SUM DLT
	     memory addresses.  When INT14_OK_STRICT is true, it might
	     appear that we could directly allow register indirect
	     memory addresses.  However, this doesn't work because we
	     don't support SUBREGs in floating-point register copies
	     and reload doesn't tell us when it's going to use a SUBREG.  */
	  if (IS_INDEX_ADDR_P (x)
	      || IS_LO_SUM_DLT_ADDR_P (x))
	    return NO_REGS;

	  /* Otherwise, we need an intermediate general register.  */
	  return GENERAL_REGS;
	}

      /* Request a secondary reload with a general scratch register
	 for everything else.  ??? Could symbolic operands be handled
	 directly when generating non-pic PA 2.0 code?  */
      sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
      return NO_REGS;
    }

  /* We need a secondary register (GPR) for copies between the SAR
     and anything other than a general register.  */
  if (rclass == SHIFT_REGS && (regno <= 0 || regno >= 32))
    {
      sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
      return NO_REGS;
    }

  /* A SAR<->FP register copy requires a secondary register (GPR) as
     well as secondary memory.  */
  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
      && (REGNO_REG_CLASS (regno) == SHIFT_REGS
	  && FP_REG_CLASS_P (rclass)))
    {
      sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
      return NO_REGS;
    }

  /* Secondary reloads of symbolic operands require %r1 as a scratch
     register when we're generating PIC code and when the operand isn't
     readonly.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  /* Profiling has showed GCC spends about 2.6% of its compilation
     time in symbolic_operand from calls inside pa_secondary_reload_class.
     So, we use an inline copy to avoid useless work.  */
  switch (GET_CODE (x))
    {
      rtx op;

    case SYMBOL_REF:
      is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
      break;
    case LABEL_REF:
      is_symbolic = 1;
      break;
    case CONST:
      op = XEXP (x, 0);
      is_symbolic = (((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		       && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
		      || GET_CODE (XEXP (op, 0)) == LABEL_REF)
		     && GET_CODE (XEXP (op, 1)) == CONST_INT);
      break;
    default:
      is_symbolic = 0;
      break;
    }

  if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
    {
      gcc_assert (mode == SImode || mode == DImode);
      sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
		    : CODE_FOR_reload_indi_r1);
    }

  return NO_REGS;
}
5808 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5809 is only marked as live on entry by df-scan when it is a fixed
5810 register. It isn't a fixed register in the 64-bit runtime,
5811 so we need to mark it here. */
5813 static void
5814 pa_extra_live_on_entry (bitmap regs)
5816 if (TARGET_64BIT)
5817 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5820 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5821 to prevent it from being deleted. */
5824 pa_eh_return_handler_rtx (void)
5826 rtx tmp;
5828 tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
5829 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5830 tmp = gen_rtx_MEM (word_mode, tmp);
5831 tmp->volatil = 1;
5832 return tmp;
5835 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5836 by invisible reference. As a GCC extension, we also pass anything
5837 with a zero or variable size by reference.
5839 The 64-bit runtime does not describe passing any types by invisible
5840 reference. The internals of GCC can't currently handle passing
5841 empty structures, and zero or variable length arrays when they are
5842 not passed entirely on the stack or by reference. Thus, as a GCC
5843 extension, we pass these types by reference. The HP compiler doesn't
5844 support these types, so hopefully there shouldn't be any compatibility
5845 issues. This may have to be revisited when HP releases a C99 compiler
5846 or updates the ABI. */
5848 static bool
5849 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5850 enum machine_mode mode, const_tree type,
5851 bool named ATTRIBUTE_UNUSED)
5853 HOST_WIDE_INT size;
5855 if (type)
5856 size = int_size_in_bytes (type);
5857 else
5858 size = GET_MODE_SIZE (mode);
5860 if (TARGET_64BIT)
5861 return size <= 0;
5862 else
5863 return size <= 0 || size > 8;
/* Implement FUNCTION_ARG_PADDING.  Return the direction (upward,
   downward or none) in which an argument of MODE/TYPE is padded when
   it occupies less than a full parameter slot.  */
enum direction
function_arg_padding (enum machine_mode mode, const_tree type)
{
  /* BLKmode arguments, and (on the 64-bit runtime) aggregates,
     complex and vector types, need an explicit justification choice.  */
  if (mode == BLKmode
      || (TARGET_64BIT
	  && type
	  && (AGGREGATE_TYPE_P (type)
	      || TREE_CODE (type) == COMPLEX_TYPE
	      || TREE_CODE (type) == VECTOR_TYPE)))
    {
      /* Return none if justification is not required.  */
      if (type
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
	return none;

      /* The directions set here are ignored when a BLKmode argument larger
	 than a word is placed in a register.  Different code is used for
	 the stack and registers.  This makes it difficult to have a
	 consistent data representation for both the stack and registers.
	 For both runtimes, the justification and padding for arguments on
	 the stack and in registers should be identical.  */
      if (TARGET_64BIT)
	/* The 64-bit runtime specifies left justification for aggregates.  */
	return upward;
      else
	/* The 32-bit runtime architecture specifies right justification.
	   When the argument is passed on the stack, the argument is padded
	   with garbage on the left.  The HP compiler pads with zeros.  */
	return downward;
    }

  /* Scalars narrower than a parameter slot are right justified;
     everything else needs no padding.  */
  if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
    return downward;
  else
    return none;
}
/* Do what is necessary for `va_start'.  We look at the current function
   to determine if stdargs or varargs is used and fill in an initial
   va_list.  A pointer to this constructor is returned.  */

static rtx
hppa_builtin_saveregs (void)
{
  rtx offset, dest;
  tree fntype = TREE_TYPE (current_function_decl);
  /* ARGADJ is one word when the function's last declared parameter is
     not void (i.e. stdarg rather than old-style varargs), else zero.  */
  int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
		   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		       != void_type_node)))
		? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
  else
    offset = crtl->args.arg_offset_rtx;

  if (TARGET_64BIT)
    {
      int i, off;

      /* Adjust for varargs/stdarg differences.  */
      if (argadj)
	offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
      else
	offset = crtl->args.arg_offset_rtx;

      /* We need to save %r26 .. %r19 inclusive starting at offset -64
	 from the incoming arg pointer and growing to larger addresses.  */
      for (i = 26, off = -64; i >= 19; i--, off += 8)
	emit_move_insn (gen_rtx_MEM (word_mode,
				     plus_constant (arg_pointer_rtx, off)),
			gen_rtx_REG (word_mode, i));

      /* The incoming args pointer points just beyond the flushback area;
	 normally this is not a serious concern.  However, when we are doing
	 varargs/stdargs we want to make the arg pointer point to the start
	 of the incoming argument area.  */
      emit_move_insn (virtual_incoming_args_rtx,
		      plus_constant (arg_pointer_rtx, -64));

      /* Now return a pointer to the first anonymous argument.  */
      return copy_to_reg (expand_binop (Pmode, add_optab,
					virtual_incoming_args_rtx,
					offset, 0, 0, OPTAB_LIB_WIDEN));
    }

  /* 32-bit runtime: store the four general argument registers
     (%r26..%r23) on the stack at -16 from the internal arg pointer.  */
  dest = gen_rtx_MEM (BLKmode,
		      plus_constant (crtl->args.internal_arg_pointer,
				     -16));
  set_mem_alias_set (dest, get_varargs_alias_set ());
  set_mem_align (dest, BITS_PER_WORD);
  move_block_from_reg (23, dest, 4);

  /* move_block_from_reg will emit code to store the argument registers
     individually as scalar stores.

     However, other insns may later load from the same addresses for
     a structure load (passing a struct to a varargs routine).

     The alias code assumes that such aliasing can never happen, so we
     have to keep memory referencing insns from moving up beyond the
     last argument register store.  So we emit a blockage insn here.  */
  emit_insn (gen_blockage ());

  return copy_to_reg (expand_binop (Pmode, add_optab,
				    crtl->args.internal_arg_pointer,
				    offset, 0, 0, OPTAB_LIB_WIDEN));
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  Save the anonymous
   argument registers (see hppa_builtin_saveregs) and point VALIST at
   them; the caller-supplied NEXTARG is deliberately ignored.  */
static void
hppa_va_start (tree valist, rtx nextarg)
{
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  Build the GENERIC tree that
   fetches the next argument of TYPE from the va_list VALIST, appending
   any needed statements to PRE_P/POST_P.  */
static tree
hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			   gimple_seq *post_p)
{
  if (TARGET_64BIT)
    {
      /* Args grow upward.  We can use the generic routines.  */
      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    }
  else /* !TARGET_64BIT */
    {
      tree ptr = build_pointer_type (type);
      tree valist_type;
      tree t, u;
      unsigned int size, ofs;
      bool indirect;

      /* Large/variable-sized objects were passed by reference, so the
	 slot actually holds a pointer to the object.  */
      indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
      if (indirect)
	{
	  type = ptr;
	  ptr = build_pointer_type (type);
	}
      size = int_size_in_bytes (type);
      valist_type = TREE_TYPE (valist);

      /* Args grow down.  Not handled by generic routines.  */

      /* Step VALIST down by the rounded size of the argument.  */
      u = fold_convert (sizetype, size_in_bytes (type));
      u = fold_build1 (NEGATE_EXPR, sizetype, u);
      t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);

      /* Copied from va-pa.h, but we probably don't need to align to
	 word size, since we generate and preserve that invariant.  */
      u = size_int (size > 4 ? -8 : -4);
      t = fold_convert (sizetype, t);
      t = build2 (BIT_AND_EXPR, sizetype, t, u);
      t = fold_convert (valist_type, t);

      t = build2 (MODIFY_EXPR, valist_type, valist, t);

      /* Small arguments are right justified within their slot.  */
      ofs = (8 - size) % 4;
      if (ofs != 0)
	{
	  u = size_int (ofs);
	  t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
	}

      t = fold_convert (ptr, t);
      t = build_va_arg_indirect_ref (t);

      /* For by-reference arguments, dereference once more to reach
	 the actual object.  */
      if (indirect)
	t = build_va_arg_indirect_ref (t);

      return t;
    }
}
6043 /* True if MODE is valid for the target. By "valid", we mean able to
6044 be manipulated in non-trivial ways. In particular, this means all
6045 the arithmetic is supported.
6047 Currently, TImode is not valid as the HP 64-bit runtime documentation
6048 doesn't document the alignment and calling conventions for this type.
6049 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6050 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6052 static bool
6053 pa_scalar_mode_supported_p (enum machine_mode mode)
6055 int precision = GET_MODE_PRECISION (mode);
6057 switch (GET_MODE_CLASS (mode))
6059 case MODE_PARTIAL_INT:
6060 case MODE_INT:
6061 if (precision == CHAR_TYPE_SIZE)
6062 return true;
6063 if (precision == SHORT_TYPE_SIZE)
6064 return true;
6065 if (precision == INT_TYPE_SIZE)
6066 return true;
6067 if (precision == LONG_TYPE_SIZE)
6068 return true;
6069 if (precision == LONG_LONG_TYPE_SIZE)
6070 return true;
6071 return false;
6073 case MODE_FLOAT:
6074 if (precision == FLOAT_TYPE_SIZE)
6075 return true;
6076 if (precision == DOUBLE_TYPE_SIZE)
6077 return true;
6078 if (precision == LONG_DOUBLE_TYPE_SIZE)
6079 return true;
6080 return false;
6082 case MODE_DECIMAL_FLOAT:
6083 return false;
6085 default:
6086 gcc_unreachable ();
6090 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6091 it branches to the next real instruction. Otherwise, return FALSE. */
6093 static bool
6094 branch_to_delay_slot_p (rtx insn)
6096 if (dbr_sequence_length ())
6097 return FALSE;
6099 return next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn);
/* Return TRUE if INSN, a jump insn, needs a nop in its delay slot.

   This occurs when INSN has an unfilled delay slot and is followed
   by an ASM_INPUT.  Disaster can occur if the ASM_INPUT is empty and
   the jump branches into the delay slot.  So, we add a nop in the delay
   slot just to be safe.  This messes up our instruction count, but we
   don't know how big the ASM_INPUT insn is anyway.  */

static bool
branch_needs_nop_p (rtx insn)
{
  rtx next_insn;

  /* A filled delay slot never needs the extra nop.  */
  if (dbr_sequence_length ())
    return FALSE;

  /* NOTE(review): next_real_insn can return NULL_RTX when INSN is the
     last real insn of the function, and the result is dereferenced
     unconditionally below -- presumably callers only invoke this when
     a following insn exists; confirm against the branch patterns.  */
  next_insn = next_real_insn (insn);
  return GET_CODE (PATTERN (next_insn)) == ASM_INPUT;
}
/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.

   OPERANDS[1]/OPERANDS[2] are the comparison operands, OPERANDS[3] the
   condition, OPERANDS[0] the branch target.  NEGATED reverses the
   condition; INSN is the branch insn itself.  */

const char *
output_cbranch (rtx *operands, int negated, rtx insn)
{
  static char buf[100];		/* Assembled template; returned to caller.  */
  int useskip = 0;		/* Use a comclr skip instead of a branch.  */
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;			/* Passed to output_lbranch: extract delay insn?  */

  /* A conditional branch to the following instruction (e.g. the delay slot)
     is asking for a disaster.  This can happen when not optimizing and
     when jump optimization fails.

     While it is usually safe to emit nothing, this can fail if the
     preceding instruction is a nullified branch with an empty delay
     slot and the same branch target as this branch.  We could check
     for this but jump optimization should eliminate nop jumps.  It
     is always safe to emit a nop.  */
  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* The doubleword form of the cmpib instruction doesn't have the LEU
     and GTU conditions while the cmpb instruction does.  Since we accept
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
    operands[2] = gen_rtx_REG (DImode, 0);
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
    operands[1] = gen_rtx_REG (DImode, 0);

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  if (length == 4
      && next_real_insn (insn) != 0
      && get_attr_length (next_real_insn (insn)) == 4
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
      && nullify)
    useskip = 1;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
    case 4:
      if (useskip)
	strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
      else
	strcpy (buf, "{com%I2b,|cmp%I2b,}");
      /* Doubleword comparisons need the '*' completer.  */
      if (GET_MODE (operands[1]) == DImode)
	strcat (buf, "*");
      if (negated)
	strcat (buf, "%B3");
      else
	strcat (buf, "%S3");
      if (useskip)
	strcat (buf, " %2,%r1,%%r0");
      else if (nullify)
	{
	  if (branch_needs_nop_p (insn))
	    strcat (buf, ",n %2,%r1,%0%#");
	  else
	    strcat (buf, ",n %2,%r1,%0");
	}
      else
	strcat (buf, " %2,%r1,%0");
      break;

      /* All long conditionals.  Note a short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
    case 8:
      /* Handle weird backwards branch with a filled delay slot
	 which is nullified.  */
      if (dbr_sequence_length () != 0
	  && ! forward_branch_p (insn)
	  && nullify)
	{
	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
	  if (GET_MODE (operands[1]) == DImode)
	    strcat (buf, "*");
	  if (negated)
	    strcat (buf, "%S3");
	  else
	    strcat (buf, "%B3");
	  strcat (buf, ",n %2,%r1,.+12\n\tb %0");
	}
      /* Handle short backwards branch with an unfilled delay slot.
	 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	 taken and untaken branches.  */
      else if (dbr_sequence_length () == 0
	       && ! forward_branch_p (insn)
	       && INSN_ADDRESSES_SET_P ()
	       && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	{
	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
	  if (GET_MODE (operands[1]) == DImode)
	    strcat (buf, "*");
	  if (negated)
	    strcat (buf, "%B3 %2,%r1,%0%#");
	  else
	    strcat (buf, "%S3 %2,%r1,%0%#");
	}
      else
	{
	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	  if (GET_MODE (operands[1]) == DImode)
	    strcat (buf, "*");
	  if (negated)
	    strcat (buf, "%S3");
	  else
	    strcat (buf, "%B3");
	  if (nullify)
	    strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
	  else
	    strcat (buf, " %2,%r1,%%r0\n\tb %0");
	}
      break;

    default:
      /* The reversed conditional branch must branch over one additional
	 instruction if the delay slot is filled and needs to be extracted
	 by output_lbranch.  If the delay slot is empty or this is a
	 nullified forward branch, the instruction after the reversed
	 condition branch must be nullified.  */
      if (dbr_sequence_length () == 0
	  || (nullify && forward_branch_p (insn)))
	{
	  nullify = 1;
	  xdelay = 0;
	  operands[4] = GEN_INT (length);
	}
      else
	{
	  xdelay = 1;
	  operands[4] = GEN_INT (length + 4);
	}

      /* Create a reversed conditional branch which branches around
	 the following insns.  */
      if (GET_MODE (operands[1]) != DImode)
	{
	  if (nullify)
	    {
	      if (negated)
		strcpy (buf,
		  "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
	      else
		strcpy (buf,
		  "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
	    }
	  else
	    {
	      if (negated)
		strcpy (buf,
		  "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
	      else
		strcpy (buf,
		  "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
	    }
	}
      else
	{
	  if (nullify)
	    {
	      if (negated)
		strcpy (buf,
		  "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
	      else
		strcpy (buf,
		  "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
	    }
	  else
	    {
	      if (negated)
		strcpy (buf,
		  "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
	      else
		strcpy (buf,
		  "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
	    }
	}

      /* Emit the reversed comparison, then the long branch sequence.  */
      output_asm_insn (buf, operands);
      return output_lbranch (operands[0], insn, xdelay);
    }
  return buf;
}
/* This routine handles output of long unconditional branches that
   exceed the maximum range of a simple branch instruction.  Since
   we don't have a register available for the branch, we save register
   %r1 in the frame marker, load the branch destination DEST into %r1,
   execute the branch, and restore %r1 in the delay slot of the branch.

   Since long branches may have an insn in the delay slot and the
   delay slot is used to restore %r1, we in general need to extract
   this insn and execute it before the branch.  However, to facilitate
   use of this function by conditional branches, we also provide an
   option to not extract the delay insn so that it will be emitted
   after the long branch.  So, if there is an insn in the delay slot,
   it is extracted if XDELAY is nonzero.

   The lengths of the various long-branch sequences are 20, 16 and 24
   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */

const char *
output_lbranch (rtx dest, rtx insn, int xdelay)
{
  rtx xoperands[2];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);

      /* Emit the delay-slot insn now, ahead of the branch sequence.  */
      final_scan_insn (NEXT_INSN (insn), asm_out_file,
		       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      SET_INSN_DELETED (NEXT_INSN (insn));
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the callers frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; goto
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
	/* Use the slot at -40 in the frame marker since HP builtin
	   alloca doesn't copy it.  */
	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
	/* Use the "Clean Up" slot in the frame marker.  In GCC,
	   the only other use of this location is for copying a
	   floating point double argument from a floating-point
	   register to two general registers.  The copy is done
	   as an "atomic" operation when outputting a call, so it
	   won't interfere with our using the location here.  */
	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  /* Load the destination into %r1 and branch, in one of three ways
     depending on the code model.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      /* Absolute ldil/ldo pair; no PC-relative code allowed here.  */
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      /* Materialize the PC in %r1, then add the PC-relative offset
	 to the destination.  */
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
      if (TARGET_SOM || !TARGET_GAS)
	{
	  /* Without GAS's $PIC_pcrel$ support, emit a local label to
	     anchor the offset arithmetic.  */
	  xoperands[1] = gen_label_rtx ();
	  output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (xoperands[1]));
	  output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
	}
      else
	{
	  output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
	  output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
	}
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  The slot chosen
     must match the store emitted above.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldd -16(%%r30),%%r1";
      else
	return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldw -20(%%r30),%%r1";
      else
	return "ldw -12(%%r30),%%r1";
    }
}
/* This routine handles all the branch-on-bit conditional branch sequences we
   might need to generate.  It handles nullification of delay slots,
   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.

   OPERANDS[0] is the register tested and OPERANDS[1] the bit position, as
   used by the "bb" templates below; OPERANDS[2]/OPERANDS[3] are the branch
   targets for the normal/negated forms.  NEGATED selects which target is
   taken; WHICH flips the sense of the bit test.  */

const char *
output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
{
  static char buf[100];
  int useskip = 0;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */

  if (length == 4
      && next_real_insn (insn) != 0
      && get_attr_length (next_real_insn (insn)) == 4
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
      && nullify)
    useskip = 1;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
         delay slot.  */
      case 4:
        if (useskip)
          strcpy (buf, "{extrs,|extrw,s,}");
        else
          strcpy (buf, "bb,");
        /* 64-bit operands need the doubleword forms of the insns.  */
        if (useskip && GET_MODE (operands[0]) == DImode)
          strcpy (buf, "extrd,s,*");
        else if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        if ((which == 0 && negated)
             || (which == 1 && ! negated))
          strcat (buf, ">=");
        else
          strcat (buf, "<");
        if (useskip)
          strcat (buf, " %0,%1,1,%%r0");
        else if (nullify && negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, ",n %0,%1,%3%#");
            else
              strcat (buf, ",n %0,%1,%3");
          }
        else if (nullify && ! negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, ",n %0,%1,%2%#");
            else
              strcat (buf, ",n %0,%1,%2");
          }
        else if (! nullify && negated)
          strcat (buf, " %0,%1,%3");
        else if (! nullify && ! negated)
          strcat (buf, " %0,%1,%2");
        break;

     /* All long conditionals.  Note a short backward branch with an
        unfilled delay slot is treated just like a long backward branch
        with an unfilled delay slot.  */
      case 8:
        /* Handle weird backwards branch with a filled delay slot
           which is nullified.  */
        if (dbr_sequence_length () != 0
            && ! forward_branch_p (insn)
            && nullify)
          {
            strcpy (buf, "bb,");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (negated)
              strcat (buf, ",n %0,%1,.+12\n\tb %3");
            else
              strcat (buf, ",n %0,%1,.+12\n\tb %2");
          }
        /* Handle short backwards branch with an unfilled delay slot.
           Using a bb;nop rather than extrs;bl saves 1 cycle for both
           taken and untaken branches.  */
        else if (dbr_sequence_length () == 0
                 && ! forward_branch_p (insn)
                 && INSN_ADDRESSES_SET_P ()
                 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
          {
            strcpy (buf, "bb,");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, ">=");
            else
              strcat (buf, "<");
            if (negated)
              strcat (buf, " %0,%1,%3%#");
            else
              strcat (buf, " %0,%1,%2%#");
          }
        else
          {
            /* Skip over the next insn with an extract, then branch.  */
            if (GET_MODE (operands[0]) == DImode)
              strcpy (buf, "extrd,s,*");
            else
              strcpy (buf, "{extrs,|extrw,s,}");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (nullify && negated)
              strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
            else if (nullify && ! negated)
              strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
            else if (negated)
              strcat (buf, " %0,%1,1,%%r0\n\tb %3");
            else
              strcat (buf, " %0,%1,1,%%r0\n\tb %2");
          }
        break;

      default:
        /* The reversed conditional branch must branch over one additional
           instruction if the delay slot is filled and needs to be extracted
           by output_lbranch.  If the delay slot is empty or this is a
           nullified forward branch, the instruction after the reversed
           condition branch must be nullified.  */
        if (dbr_sequence_length () == 0
            || (nullify && forward_branch_p (insn)))
          {
            nullify = 1;
            xdelay = 0;
            operands[4] = GEN_INT (length);
          }
        else
          {
            xdelay = 1;
            operands[4] = GEN_INT (length + 4);
          }

        if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        else
          strcpy (buf, "bb,");
        if ((which == 0 && negated)
            || (which == 1 && !negated))
          strcat (buf, "<");
        else
          strcat (buf, ">=");
        if (nullify)
          strcat (buf, ",n %0,%1,.+%4");
        else
          strcat (buf, " %0,%1,.+%4");
        output_asm_insn (buf, operands);
        /* output_lbranch emits the out-of-range branch itself.  */
        return output_lbranch (negated ? operands[3] : operands[2],
                               insn, xdelay);
    }
  return buf;
}
/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.

   Structured exactly like output_bb above, except that the bit position
   comes from the SAR register ("bvb"/"%%sar" forms) instead of an
   immediate operand.  */

const char *
output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
{
  static char buf[100];
  int useskip = 0;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */

  if (length == 4
      && next_real_insn (insn) != 0
      && get_attr_length (next_real_insn (insn)) == 4
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
      && nullify)
    useskip = 1;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
         delay slot.  */
      case 4:
        if (useskip)
          strcpy (buf, "{vextrs,|extrw,s,}");
        else
          strcpy (buf, "{bvb,|bb,}");
        /* 64-bit operands need the doubleword forms of the insns.  */
        if (useskip && GET_MODE (operands[0]) == DImode)
          strcpy (buf, "extrd,s,*");
        else if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        if ((which == 0 && negated)
             || (which == 1 && ! negated))
          strcat (buf, ">=");
        else
          strcat (buf, "<");
        if (useskip)
          strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
        else if (nullify && negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
            else
              strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
          }
        else if (nullify && ! negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
            else
              strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
          }
        else if (! nullify && negated)
          strcat (buf, "{ %0,%3| %0,%%sar,%3}");
        else if (! nullify && ! negated)
          strcat (buf, "{ %0,%2| %0,%%sar,%2}");
        break;

     /* All long conditionals.  Note a short backward branch with an
        unfilled delay slot is treated just like a long backward branch
        with an unfilled delay slot.  */
      case 8:
        /* Handle weird backwards branch with a filled delay slot
           which is nullified.  */
        if (dbr_sequence_length () != 0
            && ! forward_branch_p (insn)
            && nullify)
          {
            strcpy (buf, "{bvb,|bb,}");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (negated)
              strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
            else
              strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
          }
        /* Handle short backwards branch with an unfilled delay slot.
           Using a bb;nop rather than extrs;bl saves 1 cycle for both
           taken and untaken branches.  */
        else if (dbr_sequence_length () == 0
                 && ! forward_branch_p (insn)
                 && INSN_ADDRESSES_SET_P ()
                 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
          {
            strcpy (buf, "{bvb,|bb,}");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, ">=");
            else
              strcat (buf, "<");
            if (negated)
              strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
            else
              strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
          }
        else
          {
            /* Skip over the next insn with an extract, then branch.  */
            strcpy (buf, "{vextrs,|extrw,s,}");
            if (GET_MODE (operands[0]) == DImode)
              strcpy (buf, "extrd,s,*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (nullify && negated)
              strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
            else if (nullify && ! negated)
              strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
            else if (negated)
              strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
            else
              strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
          }
        break;

      default:
        /* The reversed conditional branch must branch over one additional
           instruction if the delay slot is filled and needs to be extracted
           by output_lbranch.  If the delay slot is empty or this is a
           nullified forward branch, the instruction after the reversed
           condition branch must be nullified.  */
        if (dbr_sequence_length () == 0
            || (nullify && forward_branch_p (insn)))
          {
            nullify = 1;
            xdelay = 0;
            operands[4] = GEN_INT (length);
          }
        else
          {
            xdelay = 1;
            operands[4] = GEN_INT (length + 4);
          }

        if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        else
          strcpy (buf, "{bvb,|bb,}");
        if ((which == 0 && negated)
            || (which == 1 && !negated))
          strcat (buf, "<");
        else
          strcat (buf, ">=");
        if (nullify)
          strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
        else
          strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
        output_asm_insn (buf, operands);
        /* output_lbranch emits the out-of-range branch itself.  */
        return output_lbranch (negated ? operands[3] : operands[2],
                               insn, xdelay);
    }
  return buf;
}
/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.

   WHICH_ALTERNATIVE selects where the loop counter lives: 0 for a
   general register, 1 for a floating-point register (reloaded through
   the stack slot at -16(%r30)), otherwise memory.  */
const char *
output_dbra (rtx *operands, rtx insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      /* The branch is a no-op; just perform the decrement.  */
      if (which_alternative == 0)
        return "ldo %1(%0),%0";
      else if (which_alternative == 1)
        {
          output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
          output_asm_insn ("ldw -16(%%r30),%4", operands);
          output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
          return "{fldws|fldw} -16(%%r30),%0";
        }
      else
        {
          output_asm_insn ("ldw %0,%4", operands);
          return "ldo %1(%4),%4\n\tstw %4,%0";
        }
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
         as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
        nullify = 1;

      /* If this is a short forward conditional branch which did not get
         its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
        nullify = forward_branch_p (insn);

      switch (length)
        {
        case 4:
          if (nullify)
            {
              if (branch_needs_nop_p (insn))
                return "addib,%C2,n %1,%0,%3%#";
              else
                return "addib,%C2,n %1,%0,%3";
            }
          else
            return "addib,%C2 %1,%0,%3";

        case 8:
          /* Handle weird backwards branch with a filled delay slot
             which is nullified.  */
          if (dbr_sequence_length () != 0
              && ! forward_branch_p (insn)
              && nullify)
            return "addib,%N2,n %1,%0,.+12\n\tb %3";
          /* Handle short backwards branch with an unfilled delay slot.
             Using a addb;nop rather than addi;bl saves 1 cycle for both
             taken and untaken branches.  */
          else if (dbr_sequence_length () == 0
                   && ! forward_branch_p (insn)
                   && INSN_ADDRESSES_SET_P ()
                   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
            return "addib,%C2 %1,%0,%3%#";

          /* Handle normal cases.  */
          if (nullify)
            return "addi,%N2 %1,%0,%0\n\tb,n %3";
          else
            return "addi,%N2 %1,%0,%0\n\tb %3";

        default:
          /* The reversed conditional branch must branch over one additional
             instruction if the delay slot is filled and needs to be extracted
             by output_lbranch.  If the delay slot is empty or this is a
             nullified forward branch, the instruction after the reversed
             condition branch must be nullified.  */
          if (dbr_sequence_length () == 0
              || (nullify && forward_branch_p (insn)))
            {
              nullify = 1;
              xdelay = 0;
              operands[4] = GEN_INT (length);
            }
          else
            {
              xdelay = 1;
              operands[4] = GEN_INT (length + 4);
            }

          if (nullify)
            output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
          else
            output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

          return output_lbranch (operands[3], insn, xdelay);
        }
    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
         increment the GR, store the GR into MEM, and finally reload
         the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
                       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
        return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
        return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
        {
          operands[5] = GEN_INT (length - 16);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
          output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
         happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
        return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
        return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
        {
          operands[5] = GEN_INT (length - 4);
          output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
}
/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.

   WHICH_ALTERNATIVE selects the destination: 0 for a general register,
   1 for a floating-point register, 2 for memory, otherwise the SAR
   register.  If REVERSE_COMPARISON is set, the condition in operands[2]
   is reversed in place before any code is emitted.  */
const char *
output_movb (rtx *operands, rtx insn, int which_alternative,
             int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      /* The branch is a no-op; just perform the move.  */
      if (which_alternative == 0)
        return "copy %1,%0";
      else if (which_alternative == 1)
        {
          output_asm_insn ("stw %1,-16(%%r30)", operands);
          return "{fldws|fldw} -16(%%r30),%0";
        }
      else if (which_alternative == 2)
        return "stw %1,%0";
      else
        return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
         as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
        nullify = 1;

      /* If this is a short forward conditional branch which did not get
         its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
        nullify = forward_branch_p (insn);

      switch (length)
        {
        case 4:
          if (nullify)
            {
              if (branch_needs_nop_p (insn))
                return "movb,%C2,n %1,%0,%3%#";
              else
                return "movb,%C2,n %1,%0,%3";
            }
          else
            return "movb,%C2 %1,%0,%3";

        case 8:
          /* Handle weird backwards branch with a filled delay slot
             which is nullified.  */
          if (dbr_sequence_length () != 0
              && ! forward_branch_p (insn)
              && nullify)
            return "movb,%N2,n %1,%0,.+12\n\tb %3";

          /* Handle short backwards branch with an unfilled delay slot.
             Using a movb;nop rather than or;bl saves 1 cycle for both
             taken and untaken branches.  */
          else if (dbr_sequence_length () == 0
                   && ! forward_branch_p (insn)
                   && INSN_ADDRESSES_SET_P ()
                   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
            return "movb,%C2 %1,%0,%3%#";
          /* Handle normal cases.  */
          if (nullify)
            return "or,%N2 %1,%%r0,%0\n\tb,n %3";
          else
            return "or,%N2 %1,%%r0,%0\n\tb %3";

        default:
          /* The reversed conditional branch must branch over one additional
             instruction if the delay slot is filled and needs to be extracted
             by output_lbranch.  If the delay slot is empty or this is a
             nullified forward branch, the instruction after the reversed
             condition branch must be nullified.  */
          if (dbr_sequence_length () == 0
              || (nullify && forward_branch_p (insn)))
            {
              nullify = 1;
              xdelay = 0;
              operands[4] = GEN_INT (length);
            }
          else
            {
              xdelay = 1;
              operands[4] = GEN_INT (length + 4);
            }

          if (nullify)
            output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
          else
            output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

          return output_lbranch (operands[3], insn, xdelay);
        }
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
         finally load the FP register from MEM from within the branch's
         delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
        {
          operands[4] = GEN_INT (length - 4);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
          output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
         happens in the branch's delay slot.  */
      if (length == 8)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
        {
          operands[4] = GEN_INT (length);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
                           operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
        {
          operands[4] = GEN_INT (length);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
                           operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
}
/* Copy any FP arguments in INSN into integer registers.

   Walks the CALL_INSN_FUNCTION_USAGE list of the call INSN; for each
   argument passed in an FP register (hard regnos 32-39), emits code to
   spill it to the stack slot at -16(%r30) and reload it into the
   corresponding general argument register.  The emitted code must stay
   in sync with the length estimate in length_fp_args below.  */
static void
copy_fp_args (rtx insn)
{
  rtx link;
  rtx xoperands[2];

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      /* Only (use (reg ...)) entries for argument registers matter.  */
      if (! (GET_CODE (use) == USE
          && GET_CODE (XEXP (use, 0)) == REG
          && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
        continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
        {
          /* Copy the FP register into an integer register via memory.  */
          if (arg_mode == SFmode)
            {
              xoperands[0] = XEXP (use, 0);
              xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
              output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
              output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
            }
          else
            {
              /* A double needs two word loads for its halves; %R1 names
                 the second register of the pair.  */
              xoperands[0] = XEXP (use, 0);
              xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
              output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
              output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
              output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
            }
        }
    }
}
7190 /* Compute length of the FP argument copy sequence for INSN. */
7191 static int
7192 length_fp_args (rtx insn)
7194 int length = 0;
7195 rtx link;
7197 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7199 int arg_mode, regno;
7200 rtx use = XEXP (link, 0);
7202 if (! (GET_CODE (use) == USE
7203 && GET_CODE (XEXP (use, 0)) == REG
7204 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7205 continue;
7207 arg_mode = GET_MODE (XEXP (use, 0));
7208 regno = REGNO (XEXP (use, 0));
7210 /* Is it a floating point register? */
7211 if (regno >= 32 && regno <= 39)
7213 if (arg_mode == SFmode)
7214 length += 8;
7215 else
7216 length += 12;
7220 return length;
7223 /* Return the attribute length for the millicode call instruction INSN.
7224 The length must match the code generated by output_millicode_call.
7225 We include the delay slot in the returned length as it is better to
7226 over estimate the length than to under estimate it. */
7229 attr_length_millicode_call (rtx insn)
7231 unsigned long distance = -1;
7232 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7234 if (INSN_ADDRESSES_SET_P ())
7236 distance = (total + insn_current_reference_address (insn));
7237 if (distance < total)
7238 distance = -1;
7241 if (TARGET_64BIT)
7243 if (!TARGET_LONG_CALLS && distance < 7600000)
7244 return 8;
7246 return 20;
7248 else if (TARGET_PORTABLE_RUNTIME)
7249 return 24;
7250 else
7252 if (!TARGET_LONG_CALLS && distance < 240000)
7253 return 8;
7255 if (TARGET_LONG_ABS_CALL && !flag_pic)
7256 return 12;
7258 return 24;
/* INSN is a function call.  It may have an unconditional jump
   in its delay slot.

   CALL_DEST is the routine we are calling.

   Emits the millicode call sequence and returns "" (all code is output
   via output_asm_insn).  The sequence lengths must match
   attr_length_millicode_call above.  */

const char *
output_millicode_call (rtx insn, rtx call_dest)
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  int distance;
  rtx seq_insn;
  rtx xoperands[3];

  xoperands[0] = call_dest;
  /* Return pointer register: %r2 on 64-bit, %r31 otherwise.  */
  xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within reach
     form of the $$sh_func_adrs call has a length of 28.  Because
     it has an attribute type of multi, it never has a nonzero
     sequence length.  The length of the $$sh_func_adrs is the same
     as certain out of reach PIC calls to other routines.  */
  if (!TARGET_LONG_CALLS
      && ((seq_length == 0
           && (attr_length == 12
               || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
          || (seq_length != 0 && attr_length == 8)))
    {
      output_asm_insn ("{bl|b,l} %0,%2", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
        {
          /* It might seem that one insn could be saved by accessing
             the millicode function using the linkage table.  However,
             this doesn't work in shared libraries and other dynamically
             loaded objects.  Using a pc-relative sequence also avoids
             problems related to the implicit use of the gp register.  */
          output_asm_insn ("b,l .+8,%%r1", xoperands);

          if (TARGET_GAS)
            {
              output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
              output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
            }
          else
            {
              xoperands[1] = gen_label_rtx ();
              output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (xoperands[1]));
              output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
            }

          output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
        }
      else if (TARGET_PORTABLE_RUNTIME)
        {
          /* Pure portable runtime doesn't allow be/ble; we also don't
             have PIC support in the assembler/linker, so this sequence
             is needed.  */

          /* Get the address of our target into %r1.  */
          output_asm_insn ("ldil L'%0,%%r1", xoperands);
          output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

          /* Get our return address into %r31.  */
          output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
          output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

          /* Jump to our target address in %r1.  */
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
        }
      else if (!flag_pic)
        {
          output_asm_insn ("ldil L'%0,%%r1", xoperands);
          if (TARGET_PA_20)
            output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
          else
            output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
        }
      else
        {
          output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
          output_asm_insn ("addi 16,%%r1,%%r31", xoperands);

          if (TARGET_SOM || !TARGET_GAS)
            {
              /* The HP assembler can generate relocations for the
                 difference of two symbols.  GAS can do this for a
                 millicode symbol but not an arbitrary external
                 symbol when generating SOM output.  */
              xoperands[1] = gen_label_rtx ();
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (xoperands[1]));
              output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
              output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
            }
          else
            {
              output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
              output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
                               xoperands);
            }

          /* Jump to our target address in %r1.  */
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
        }
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  /* We are done if there isn't a jump in the delay slot.  */
  if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
    return "";

  /* This call has an unconditional jump in its delay slot.  */
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);

  /* See if the return address can be adjusted.  Use the containing
     sequence insn's address.  */
  if (INSN_ADDRESSES_SET_P ())
    {
      seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
      distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
                  - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);

      if (VAL_14_BITS_P (distance))
        {
          /* Adjust the return pointer so the call "returns" straight
             to the jump's target.  */
          xoperands[1] = gen_label_rtx ();
          output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (xoperands[1]));
        }
      else
        /* ??? This branch may not reach its target.  */
        output_asm_insn ("nop\n\tb,n %0", xoperands);
    }
  else
    /* ??? This branch may not reach its target.  */
    output_asm_insn ("nop\n\tb,n %0", xoperands);

  /* Delete the jump.  */
  SET_INSN_DELETED (NEXT_INSN (insn));

  return "";
}
/* Return the attribute length of the call instruction INSN.  The SIBCALL
   flag indicates whether INSN is a regular call or a sibling call.  The
   length returned must be longer than the code actually generated by
   output_call.  Since branch shortening is done before delay branch
   sequencing, there is no way to determine whether or not the delay
   slot will be filled during branch shortening.  Even when the delay
   slot is filled, we may have to add a nop if the delay slot contains
   a branch that can't reach its target.  Thus, we always have to include
   the delay slot in the length estimate.  This used to be done in
   pa_adjust_insn_length but we do it here now as some sequences always
   fill the delay slot and we can save four bytes in the estimate for
   these sequences.  */
int
attr_length_call (rtx insn, int sibcall)
{
  int local_call;
  rtx call, call_dest;
  tree call_decl;
  int length = 0;
  rtx pat = PATTERN (insn);
  unsigned long distance = -1;

  gcc_assert (GET_CODE (insn) == CALL_INSN);

  if (INSN_ADDRESSES_SET_P ())
    {
      unsigned long total;

      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
      distance = (total + insn_current_reference_address (insn));
      /* Wrap-around means the call is out of reach.  */
      if (distance < total)
        distance = -1;
    }

  gcc_assert (GET_CODE (pat) == PARALLEL);

  /* Get the call rtx.  */
  call = XVECEXP (pat, 0, 0);
  if (GET_CODE (call) == SET)
    call = SET_SRC (call);

  gcc_assert (GET_CODE (call) == CALL);

  /* Determine if this is a local call.  */
  call_dest = XEXP (XEXP (call, 0), 0);
  call_decl = SYMBOL_REF_DECL (call_dest);
  local_call = call_decl && targetm.binds_local_p (call_decl);

  /* pc-relative branch.  */
  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
          || distance < 240000))
    length += 8;

  /* 64-bit plabel sequence.  */
  else if (TARGET_64BIT && !local_call)
    length += sibcall ? 28 : 24;

  /* non-pic long absolute branch sequence.  */
  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
    length += 12;

  /* long pc-relative branch sequence.  */
  else if (TARGET_LONG_PIC_SDIFF_CALL
           || (TARGET_GAS && !TARGET_SOM
               && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
    {
      length += 20;

      /* Extra insns needed to save/restore the space register.  */
      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && flag_pic)
        length += 8;
    }

  /* 32-bit plabel sequence.  */
  else
    {
      length += 32;

      /* SOM calls through a plabel must copy FP args to GRs.  */
      if (TARGET_SOM)
        length += length_fp_args (insn);

      if (flag_pic)
        length += 4;

      if (!TARGET_PA_20)
        {
          if (!sibcall)
            length += 8;

          if (!TARGET_NO_SPACE_REGS && flag_pic)
            length += 8;
        }
    }

  return length;
}
7511 /* INSN is a function call. It may have an unconditional jump
7512 in its delay slot.
7514 CALL_DEST is the routine we are calling. */
7516 const char *
7517 output_call (rtx insn, rtx call_dest, int sibcall)
7519 int delay_insn_deleted = 0;
7520 int delay_slot_filled = 0;
7521 int seq_length = dbr_sequence_length ();
7522 tree call_decl = SYMBOL_REF_DECL (call_dest);
7523 int local_call = call_decl && targetm.binds_local_p (call_decl);
7524 rtx xoperands[2];
7526 xoperands[0] = call_dest;
7528 /* Handle the common case where we're sure that the branch will reach
7529 the beginning of the "$CODE$" subspace. This is the beginning of
7530 the current function if we are in a named section. */
7531 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7533 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7534 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7536 else
7538 if (TARGET_64BIT && !local_call)
7540 /* ??? As far as I can tell, the HP linker doesn't support the
7541 long pc-relative sequence described in the 64-bit runtime
7542 architecture. So, we use a slightly longer indirect call. */
7543 xoperands[0] = get_deferred_plabel (call_dest);
7544 xoperands[1] = gen_label_rtx ();
7546 /* If this isn't a sibcall, we put the load of %r27 into the
7547 delay slot. We can't do this in a sibcall as we don't
7548 have a second call-clobbered scratch register available. */
7549 if (seq_length != 0
7550 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7551 && !sibcall)
7553 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7554 optimize, 0, NULL);
7556 /* Now delete the delay insn. */
7557 SET_INSN_DELETED (NEXT_INSN (insn));
7558 delay_insn_deleted = 1;
7561 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7562 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7563 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7565 if (sibcall)
7567 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7568 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7569 output_asm_insn ("bve (%%r1)", xoperands);
7571 else
7573 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7574 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7575 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7576 delay_slot_filled = 1;
7579 else
7581 int indirect_call = 0;
7583 /* Emit a long call. There are several different sequences
7584 of increasing length and complexity. In most cases,
7585 they don't allow an instruction in the delay slot. */
7586 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7587 && !TARGET_LONG_PIC_SDIFF_CALL
7588 && !(TARGET_GAS && !TARGET_SOM
7589 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7590 && !TARGET_64BIT)
7591 indirect_call = 1;
7593 if (seq_length != 0
7594 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7595 && !sibcall
7596 && (!TARGET_PA_20
7597 || indirect_call
7598 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7600 /* A non-jump insn in the delay slot. By definition we can
7601 emit this insn before the call (and in fact before argument
7602 relocating. */
7603 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7604 NULL);
7606 /* Now delete the delay insn. */
7607 SET_INSN_DELETED (NEXT_INSN (insn));
7608 delay_insn_deleted = 1;
7611 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7613 /* This is the best sequence for making long calls in
7614 non-pic code. Unfortunately, GNU ld doesn't provide
7615 the stub needed for external calls, and GAS's support
7616 for this with the SOM linker is buggy. It is safe
7617 to use this for local calls. */
7618 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7619 if (sibcall)
7620 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7621 else
7623 if (TARGET_PA_20)
7624 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7625 xoperands);
7626 else
7627 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7629 output_asm_insn ("copy %%r31,%%r2", xoperands);
7630 delay_slot_filled = 1;
7633 else
7635 if (TARGET_LONG_PIC_SDIFF_CALL)
7637 /* The HP assembler and linker can handle relocations
7638 for the difference of two symbols. The HP assembler
7639 recognizes the sequence as a pc-relative call and
7640 the linker provides stubs when needed. */
7641 xoperands[1] = gen_label_rtx ();
7642 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7643 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7644 targetm.asm_out.internal_label (asm_out_file, "L",
7645 CODE_LABEL_NUMBER (xoperands[1]));
7646 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7648 else if (TARGET_GAS && !TARGET_SOM
7649 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7651 /* GAS currently can't generate the relocations that
7652 are needed for the SOM linker under HP-UX using this
7653 sequence. The GNU linker doesn't generate the stubs
7654 that are needed for external calls on TARGET_ELF32
7655 with this sequence. For now, we have to use a
7656 longer plabel sequence when using GAS. */
7657 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7658 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7659 xoperands);
7660 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7661 xoperands);
7663 else
7665 /* Emit a long plabel-based call sequence. This is
7666 essentially an inline implementation of $$dyncall.
7667 We don't actually try to call $$dyncall as this is
7668 as difficult as calling the function itself. */
7669 xoperands[0] = get_deferred_plabel (call_dest);
7670 xoperands[1] = gen_label_rtx ();
7672 /* Since the call is indirect, FP arguments in registers
7673 need to be copied to the general registers. Then, the
7674 argument relocation stub will copy them back. */
7675 if (TARGET_SOM)
7676 copy_fp_args (insn);
7678 if (flag_pic)
7680 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7681 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7682 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7684 else
7686 output_asm_insn ("addil LR'%0-$global$,%%r27",
7687 xoperands);
7688 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7689 xoperands);
7692 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7693 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7694 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7695 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7697 if (!sibcall && !TARGET_PA_20)
7699 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7700 if (TARGET_NO_SPACE_REGS)
7701 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7702 else
7703 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7707 if (TARGET_PA_20)
7709 if (sibcall)
7710 output_asm_insn ("bve (%%r1)", xoperands);
7711 else
7713 if (indirect_call)
7715 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7716 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7717 delay_slot_filled = 1;
7719 else
7720 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7723 else
7725 if (!TARGET_NO_SPACE_REGS && flag_pic)
7726 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7727 xoperands);
7729 if (sibcall)
7731 if (TARGET_NO_SPACE_REGS || !flag_pic)
7732 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7733 else
7734 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7736 else
7738 if (TARGET_NO_SPACE_REGS || !flag_pic)
7739 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7740 else
7741 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7743 if (indirect_call)
7744 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7745 else
7746 output_asm_insn ("copy %%r31,%%r2", xoperands);
7747 delay_slot_filled = 1;
7754 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7755 output_asm_insn ("nop", xoperands);
7757 /* We are done if there isn't a jump in the delay slot. */
7758 if (seq_length == 0
7759 || delay_insn_deleted
7760 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7761 return "";
7763 /* A sibcall should never have a branch in the delay slot. */
7764 gcc_assert (!sibcall);
7766 /* This call has an unconditional jump in its delay slot. */
7767 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7769 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7771 /* See if the return address can be adjusted. Use the containing
7772 sequence insn's address. This would break the regular call/return@
7773 relationship assumed by the table based eh unwinder, so only do that
7774 if the call is not possibly throwing. */
7775 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7776 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7777 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7779 if (VAL_14_BITS_P (distance)
7780 && !(can_throw_internal (insn) || can_throw_external (insn)))
7782 xoperands[1] = gen_label_rtx ();
7783 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7784 targetm.asm_out.internal_label (asm_out_file, "L",
7785 CODE_LABEL_NUMBER (xoperands[1]));
7787 else
7788 output_asm_insn ("nop\n\tb,n %0", xoperands);
7790 else
7791 output_asm_insn ("b,n %0", xoperands);
7793 /* Delete the jump. */
7794 SET_INSN_DELETED (NEXT_INSN (insn));
7796 return "";
7799 /* Return the attribute length of the indirect call instruction INSN.
7800 The length must match the code generated by output_indirect call.
7801 The returned length includes the delay slot. Currently, the delay
7802 slot of an indirect call sequence is not exposed and it is used by
7803 the sequence itself. */
7806 attr_length_indirect_call (rtx insn)
7808 unsigned long distance = -1;
7809 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7811 if (INSN_ADDRESSES_SET_P ())
7813 distance = (total + insn_current_reference_address (insn));
7814 if (distance < total)
7815 distance = -1;
7818 if (TARGET_64BIT)
7819 return 12;
7821 if (TARGET_FAST_INDIRECT_CALLS
7822 || (!TARGET_PORTABLE_RUNTIME
7823 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7824 || distance < 240000)))
7825 return 8;
7827 if (flag_pic)
7828 return 24;
7830 if (TARGET_PORTABLE_RUNTIME)
7831 return 20;
7833 /* Out of reach, can use ble. */
7834 return 12;
7837 const char *
7838 output_indirect_call (rtx insn, rtx call_dest)
7840 rtx xoperands[1];
7842 if (TARGET_64BIT)
7844 xoperands[0] = call_dest;
7845 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7846 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7847 return "";
7850 /* First the special case for kernels, level 0 systems, etc. */
7851 if (TARGET_FAST_INDIRECT_CALLS)
7852 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7854 /* Now the normal case -- we can reach $$dyncall directly or
7855 we're sure that we can get there via a long-branch stub.
7857 No need to check target flags as the length uniquely identifies
7858 the remaining cases. */
7859 if (attr_length_indirect_call (insn) == 8)
7861 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7862 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7863 variant of the B,L instruction can't be used on the SOM target. */
7864 if (TARGET_PA_20 && !TARGET_SOM)
7865 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7866 else
7867 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7870 /* Long millicode call, but we are not generating PIC or portable runtime
7871 code. */
7872 if (attr_length_indirect_call (insn) == 12)
7873 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7875 /* Long millicode call for portable runtime. */
7876 if (attr_length_indirect_call (insn) == 20)
7877 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7879 /* We need a long PIC call to $$dyncall. */
7880 xoperands[0] = NULL_RTX;
7881 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7882 if (TARGET_SOM || !TARGET_GAS)
7884 xoperands[0] = gen_label_rtx ();
7885 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7886 targetm.asm_out.internal_label (asm_out_file, "L",
7887 CODE_LABEL_NUMBER (xoperands[0]));
7888 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7890 else
7892 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7893 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7894 xoperands);
7896 output_asm_insn ("blr %%r0,%%r2", xoperands);
7897 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7898 return "";
7901 /* Return the total length of the save and restore instructions needed for
7902 the data linkage table pointer (i.e., the PIC register) across the call
7903 instruction INSN. No-return calls do not require a save and restore.
7904 In addition, we may be able to avoid the save and restore for calls
7905 within the same translation unit. */
7908 attr_length_save_restore_dltp (rtx insn)
7910 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7911 return 0;
7913 return 8;
7916 /* In HPUX 8.0's shared library scheme, special relocations are needed
7917 for function labels if they might be passed to a function
7918 in a shared library (because shared libraries don't live in code
7919 space), and special magic is needed to construct their address. */
7921 void
7922 hppa_encode_label (rtx sym)
7924 const char *str = XSTR (sym, 0);
7925 int len = strlen (str) + 1;
7926 char *newstr, *p;
7928 p = newstr = XALLOCAVEC (char, len + 1);
7929 *p++ = '@';
7930 strcpy (p, str);
7932 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7935 static void
7936 pa_encode_section_info (tree decl, rtx rtl, int first)
7938 int old_referenced = 0;
7940 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
7941 old_referenced
7942 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
7944 default_encode_section_info (decl, rtl, first);
7946 if (first && TEXT_SPACE_P (decl))
7948 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7949 if (TREE_CODE (decl) == FUNCTION_DECL)
7950 hppa_encode_label (XEXP (rtl, 0));
7952 else if (old_referenced)
7953 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
/* This is sort of inverse to pa_encode_section_info: skip over the
   '@' function-label marker and the '*' user-label prefix, if present,
   returning a pointer into the same string.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  if (*str == '@')
    str++;
  if (*str == '*')
    str++;
  return str;
}
7967 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7969 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7972 /* Returns 1 if OP is a function label involved in a simple addition
7973 with a constant. Used to keep certain patterns from matching
7974 during instruction combination. */
7976 is_function_label_plus_const (rtx op)
7978 /* Strip off any CONST. */
7979 if (GET_CODE (op) == CONST)
7980 op = XEXP (op, 0);
7982 return (GET_CODE (op) == PLUS
7983 && function_label_operand (XEXP (op, 0), Pmode)
7984 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7987 /* Output assembly code for a thunk to FUNCTION. */
7989 static void
7990 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7991 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7992 tree function)
7994 static unsigned int current_thunk_number;
7995 int val_14 = VAL_14_BITS_P (delta);
7996 unsigned int old_last_address = last_address, nbytes = 0;
7997 char label[16];
7998 rtx xoperands[4];
8000 xoperands[0] = XEXP (DECL_RTL (function), 0);
8001 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8002 xoperands[2] = GEN_INT (delta);
8004 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8005 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8007 /* Output the thunk. We know that the function is in the same
8008 translation unit (i.e., the same space) as the thunk, and that
8009 thunks are output after their method. Thus, we don't need an
8010 external branch to reach the function. With SOM and GAS,
8011 functions and thunks are effectively in different sections.
8012 Thus, we can always use a IA-relative branch and the linker
8013 will add a long branch stub if necessary.
8015 However, we have to be careful when generating PIC code on the
8016 SOM port to ensure that the sequence does not transfer to an
8017 import stub for the target function as this could clobber the
8018 return value saved at SP-24. This would also apply to the
8019 32-bit linux port if the multi-space model is implemented. */
8020 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8021 && !(flag_pic && TREE_PUBLIC (function))
8022 && (TARGET_GAS || last_address < 262132))
8023 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8024 && ((targetm.have_named_sections
8025 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8026 /* The GNU 64-bit linker has rather poor stub management.
8027 So, we use a long branch from thunks that aren't in
8028 the same section as the target function. */
8029 && ((!TARGET_64BIT
8030 && (DECL_SECTION_NAME (thunk_fndecl)
8031 != DECL_SECTION_NAME (function)))
8032 || ((DECL_SECTION_NAME (thunk_fndecl)
8033 == DECL_SECTION_NAME (function))
8034 && last_address < 262132)))
8035 || (targetm.have_named_sections
8036 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8037 && DECL_SECTION_NAME (function) == NULL
8038 && last_address < 262132)
8039 || (!targetm.have_named_sections && last_address < 262132))))
8041 if (!val_14)
8042 output_asm_insn ("addil L'%2,%%r26", xoperands);
8044 output_asm_insn ("b %0", xoperands);
8046 if (val_14)
8048 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8049 nbytes += 8;
8051 else
8053 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8054 nbytes += 12;
8057 else if (TARGET_64BIT)
8059 /* We only have one call-clobbered scratch register, so we can't
8060 make use of the delay slot if delta doesn't fit in 14 bits. */
8061 if (!val_14)
8063 output_asm_insn ("addil L'%2,%%r26", xoperands);
8064 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8067 output_asm_insn ("b,l .+8,%%r1", xoperands);
8069 if (TARGET_GAS)
8071 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8072 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8074 else
8076 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8077 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8080 if (val_14)
8082 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8083 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8084 nbytes += 20;
8086 else
8088 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8089 nbytes += 24;
8092 else if (TARGET_PORTABLE_RUNTIME)
8094 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8095 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8097 if (!val_14)
8098 output_asm_insn ("addil L'%2,%%r26", xoperands);
8100 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8102 if (val_14)
8104 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8105 nbytes += 16;
8107 else
8109 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8110 nbytes += 20;
8113 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8115 /* The function is accessible from outside this module. The only
8116 way to avoid an import stub between the thunk and function is to
8117 call the function directly with an indirect sequence similar to
8118 that used by $$dyncall. This is possible because $$dyncall acts
8119 as the import stub in an indirect call. */
8120 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8121 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8122 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8123 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8124 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8125 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8126 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8127 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8128 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8130 if (!val_14)
8132 output_asm_insn ("addil L'%2,%%r26", xoperands);
8133 nbytes += 4;
8136 if (TARGET_PA_20)
8138 output_asm_insn ("bve (%%r22)", xoperands);
8139 nbytes += 36;
8141 else if (TARGET_NO_SPACE_REGS)
8143 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8144 nbytes += 36;
8146 else
8148 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8149 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8150 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8151 nbytes += 44;
8154 if (val_14)
8155 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8156 else
8157 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8159 else if (flag_pic)
8161 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8163 if (TARGET_SOM || !TARGET_GAS)
8165 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8166 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8168 else
8170 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8171 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8174 if (!val_14)
8175 output_asm_insn ("addil L'%2,%%r26", xoperands);
8177 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8179 if (val_14)
8181 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8182 nbytes += 20;
8184 else
8186 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8187 nbytes += 24;
8190 else
8192 if (!val_14)
8193 output_asm_insn ("addil L'%2,%%r26", xoperands);
8195 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8196 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8198 if (val_14)
8200 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8201 nbytes += 12;
8203 else
8205 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8206 nbytes += 16;
8210 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8212 if (TARGET_SOM && TARGET_GAS)
8214 /* We done with this subspace except possibly for some additional
8215 debug information. Forget that we are in this subspace to ensure
8216 that the next function is output in its own subspace. */
8217 in_section = NULL;
8218 cfun->machine->in_nsubspa = 2;
8221 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8223 switch_to_section (data_section);
8224 output_asm_insn (".align 4", xoperands);
8225 ASM_OUTPUT_LABEL (file, label);
8226 output_asm_insn (".word P'%0", xoperands);
8229 current_thunk_number++;
8230 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8231 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8232 last_address += nbytes;
8233 if (old_last_address > last_address)
8234 last_address = UINT_MAX;
8235 update_total_code_bytes (nbytes);
8238 /* Only direct calls to static functions are allowed to be sibling (tail)
8239 call optimized.
8241 This restriction is necessary because some linker generated stubs will
8242 store return pointers into rp' in some cases which might clobber a
8243 live value already in rp'.
8245 In a sibcall the current function and the target function share stack
8246 space. Thus if the path to the current function and the path to the
8247 target function save a value in rp', they save the value into the
8248 same stack slot, which has undesirable consequences.
8250 Because of the deferred binding nature of shared libraries any function
8251 with external scope could be in a different load module and thus require
8252 rp' to be saved when calling that function. So sibcall optimizations
8253 can only be safe for static function.
8255 Note that GCC never needs return value relocations, so we don't have to
8256 worry about static calls with return value relocations (which require
8257 saving rp').
8259 It is safe to perform a sibcall optimization when the target function
8260 will never return. */
8261 static bool
8262 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8264 if (TARGET_PORTABLE_RUNTIME)
8265 return false;
8267 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8268 single subspace mode and the call is not indirect. As far as I know,
8269 there is no operating system support for the multiple subspace mode.
8270 It might be possible to support indirect calls if we didn't use
8271 $$dyncall (see the indirect sequence generated in output_call). */
8272 if (TARGET_ELF32)
8273 return (decl != NULL_TREE);
8275 /* Sibcalls are not ok because the arg pointer register is not a fixed
8276 register. This prevents the sibcall optimization from occurring. In
8277 addition, there are problems with stub placement using GNU ld. This
8278 is because a normal sibcall branch uses a 17-bit relocation while
8279 a regular call branch uses a 22-bit relocation. As a result, more
8280 care needs to be taken in the placement of long-branch stubs. */
8281 if (TARGET_64BIT)
8282 return false;
8284 /* Sibcalls are only ok within a translation unit. */
8285 return (decl && !TREE_PUBLIC (decl));
8288 /* ??? Addition is not commutative on the PA due to the weird implicit
8289 space register selection rules for memory addresses. Therefore, we
8290 don't consider a + b == b + a, as this might be inside a MEM. */
8291 static bool
8292 pa_commutative_p (const_rtx x, int outer_code)
8294 return (COMMUTATIVE_P (x)
8295 && (TARGET_NO_SPACE_REGS
8296 || (outer_code != UNKNOWN && outer_code != MEM)
8297 || GET_CODE (x) != PLUS));
8300 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8301 use in fmpyadd instructions. */
8303 fmpyaddoperands (rtx *operands)
8305 enum machine_mode mode = GET_MODE (operands[0]);
8307 /* Must be a floating point mode. */
8308 if (mode != SFmode && mode != DFmode)
8309 return 0;
8311 /* All modes must be the same. */
8312 if (! (mode == GET_MODE (operands[1])
8313 && mode == GET_MODE (operands[2])
8314 && mode == GET_MODE (operands[3])
8315 && mode == GET_MODE (operands[4])
8316 && mode == GET_MODE (operands[5])))
8317 return 0;
8319 /* All operands must be registers. */
8320 if (! (GET_CODE (operands[1]) == REG
8321 && GET_CODE (operands[2]) == REG
8322 && GET_CODE (operands[3]) == REG
8323 && GET_CODE (operands[4]) == REG
8324 && GET_CODE (operands[5]) == REG))
8325 return 0;
8327 /* Only 2 real operands to the addition. One of the input operands must
8328 be the same as the output operand. */
8329 if (! rtx_equal_p (operands[3], operands[4])
8330 && ! rtx_equal_p (operands[3], operands[5]))
8331 return 0;
8333 /* Inout operand of add cannot conflict with any operands from multiply. */
8334 if (rtx_equal_p (operands[3], operands[0])
8335 || rtx_equal_p (operands[3], operands[1])
8336 || rtx_equal_p (operands[3], operands[2]))
8337 return 0;
8339 /* multiply cannot feed into addition operands. */
8340 if (rtx_equal_p (operands[4], operands[0])
8341 || rtx_equal_p (operands[5], operands[0]))
8342 return 0;
8344 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8345 if (mode == SFmode
8346 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8347 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8348 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8349 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8350 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8351 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8352 return 0;
8354 /* Passed. Operands are suitable for fmpyadd. */
8355 return 1;
8358 #if !defined(USE_COLLECT2)
8359 static void
8360 pa_asm_out_constructor (rtx symbol, int priority)
8362 if (!function_label_operand (symbol, VOIDmode))
8363 hppa_encode_label (symbol);
8365 #ifdef CTORS_SECTION_ASM_OP
8366 default_ctor_section_asm_out_constructor (symbol, priority);
8367 #else
8368 # ifdef TARGET_ASM_NAMED_SECTION
8369 default_named_section_asm_out_constructor (symbol, priority);
8370 # else
8371 default_stabs_asm_out_constructor (symbol, priority);
8372 # endif
8373 #endif
8376 static void
8377 pa_asm_out_destructor (rtx symbol, int priority)
8379 if (!function_label_operand (symbol, VOIDmode))
8380 hppa_encode_label (symbol);
8382 #ifdef DTORS_SECTION_ASM_OP
8383 default_dtor_section_asm_out_destructor (symbol, priority);
8384 #else
8385 # ifdef TARGET_ASM_NAMED_SECTION
8386 default_named_section_asm_out_destructor (symbol, priority);
8387 # else
8388 default_stabs_asm_out_destructor (symbol, priority);
8389 # endif
8390 #endif
8392 #endif
8394 /* This function places uninitialized global data in the bss section.
8395 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8396 function on the SOM port to prevent uninitialized global data from
8397 being placed in the data section. */
8399 void
8400 pa_asm_output_aligned_bss (FILE *stream,
8401 const char *name,
8402 unsigned HOST_WIDE_INT size,
8403 unsigned int align)
8405 switch_to_section (bss_section);
8406 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8408 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8409 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8410 #endif
8412 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8413 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8414 #endif
8416 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8417 ASM_OUTPUT_LABEL (stream, name);
8418 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8421 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8422 that doesn't allow the alignment of global common storage to be directly
8423 specified. The SOM linker aligns common storage based on the rounded
8424 value of the NUM_BYTES parameter in the .comm directive. It's not
8425 possible to use the .align directive as it doesn't affect the alignment
8426 of the label associated with a .comm directive. */
8428 void
8429 pa_asm_output_aligned_common (FILE *stream,
8430 const char *name,
8431 unsigned HOST_WIDE_INT size,
8432 unsigned int align)
8434 unsigned int max_common_align;
8436 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8437 if (align > max_common_align)
8439 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8440 "for global common data. Using %u",
8441 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8442 align = max_common_align;
8445 switch_to_section (bss_section);
8447 assemble_name (stream, name);
8448 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8449 MAX (size, align / BITS_PER_UNIT));
8452 /* We can't use .comm for local common storage as the SOM linker effectively
8453 treats the symbol as universal and uses the same storage for local symbols
8454 with the same name in different object files. The .block directive
8455 reserves an uninitialized block of storage. However, it's not common
8456 storage. Fortunately, GCC never requests common storage with the same
8457 name in any given translation unit. */
8459 void
8460 pa_asm_output_aligned_local (FILE *stream,
8461 const char *name,
8462 unsigned HOST_WIDE_INT size,
8463 unsigned int align)
8465 switch_to_section (bss_section);
8466 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8468 #ifdef LOCAL_ASM_OP
8469 fprintf (stream, "%s", LOCAL_ASM_OP);
8470 assemble_name (stream, name);
8471 fprintf (stream, "\n");
8472 #endif
8474 ASM_OUTPUT_LABEL (stream, name);
8475 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8478 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8479 use in fmpysub instructions. */
8481 fmpysuboperands (rtx *operands)
8483 enum machine_mode mode = GET_MODE (operands[0]);
8485 /* Must be a floating point mode. */
8486 if (mode != SFmode && mode != DFmode)
8487 return 0;
8489 /* All modes must be the same. */
8490 if (! (mode == GET_MODE (operands[1])
8491 && mode == GET_MODE (operands[2])
8492 && mode == GET_MODE (operands[3])
8493 && mode == GET_MODE (operands[4])
8494 && mode == GET_MODE (operands[5])))
8495 return 0;
8497 /* All operands must be registers. */
8498 if (! (GET_CODE (operands[1]) == REG
8499 && GET_CODE (operands[2]) == REG
8500 && GET_CODE (operands[3]) == REG
8501 && GET_CODE (operands[4]) == REG
8502 && GET_CODE (operands[5]) == REG))
8503 return 0;
8505 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8506 operation, so operands[4] must be the same as operand[3]. */
8507 if (! rtx_equal_p (operands[3], operands[4]))
8508 return 0;
8510 /* multiply cannot feed into subtraction. */
8511 if (rtx_equal_p (operands[5], operands[0]))
8512 return 0;
8514 /* Inout operand of sub cannot conflict with any operands from multiply. */
8515 if (rtx_equal_p (operands[3], operands[0])
8516 || rtx_equal_p (operands[3], operands[1])
8517 || rtx_equal_p (operands[3], operands[2]))
8518 return 0;
8520 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8521 if (mode == SFmode
8522 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8523 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8524 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8525 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8526 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8527 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8528 return 0;
8530 /* Passed. Operands are suitable for fmpysub. */
8531 return 1;
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   shift amounts (in scaled-index form) for shadd instructions.  */

int
shadd_constant_p (int val)
{
  if (val == 2 || val == 4 || val == 8)
    return 1;
  else
    return 0;
}
8545 /* Return 1 if OP is valid as a base or index register in a
8546 REG+REG address. */
8549 borx_reg_operand (rtx op, enum machine_mode mode)
8551 if (GET_CODE (op) != REG)
8552 return 0;
8554 /* We must reject virtual registers as the only expressions that
8555 can be instantiated are REG and REG+CONST. */
8556 if (op == virtual_incoming_args_rtx
8557 || op == virtual_stack_vars_rtx
8558 || op == virtual_stack_dynamic_rtx
8559 || op == virtual_outgoing_args_rtx
8560 || op == virtual_cfa_rtx)
8561 return 0;
8563 /* While it's always safe to index off the frame pointer, it's not
8564 profitable to do so when the frame pointer is being eliminated. */
8565 if (!reload_completed
8566 && flag_omit_frame_pointer
8567 && !cfun->calls_alloca
8568 && op == frame_pointer_rtx)
8569 return 0;
8571 return register_operand (op, mode);
8574 /* Return 1 if this operand is anything other than a hard register. */
8577 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8579 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8582 /* Return TRUE if INSN branches forward. */
8584 static bool
8585 forward_branch_p (rtx insn)
8587 rtx lab = JUMP_LABEL (insn);
8589 /* The INSN must have a jump label. */
8590 gcc_assert (lab != NULL_RTX);
8592 if (INSN_ADDRESSES_SET_P ())
8593 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8595 while (insn)
8597 if (insn == lab)
8598 return true;
8599 else
8600 insn = NEXT_INSN (insn);
8603 return false;
8606 /* Return 1 if OP is an equality comparison, else return 0. */
8608 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8610 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8613 /* Return 1 if INSN is in the delay slot of a call instruction. */
8615 jump_in_call_delay (rtx insn)
8618 if (GET_CODE (insn) != JUMP_INSN)
8619 return 0;
8621 if (PREV_INSN (insn)
8622 && PREV_INSN (PREV_INSN (insn))
8623 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8625 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8627 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8628 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8631 else
8632 return 0;
8635 /* Output an unconditional move and branch insn. */
8637 const char *
8638 output_parallel_movb (rtx *operands, rtx insn)
8640 int length = get_attr_length (insn);
8642 /* These are the cases in which we win. */
8643 if (length == 4)
8644 return "mov%I1b,tr %1,%0,%2";
8646 /* None of the following cases win, but they don't lose either. */
8647 if (length == 8)
8649 if (dbr_sequence_length () == 0)
8651 /* Nothing in the delay slot, fake it by putting the combined
8652 insn (the copy or add) in the delay slot of a bl. */
8653 if (GET_CODE (operands[1]) == CONST_INT)
8654 return "b %2\n\tldi %1,%0";
8655 else
8656 return "b %2\n\tcopy %1,%0";
8658 else
8660 /* Something in the delay slot, but we've got a long branch. */
8661 if (GET_CODE (operands[1]) == CONST_INT)
8662 return "ldi %1,%0\n\tb %2";
8663 else
8664 return "copy %1,%0\n\tb %2";
8668 if (GET_CODE (operands[1]) == CONST_INT)
8669 output_asm_insn ("ldi %1,%0", operands);
8670 else
8671 output_asm_insn ("copy %1,%0", operands);
8672 return output_lbranch (operands[2], insn, 1);
8675 /* Output an unconditional add and branch insn. */
8677 const char *
8678 output_parallel_addb (rtx *operands, rtx insn)
8680 int length = get_attr_length (insn);
8682 /* To make life easy we want operand0 to be the shared input/output
8683 operand and operand1 to be the readonly operand. */
8684 if (operands[0] == operands[1])
8685 operands[1] = operands[2];
8687 /* These are the cases in which we win. */
8688 if (length == 4)
8689 return "add%I1b,tr %1,%0,%3";
8691 /* None of the following cases win, but they don't lose either. */
8692 if (length == 8)
8694 if (dbr_sequence_length () == 0)
8695 /* Nothing in the delay slot, fake it by putting the combined
8696 insn (the copy or add) in the delay slot of a bl. */
8697 return "b %3\n\tadd%I1 %1,%0,%0";
8698 else
8699 /* Something in the delay slot, but we've got a long branch. */
8700 return "add%I1 %1,%0,%0\n\tb %3";
8703 output_asm_insn ("add%I1 %1,%0,%0", operands);
8704 return output_lbranch (operands[3], insn, 1);
8707 /* Return nonzero if INSN (a jump insn) immediately follows a call
8708 to a named function. This is used to avoid filling the delay slot
8709 of the jump since it can usually be eliminated by modifying RP in
8710 the delay slot of the call. */
8713 following_call (rtx insn)
8715 if (! TARGET_JUMP_IN_DELAY)
8716 return 0;
8718 /* Find the previous real insn, skipping NOTEs. */
8719 insn = PREV_INSN (insn);
8720 while (insn && GET_CODE (insn) == NOTE)
8721 insn = PREV_INSN (insn);
8723 /* Check for CALL_INSNs and millicode calls. */
8724 if (insn
8725 && ((GET_CODE (insn) == CALL_INSN
8726 && get_attr_type (insn) != TYPE_DYNCALL)
8727 || (GET_CODE (insn) == INSN
8728 && GET_CODE (PATTERN (insn)) != SEQUENCE
8729 && GET_CODE (PATTERN (insn)) != USE
8730 && GET_CODE (PATTERN (insn)) != CLOBBER
8731 && get_attr_type (insn) == TYPE_MILLI)))
8732 return 1;
8734 return 0;
8737 /* We use this hook to perform a PA specific optimization which is difficult
8738 to do in earlier passes.
8740 We want the delay slots of branches within jump tables to be filled.
8741 None of the compiler passes at the moment even has the notion that a
8742 PA jump table doesn't contain addresses, but instead contains actual
8743 instructions!
8745 Because we actually jump into the table, the addresses of each entry
8746 must stay constant in relation to the beginning of the table (which
8747 itself must stay constant relative to the instruction to jump into
8748 it). I don't believe we can guarantee earlier passes of the compiler
8749 will adhere to those rules.
8751 So, late in the compilation process we find all the jump tables, and
8752 expand them into real code -- e.g. each entry in the jump table vector
8753 will get an appropriate label followed by a jump to the final target.
8755 Reorg and the final jump pass can then optimize these branches and
8756 fill their delay slots. We end up with smaller, more efficient code.
8758 The jump instructions within the table are special; we must be able
8759 to identify them during assembly output (if the jumps don't get filled
8760 we need to emit a nop rather than nullifying the delay slot)). We
8761 identify jumps in switch tables by using insns with the attribute
8762 type TYPE_BTABLE_BRANCH.
8764 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8765 insns. This serves two purposes, first it prevents jump.c from
8766 noticing that the last N entries in the table jump to the instruction
8767 immediately after the table and deleting the jumps. Second, those
8768 insns mark where we should emit .begin_brtab and .end_brtab directives
8769 when using GAS (allows for better link time optimizations). */
8771 static void
8772 pa_reorg (void)
8774 rtx insn;
8776 remove_useless_addtr_insns (1);
8778 if (pa_cpu < PROCESSOR_8000)
8779 pa_combine_instructions ();
8782 /* This is fairly cheap, so always run it if optimizing. */
8783 if (optimize > 0 && !TARGET_BIG_SWITCH)
8785 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8786 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8788 rtx pattern, tmp, location, label;
8789 unsigned int length, i;
8791 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8792 if (GET_CODE (insn) != JUMP_INSN
8793 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8794 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8795 continue;
8797 /* Emit marker for the beginning of the branch table. */
8798 emit_insn_before (gen_begin_brtab (), insn);
8800 pattern = PATTERN (insn);
8801 location = PREV_INSN (insn);
8802 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8804 for (i = 0; i < length; i++)
8806 /* Emit a label before each jump to keep jump.c from
8807 removing this code. */
8808 tmp = gen_label_rtx ();
8809 LABEL_NUSES (tmp) = 1;
8810 emit_label_after (tmp, location);
8811 location = NEXT_INSN (location);
8813 if (GET_CODE (pattern) == ADDR_VEC)
8814 label = XEXP (XVECEXP (pattern, 0, i), 0);
8815 else
8816 label = XEXP (XVECEXP (pattern, 1, i), 0);
8818 tmp = gen_short_jump (label);
8820 /* Emit the jump itself. */
8821 tmp = emit_jump_insn_after (tmp, location);
8822 JUMP_LABEL (tmp) = label;
8823 LABEL_NUSES (label)++;
8824 location = NEXT_INSN (location);
8826 /* Emit a BARRIER after the jump. */
8827 emit_barrier_after (location);
8828 location = NEXT_INSN (location);
8831 /* Emit marker for the end of the branch table. */
8832 emit_insn_before (gen_end_brtab (), location);
8833 location = NEXT_INSN (location);
8834 emit_barrier_after (location);
8836 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8837 delete_insn (insn);
8840 else
8842 /* Still need brtab marker insns. FIXME: the presence of these
8843 markers disables output of the branch table to readonly memory,
8844 and any alignment directives that might be needed. Possibly,
8845 the begin_brtab insn should be output before the label for the
8846 table. This doesn't matter at the moment since the tables are
8847 always output in the text section. */
8848 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8850 /* Find an ADDR_VEC insn. */
8851 if (GET_CODE (insn) != JUMP_INSN
8852 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8853 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8854 continue;
8856 /* Now generate markers for the beginning and end of the
8857 branch table. */
8858 emit_insn_before (gen_begin_brtab (), insn);
8859 emit_insn_after (gen_end_brtab (), insn);
8864 /* The PA has a number of odd instructions which can perform multiple
8865 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8866 it may be profitable to combine two instructions into one instruction
8867 with two outputs. It's not profitable PA2.0 machines because the
8868 two outputs would take two slots in the reorder buffers.
8870 This routine finds instructions which can be combined and combines
8871 them. We only support some of the potential combinations, and we
8872 only try common ways to find suitable instructions.
8874 * addb can add two registers or a register and a small integer
8875 and jump to a nearby (+-8k) location. Normally the jump to the
8876 nearby location is conditional on the result of the add, but by
8877 using the "true" condition we can make the jump unconditional.
8878 Thus addb can perform two independent operations in one insn.
8880 * movb is similar to addb in that it can perform a reg->reg
8881 or small immediate->reg copy and jump to a nearby (+-8k location).
8883 * fmpyadd and fmpysub can perform a FP multiply and either an
8884 FP add or FP sub if the operands of the multiply and add/sub are
8885 independent (there are other minor restrictions). Note both
8886 the fmpy and fadd/fsub can in theory move to better spots according
8887 to data dependencies, but for now we require the fmpy stay at a
8888 fixed location.
8890 * Many of the memory operations can perform pre & post updates
8891 of index registers. GCC's pre/post increment/decrement addressing
8892 is far too simple to take advantage of all the possibilities. This
8893 pass may not be suitable since those insns may not be independent.
8895 * comclr can compare two ints or an int and a register, nullify
8896 the following instruction and zero some other register. This
8897 is more difficult to use as it's harder to find an insn which
8898 will generate a comclr than finding something like an unconditional
8899 branch. (conditional moves & long branches create comclr insns).
8901 * Most arithmetic operations can conditionally skip the next
8902 instruction. They can be viewed as "perform this operation
8903 and conditionally jump to this nearby location" (where nearby
8904 is an insns away). These are difficult to use due to the
8905 branch length restrictions. */
8907 static void
8908 pa_combine_instructions (void)
8910 rtx anchor, new_rtx;
8912 /* This can get expensive since the basic algorithm is on the
8913 order of O(n^2) (or worse). Only do it for -O2 or higher
8914 levels of optimization. */
8915 if (optimize < 2)
8916 return;
8918 /* Walk down the list of insns looking for "anchor" insns which
8919 may be combined with "floating" insns. As the name implies,
8920 "anchor" instructions don't move, while "floating" insns may
8921 move around. */
8922 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8923 new_rtx = make_insn_raw (new_rtx);
8925 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8927 enum attr_pa_combine_type anchor_attr;
8928 enum attr_pa_combine_type floater_attr;
8930 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8931 Also ignore any special USE insns. */
8932 if ((GET_CODE (anchor) != INSN
8933 && GET_CODE (anchor) != JUMP_INSN
8934 && GET_CODE (anchor) != CALL_INSN)
8935 || GET_CODE (PATTERN (anchor)) == USE
8936 || GET_CODE (PATTERN (anchor)) == CLOBBER
8937 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8938 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8939 continue;
8941 anchor_attr = get_attr_pa_combine_type (anchor);
8942 /* See if anchor is an insn suitable for combination. */
8943 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8944 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8945 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8946 && ! forward_branch_p (anchor)))
8948 rtx floater;
8950 for (floater = PREV_INSN (anchor);
8951 floater;
8952 floater = PREV_INSN (floater))
8954 if (GET_CODE (floater) == NOTE
8955 || (GET_CODE (floater) == INSN
8956 && (GET_CODE (PATTERN (floater)) == USE
8957 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8958 continue;
8960 /* Anything except a regular INSN will stop our search. */
8961 if (GET_CODE (floater) != INSN
8962 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8963 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8965 floater = NULL_RTX;
8966 break;
8969 /* See if FLOATER is suitable for combination with the
8970 anchor. */
8971 floater_attr = get_attr_pa_combine_type (floater);
8972 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8973 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8974 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8975 && floater_attr == PA_COMBINE_TYPE_FMPY))
8977 /* If ANCHOR and FLOATER can be combined, then we're
8978 done with this pass. */
8979 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
8980 SET_DEST (PATTERN (floater)),
8981 XEXP (SET_SRC (PATTERN (floater)), 0),
8982 XEXP (SET_SRC (PATTERN (floater)), 1)))
8983 break;
8986 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8987 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8989 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8991 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
8992 SET_DEST (PATTERN (floater)),
8993 XEXP (SET_SRC (PATTERN (floater)), 0),
8994 XEXP (SET_SRC (PATTERN (floater)), 1)))
8995 break;
8997 else
8999 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9000 SET_DEST (PATTERN (floater)),
9001 SET_SRC (PATTERN (floater)),
9002 SET_SRC (PATTERN (floater))))
9003 break;
9008 /* If we didn't find anything on the backwards scan try forwards. */
9009 if (!floater
9010 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9011 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9013 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9015 if (GET_CODE (floater) == NOTE
9016 || (GET_CODE (floater) == INSN
9017 && (GET_CODE (PATTERN (floater)) == USE
9018 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9020 continue;
9022 /* Anything except a regular INSN will stop our search. */
9023 if (GET_CODE (floater) != INSN
9024 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9025 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9027 floater = NULL_RTX;
9028 break;
9031 /* See if FLOATER is suitable for combination with the
9032 anchor. */
9033 floater_attr = get_attr_pa_combine_type (floater);
9034 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9035 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9036 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9037 && floater_attr == PA_COMBINE_TYPE_FMPY))
9039 /* If ANCHOR and FLOATER can be combined, then we're
9040 done with this pass. */
9041 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9042 SET_DEST (PATTERN (floater)),
9043 XEXP (SET_SRC (PATTERN (floater)),
9045 XEXP (SET_SRC (PATTERN (floater)),
9046 1)))
9047 break;
9052 /* FLOATER will be nonzero if we found a suitable floating
9053 insn for combination with ANCHOR. */
9054 if (floater
9055 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9056 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9058 /* Emit the new instruction and delete the old anchor. */
9059 emit_insn_before (gen_rtx_PARALLEL
9060 (VOIDmode,
9061 gen_rtvec (2, PATTERN (anchor),
9062 PATTERN (floater))),
9063 anchor);
9065 SET_INSN_DELETED (anchor);
9067 /* Emit a special USE insn for FLOATER, then delete
9068 the floating insn. */
9069 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9070 delete_insn (floater);
9072 continue;
9074 else if (floater
9075 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9077 rtx temp;
9078 /* Emit the new_jump instruction and delete the old anchor. */
9079 temp
9080 = emit_jump_insn_before (gen_rtx_PARALLEL
9081 (VOIDmode,
9082 gen_rtvec (2, PATTERN (anchor),
9083 PATTERN (floater))),
9084 anchor);
9086 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9087 SET_INSN_DELETED (anchor);
9089 /* Emit a special USE insn for FLOATER, then delete
9090 the floating insn. */
9091 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9092 delete_insn (floater);
9093 continue;
9099 static int
9100 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9101 rtx src1, rtx src2)
9103 int insn_code_number;
9104 rtx start, end;
9106 /* Create a PARALLEL with the patterns of ANCHOR and
9107 FLOATER, try to recognize it, then test constraints
9108 for the resulting pattern.
9110 If the pattern doesn't match or the constraints
9111 aren't met keep searching for a suitable floater
9112 insn. */
9113 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9114 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9115 INSN_CODE (new_rtx) = -1;
9116 insn_code_number = recog_memoized (new_rtx);
9117 if (insn_code_number < 0
9118 || (extract_insn (new_rtx), ! constrain_operands (1)))
9119 return 0;
9121 if (reversed)
9123 start = anchor;
9124 end = floater;
9126 else
9128 start = floater;
9129 end = anchor;
9132 /* There's up to three operands to consider. One
9133 output and two inputs.
9135 The output must not be used between FLOATER & ANCHOR
9136 exclusive. The inputs must not be set between
9137 FLOATER and ANCHOR exclusive. */
9139 if (reg_used_between_p (dest, start, end))
9140 return 0;
9142 if (reg_set_between_p (src1, start, end))
9143 return 0;
9145 if (reg_set_between_p (src2, start, end))
9146 return 0;
9148 /* If we get here, then everything is good. */
9149 return 1;
9152 /* Return nonzero if references for INSN are delayed.
9154 Millicode insns are actually function calls with some special
9155 constraints on arguments and register usage.
9157 Millicode calls always expect their arguments in the integer argument
9158 registers, and always return their result in %r29 (ret1). They
9159 are expected to clobber their arguments, %r1, %r29, and the return
9160 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9162 This function tells reorg that the references to arguments and
9163 millicode calls do not appear to happen until after the millicode call.
9164 This allows reorg to put insns which set the argument registers into the
9165 delay slot of the millicode call -- thus they act more like traditional
9166 CALL_INSNs.
9168 Note we cannot consider side effects of the insn to be delayed because
9169 the branch and link insn will clobber the return pointer. If we happened
9170 to use the return pointer in the delay slot of the call, then we lose.
9172 get_attr_type will try to recognize the given insn, so make sure to
9173 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9174 in particular. */
9176 insn_refs_are_delayed (rtx insn)
9178 return ((GET_CODE (insn) == INSN
9179 && GET_CODE (PATTERN (insn)) != SEQUENCE
9180 && GET_CODE (PATTERN (insn)) != USE
9181 && GET_CODE (PATTERN (insn)) != CLOBBER
9182 && get_attr_type (insn) == TYPE_MILLI));
9185 /* On the HP-PA the value is found in register(s) 28(-29), unless
9186 the mode is SF or DF. Then the value is returned in fr4 (32).
9188 This must perform the same promotions as PROMOTE_MODE, else
9189 TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.
9191 Small structures must be returned in a PARALLEL on PA64 in order
9192 to match the HP Compiler ABI. */
9195 function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED)
9197 enum machine_mode valmode;
9199 if (AGGREGATE_TYPE_P (valtype)
9200 || TREE_CODE (valtype) == COMPLEX_TYPE
9201 || TREE_CODE (valtype) == VECTOR_TYPE)
9203 if (TARGET_64BIT)
9205 /* Aggregates with a size less than or equal to 128 bits are
9206 returned in GR 28(-29). They are left justified. The pad
9207 bits are undefined. Larger aggregates are returned in
9208 memory. */
9209 rtx loc[2];
9210 int i, offset = 0;
9211 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9213 for (i = 0; i < ub; i++)
9215 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9216 gen_rtx_REG (DImode, 28 + i),
9217 GEN_INT (offset));
9218 offset += 8;
9221 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9223 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9225 /* Aggregates 5 to 8 bytes in size are returned in general
9226 registers r28-r29 in the same manner as other non
9227 floating-point objects. The data is right-justified and
9228 zero-extended to 64 bits. This is opposite to the normal
9229 justification used on big endian targets and requires
9230 special treatment. */
9231 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9232 gen_rtx_REG (DImode, 28), const0_rtx);
9233 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9237 if ((INTEGRAL_TYPE_P (valtype)
9238 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9239 || POINTER_TYPE_P (valtype))
9240 valmode = word_mode;
9241 else
9242 valmode = TYPE_MODE (valtype);
9244 if (TREE_CODE (valtype) == REAL_TYPE
9245 && !AGGREGATE_TYPE_P (valtype)
9246 && TYPE_MODE (valtype) != TFmode
9247 && !TARGET_SOFT_FLOAT)
9248 return gen_rtx_REG (valmode, 32);
9250 return gen_rtx_REG (valmode, 28);
9253 /* Return the location of a parameter that is passed in a register or NULL
9254 if the parameter has any component that is passed in memory.
9256 This is new code and will be pushed to into the net sources after
9257 further testing.
9259 ??? We might want to restructure this so that it looks more like other
9260 ports. */
9262 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
9263 int named ATTRIBUTE_UNUSED)
9265 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9266 int alignment = 0;
9267 int arg_size;
9268 int fpr_reg_base;
9269 int gpr_reg_base;
9270 rtx retval;
9272 if (mode == VOIDmode)
9273 return NULL_RTX;
9275 arg_size = FUNCTION_ARG_SIZE (mode, type);
9277 /* If this arg would be passed partially or totally on the stack, then
9278 this routine should return zero. pa_arg_partial_bytes will
9279 handle arguments which are split between regs and stack slots if
9280 the ABI mandates split arguments. */
9281 if (!TARGET_64BIT)
9283 /* The 32-bit ABI does not split arguments. */
9284 if (cum->words + arg_size > max_arg_words)
9285 return NULL_RTX;
9287 else
9289 if (arg_size > 1)
9290 alignment = cum->words & 1;
9291 if (cum->words + alignment >= max_arg_words)
9292 return NULL_RTX;
9295 /* The 32bit ABIs and the 64bit ABIs are rather different,
9296 particularly in their handling of FP registers. We might
9297 be able to cleverly share code between them, but I'm not
9298 going to bother in the hope that splitting them up results
9299 in code that is more easily understood. */
9301 if (TARGET_64BIT)
9303 /* Advance the base registers to their current locations.
9305 Remember, gprs grow towards smaller register numbers while
9306 fprs grow to higher register numbers. Also remember that
9307 although FP regs are 32-bit addressable, we pretend that
9308 the registers are 64-bits wide. */
9309 gpr_reg_base = 26 - cum->words;
9310 fpr_reg_base = 32 + cum->words;
9312 /* Arguments wider than one word and small aggregates need special
9313 treatment. */
9314 if (arg_size > 1
9315 || mode == BLKmode
9316 || (type && (AGGREGATE_TYPE_P (type)
9317 || TREE_CODE (type) == COMPLEX_TYPE
9318 || TREE_CODE (type) == VECTOR_TYPE)))
9320 /* Double-extended precision (80-bit), quad-precision (128-bit)
9321 and aggregates including complex numbers are aligned on
9322 128-bit boundaries. The first eight 64-bit argument slots
9323 are associated one-to-one, with general registers r26
9324 through r19, and also with floating-point registers fr4
9325 through fr11. Arguments larger than one word are always
9326 passed in general registers.
9328 Using a PARALLEL with a word mode register results in left
9329 justified data on a big-endian target. */
9331 rtx loc[8];
9332 int i, offset = 0, ub = arg_size;
9334 /* Align the base register. */
9335 gpr_reg_base -= alignment;
9337 ub = MIN (ub, max_arg_words - cum->words - alignment);
9338 for (i = 0; i < ub; i++)
9340 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9341 gen_rtx_REG (DImode, gpr_reg_base),
9342 GEN_INT (offset));
9343 gpr_reg_base -= 1;
9344 offset += 8;
9347 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9350 else
9352 /* If the argument is larger than a word, then we know precisely
9353 which registers we must use. */
9354 if (arg_size > 1)
9356 if (cum->words)
9358 gpr_reg_base = 23;
9359 fpr_reg_base = 38;
9361 else
9363 gpr_reg_base = 25;
9364 fpr_reg_base = 34;
9367 /* Structures 5 to 8 bytes in size are passed in the general
9368 registers in the same manner as other non floating-point
9369 objects. The data is right-justified and zero-extended
9370 to 64 bits. This is opposite to the normal justification
9371 used on big endian targets and requires special treatment.
9372 We now define BLOCK_REG_PADDING to pad these objects.
9373 Aggregates, complex and vector types are passed in the same
9374 manner as structures. */
9375 if (mode == BLKmode
9376 || (type && (AGGREGATE_TYPE_P (type)
9377 || TREE_CODE (type) == COMPLEX_TYPE
9378 || TREE_CODE (type) == VECTOR_TYPE)))
9380 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9381 gen_rtx_REG (DImode, gpr_reg_base),
9382 const0_rtx);
9383 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9386 else
9388 /* We have a single word (32 bits). A simple computation
9389 will get us the register #s we need. */
9390 gpr_reg_base = 26 - cum->words;
9391 fpr_reg_base = 32 + 2 * cum->words;
9395 /* Determine if the argument needs to be passed in both general and
9396 floating point registers. */
9397 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9398 /* If we are doing soft-float with portable runtime, then there
9399 is no need to worry about FP regs. */
9400 && !TARGET_SOFT_FLOAT
9401 /* The parameter must be some kind of scalar float, else we just
9402 pass it in integer registers. */
9403 && GET_MODE_CLASS (mode) == MODE_FLOAT
9404 /* The target function must not have a prototype. */
9405 && cum->nargs_prototype <= 0
9406 /* libcalls do not need to pass items in both FP and general
9407 registers. */
9408 && type != NULL_TREE
9409 /* All this hair applies to "outgoing" args only. This includes
9410 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9411 && !cum->incoming)
9412 /* Also pass outgoing floating arguments in both registers in indirect
9413 calls with the 32 bit ABI and the HP assembler since there is no
9414 way to the specify argument locations in static functions. */
9415 || (!TARGET_64BIT
9416 && !TARGET_GAS
9417 && !cum->incoming
9418 && cum->indirect
9419 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9421 retval
9422 = gen_rtx_PARALLEL
9423 (mode,
9424 gen_rtvec (2,
9425 gen_rtx_EXPR_LIST (VOIDmode,
9426 gen_rtx_REG (mode, fpr_reg_base),
9427 const0_rtx),
9428 gen_rtx_EXPR_LIST (VOIDmode,
9429 gen_rtx_REG (mode, gpr_reg_base),
9430 const0_rtx)));
9432 else
9434 /* See if we should pass this parameter in a general register. */
9435 if (TARGET_SOFT_FLOAT
9436 /* Indirect calls in the normal 32bit ABI require all arguments
9437 to be passed in general registers. */
9438 || (!TARGET_PORTABLE_RUNTIME
9439 && !TARGET_64BIT
9440 && !TARGET_ELF32
9441 && cum->indirect)
9442 /* If the parameter is not a scalar floating-point parameter,
9443 then it belongs in GPRs. */
9444 || GET_MODE_CLASS (mode) != MODE_FLOAT
9445 /* Structure with single SFmode field belongs in GPR. */
9446 || (type && AGGREGATE_TYPE_P (type)))
9447 retval = gen_rtx_REG (mode, gpr_reg_base);
9448 else
9449 retval = gen_rtx_REG (mode, fpr_reg_base);
9451 return retval;
9455 /* If this arg would be passed totally in registers or totally on the stack,
9456 then this routine should return zero. */
9458 static int
9459 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9460 tree type, bool named ATTRIBUTE_UNUSED)
9462 unsigned int max_arg_words = 8;
9463 unsigned int offset = 0;
9465 if (!TARGET_64BIT)
9466 return 0;
9468 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9469 offset = 1;
9471 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9472 /* Arg fits fully into registers. */
9473 return 0;
9474 else if (cum->words + offset >= max_arg_words)
9475 /* Arg fully on the stack. */
9476 return 0;
9477 else
9478 /* Arg is split. */
9479 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
	{
	  /* We only want to emit a .nsubspa directive once at the
	     start of the function.  */
	  cfun->machine->in_nsubspa = 1;

	  /* Create a new subspace for the text.  This provides
	     better stub placement and one-only functions.  */
	  if (cfun->decl
	      && DECL_ONE_ONLY (cfun->decl)
	      && !DECL_WEAK (cfun->decl))
	    {
	      /* One-only (but not weak) function: emit a COMDAT code
		 subspace so the linker can discard duplicates.  */
	      output_section_asm_op ("\t.SPACE $TEXT$\n"
				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
				     "ACCESS=44,SORT=24,COMDAT");
	      return;
	    }
	}
      else
	{
	  /* There isn't a current function or the body of the current
	     function has been completed.  So, we are changing to the
	     text section to output debugging information.  Thus, we
	     need to forget that we are in the text section so that
	     varasm.c will call us when text_section is selected again.  */
	  gcc_assert (!cfun || !cfun->machine
		      || cfun->machine->in_nsubspa == 2);
	  in_section = NULL;
	}
      /* Default GAS case: start a fresh (non-COMDAT) code subspace.  */
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  /* HP assembler: switch back to the default text subspace.  */
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  /* Forget the current section first, so that varasm.c calls us again
     (emitting a fresh directive) the next time this section is
     selected; comdat subspaces must be re-opened each time.  */
  in_section = NULL;
  output_section_asm_op (data);
}
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.

   Set up the SOM-specific section objects: the text section and the
   various read-only/one-only data subspaces, then override the
   generic readonly_data_section and exception_section choices to
   work around SOM toolchain limitations.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
			   "\t.SPACE $TEXT$\n"
			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
			   "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
			   som_output_comdat_data_section_asm_op,
			   "\t.SPACE $PRIVATE$\n"
			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
			   "ACCESS=31,SORT=24,COMDAT");

  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}
9595 /* On hpux10, the linker will give an error if we have a reference
9596 in the read-only data section to a symbol defined in a shared
9597 library. Therefore, expressions that might require a reloc can
9598 not be placed in the read-only data section. */
9600 static section *
9601 pa_select_section (tree exp, int reloc,
9602 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9604 if (TREE_CODE (exp) == VAR_DECL
9605 && TREE_READONLY (exp)
9606 && !TREE_THIS_VOLATILE (exp)
9607 && DECL_INITIAL (exp)
9608 && (DECL_INITIAL (exp) == error_mark_node
9609 || TREE_CONSTANT (DECL_INITIAL (exp)))
9610 && !reloc)
9612 if (TARGET_SOM
9613 && DECL_ONE_ONLY (exp)
9614 && !DECL_WEAK (exp))
9615 return som_one_only_readonly_data_section;
9616 else
9617 return readonly_data_section;
9619 else if (CONSTANT_CLASS_P (exp) && !reloc)
9620 return readonly_data_section;
9621 else if (TARGET_SOM
9622 && TREE_CODE (exp) == VAR_DECL
9623 && DECL_ONE_ONLY (exp)
9624 && !DECL_WEAK (exp))
9625 return som_one_only_data_section;
9626 else
9627 return data_section;
/* Globalize the label NAME on STREAM.  We only handle DATA objects
   here; functions are globalized in ASM_DECLARE_FUNCTION_NAME.  */

static void
pa_globalize_label (FILE *stream, const char *name)
{
  if (FUNCTION_NAME_P (name))
    return;

  fputs ("\t.EXPORT ", stream);
  assemble_name (stream, name);
  fputs (",DATA\n", stream);
}
/* Worker function for TARGET_STRUCT_VALUE_RTX.

   Return the register in which the address of an aggregate return
   value is passed; the same register is used for both incoming and
   outgoing calls, so both parameters are ignored.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}
9652 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9654 bool
9655 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9657 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9658 PA64 ABI says that objects larger than 128 bits are returned in memory.
9659 Note, int_size_in_bytes can return -1 if the size of the object is
9660 variable or larger than the maximum value that can be expressed as
9661 a HOST_WIDE_INT. It can also return zero for an empty type. The
9662 simplest way to handle variable and empty types is to pass them in
9663 memory. This avoids problems in defining the boundaries of argument
9664 slots, allocating registers, etc. */
9665 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9666 || int_size_in_bytes (type) <= 0);
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;		/* The external symbol's declaration.  */
  const char *name;	/* Its assembler name.  */
} extern_symbol;
/* Define gc'd vector type for extern_symbol.  */
DEF_VEC_O(extern_symbol);
DEF_VEC_ALLOC_O(extern_symbol,gc);

/* GC-managed vector of extern_symbol objects (not pointers: this is
   an object vector, see DEF_VEC_O above) awaiting output at the end
   of the file.  */
static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9687 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9688 /* Mark DECL (name NAME) as an external reference (assembler output
9689 file FILE). This saves the names to output at the end of the file
9690 if actually referenced. */
9692 void
9693 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9695 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9697 gcc_assert (file == asm_out_file);
9698 p->decl = decl;
9699 p->name = name;
/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  /* Walk the recorded external symbols, emitting .import only for
     those that were never defined in this file but whose SYMBOL_REF
     was actually referenced.  */
  for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  /* The vector is only needed once per translation unit.  */
  VEC_free (extern_symbol, gc, extern_symbols);
}
9728 #endif
9730 /* Return true if a change from mode FROM to mode TO for a register
9731 in register class RCLASS is invalid. */
9733 bool
9734 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9735 enum reg_class rclass)
9737 if (from == to)
9738 return false;
9740 /* Reject changes to/from complex and vector modes. */
9741 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9742 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9743 return true;
9745 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9746 return false;
9748 /* There is no way to load QImode or HImode values directly from
9749 memory. SImode loads to the FP registers are not zero extended.
9750 On the 64-bit target, this conflicts with the definition of
9751 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9752 with different sizes in the floating-point registers. */
9753 if (MAYBE_FP_REG_CLASS_P (rclass))
9754 return true;
9756 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9757 in specific sets of registers. Thus, we cannot allow changing
9758 to a larger mode when it's larger than a word. */
9759 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9760 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9761 return true;
9763 return false;
9766 /* Returns TRUE if it is a good idea to tie two pseudo registers
9767 when one has mode MODE1 and one has mode MODE2.
9768 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9769 for any hard reg, then this must be FALSE for correct output.
9771 We should return FALSE for QImode and HImode because these modes
9772 are not ok in the floating-point registers. However, this prevents
9773 tieing these modes to SImode and DImode in the general registers.
9774 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9775 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9776 in the floating-point registers. */
9778 bool
9779 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9781 /* Don't tie modes in different classes. */
9782 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9783 return false;
9785 return true;
9788 #include "gt-pa.h"