/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "df.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, bool);
static bool hppa_rtx_costs (rtx, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (enum machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#if HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
                            const_tree, bool);
static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        enum machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
                                                   enum machine_mode, int *,
                                                   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static enum machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;

/* The UNIX standard to use for predefines and linking.  */
int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
static const struct default_options pa_option_optimization_table[] =
  {
    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override
#undef TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE pa_option_optimization_table

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION pa_handle_option

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#if HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
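
  /* For example, "-mfixed-range=fr4-fr31" removes all of fr4 through
     fr31 from allocation (the kernel case mentioned above); several
     such ranges may be given, separated by commas.  */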

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement TARGET_HANDLE_OPTION.  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mnosnake:
    case OPT_mpa_risc_1_0:
    case OPT_march_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    case OPT_msnake:
    case OPT_mpa_risc_1_1:
    case OPT_march_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    case OPT_mpa_risc_2_0:
    case OPT_march_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
        pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
        pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
        pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
        pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
        pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
        pa_cpu = PROCESSOR_7300;
      else
        return false;
      return true;

    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

#if TARGET_HPUX
    case OPT_munix_93:
      flag_pa_unix = 1993;
      return true;
#endif

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    default:
      return true;
    }
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (targetm.except_unwind_info (&global_options) == UI_DWARF2
          && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
#if TARGET_HPUX_11
  if (built_in_decls [BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls [BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type (long_double_type_node, void_list_node);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   enum machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
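  /* ldo covers 14-bit signed immediates; ldil covers constants whose
     low 11 bits are zero (see ldil_cint_p); zdepi covers the
     deposited-field constants described at zdepi_cint_p.  */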
  return (VAL_14_BITS_P (ival)
          || ldil_cint_p (ival)
          || zdepi_cint_p (ival));
}

/* Return truth value of whether OP can be used as an operand in an
   adddi3 insn.  */
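
/* The narrower 11-bit test for !TARGET_64BIT reflects the addi used by
   the two-insn 32-bit double-word add sequence (addi takes an 11-bit
   signed immediate); with TARGET_64BIT a single 14-bit immediate add
   can be used.  */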
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT
              && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}

/* True iff the operand OP can be used as the destination operand of
   an integer store.  This also implies the operand could be used as
   the source operand of an integer load.  Symbolic, lo_sum and indexed
   memory operands are not allowed.  We accept reloading pseudos and
   other memory operands.  */
int
integer_store_memory_operand (rtx op, enum machine_mode mode)
{
  return ((reload_in_progress
           && REG_P (op)
           && REGNO (op) >= FIRST_PSEUDO_REGISTER
           && reg_renumber [REGNO (op)] < 0)
          || (GET_CODE (op) == MEM
              && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
              && !symbolic_memory_operand (op, VOIDmode)
              && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
              && !IS_INDEX_ADDR_P (XEXP (op, 0))));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
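
/* For example, 0x12345000 qualifies: bits 0-10 are zero and bit 31 is
   clear, so the value is unchanged by sign extension.  0x80000000 does
   not, since as a 32-bit quantity it is negative.  */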
int
ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
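
/* For example, x = 0x1e0 (0b1_1110_0000) is accepted: lsb_mask is 0x20,
   so t = (0x1e + 0x20) & ~0x1f = 0x20, a power of two.  x = 0x21 is
   rejected: t = (0x2 + 0x1) & ~0x0 = 3.  */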
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
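
/* For example, mask = ~0xc (1...10011 in binary, the third pattern) is
   accepted: ~mask is 0b1100, and adding its lowest set bit gives
   0b10000, a power of two.  */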
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
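
/* For example, 0x1c (0b11100) is accepted: adding the low bit 0b100
   gives 0b100000, a power of two; 0x14 (0b10100) is not.  */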
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, mode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));
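
   As a worked example with the MODE_INT mask 0x3fff described below and
   <large int> = 0x4123: 0x4123 & 0x3fff = 0x123, which is below the
   halfway point 0x2000, so we round down, Y = 0x4000, and the reference
   becomes memory ((X + 0x4000) + 0x123).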

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                             gen_rtx_MULT (Pmode,
                                                           reg2,
                                                           GEN_INT (val)),
                                             reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_MULT (Pmode,
                                            XEXP (XEXP (XEXP (x, 0), 0), 0),
                                            XEXP (XEXP (XEXP (x, 0), 0), 1)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_MULT (Pmode, reg1,
                                                  XEXP (XEXP (idx, 0), 1)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          int val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                                 gen_rtx_MULT (Pmode,
                                                               reg2,
                                                               GEN_INT (val)),
                                                 reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_MULT (Pmode, reg1,
                                                    XEXP (XEXP (idx, 0), 1)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big but can be divided evenly by shadd_const,
             the quotient can be added to (reg) instead; this allows more
             scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              int val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              gen_rtx_MULT (Pmode,
                                                            reg2,
                                                            GEN_INT (val)),
                                              reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode, regx2,
                                                       XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
                bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = COSTS_N_INSNS (8);
      else
        *total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else
        *total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }
1692 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1693 && ((tem = find_replacement (&XEXP (operand0, 0)))
1694 != XEXP (operand0, 0)))
1695 operand0 = replace_equiv_address (operand0, tem);
1697 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1698 && ((tem = find_replacement (&XEXP (operand1, 0)))
1699 != XEXP (operand1, 0)))
1700 operand1 = replace_equiv_address (operand1, tem);
1702 /* Handle secondary reloads for loads/stores of FP registers from
1703 REG+D addresses where D does not fit in 5 or 14 bits, including
1704 (subreg (mem (addr))) cases. */
1705 if (scratch_reg
1706 && fp_reg_operand (operand0, mode)
1707 && ((GET_CODE (operand1) == MEM
1708 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1709 XEXP (operand1, 0)))
1710 || ((GET_CODE (operand1) == SUBREG
1711 && GET_CODE (XEXP (operand1, 0)) == MEM
1712 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1713 ? SFmode : DFmode),
1714 XEXP (XEXP (operand1, 0), 0))))))
1716 if (GET_CODE (operand1) == SUBREG)
1717 operand1 = XEXP (operand1, 0);
1719 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1720 it in WORD_MODE regardless of what mode it was originally given
1721 to us. */
1722 scratch_reg = force_mode (word_mode, scratch_reg);
1724 /* D might not fit in 14 bits either; for such cases load D into
1725 scratch reg. */
1726 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1728 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1729 emit_move_insn (scratch_reg,
1730 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1731 Pmode,
1732 XEXP (XEXP (operand1, 0), 0),
1733 scratch_reg));
1735 else
1736 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1737 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1738 replace_equiv_address (operand1, scratch_reg)));
1739 return 1;
1741 else if (scratch_reg
1742 && fp_reg_operand (operand1, mode)
1743 && ((GET_CODE (operand0) == MEM
1744 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1745 ? SFmode : DFmode),
1746 XEXP (operand0, 0)))
1747 || ((GET_CODE (operand0) == SUBREG)
1748 && GET_CODE (XEXP (operand0, 0)) == MEM
1749 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1750 ? SFmode : DFmode),
1751 XEXP (XEXP (operand0, 0), 0)))))
1753 if (GET_CODE (operand0) == SUBREG)
1754 operand0 = XEXP (operand0, 0);
1756 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1757 it in WORD_MODE regardless of what mode it was originally given
1758 to us. */
1759 scratch_reg = force_mode (word_mode, scratch_reg);
1761 /* D might not fit in 14 bits either; for such cases load D into
1762 scratch reg. */
1763 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1765 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1766 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1767 0)),
1768 Pmode,
1769 XEXP (XEXP (operand0, 0),
1771 scratch_reg));
1773 else
1774 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1775 emit_insn (gen_rtx_SET (VOIDmode,
1776 replace_equiv_address (operand0, scratch_reg),
1777 operand1));
1778 return 1;
1780 /* Handle secondary reloads for loads of FP registers from constant
1781 expressions by forcing the constant into memory.
1783 Use scratch_reg to hold the address of the memory location.
1785 The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return
1786 NO_REGS when presented with a const_int and a register class
1787 containing only FP registers. Doing so unfortunately creates
1788 more problems than it solves. Fix this for 2.5. */
1789 else if (scratch_reg
1790 && CONSTANT_P (operand1)
1791 && fp_reg_operand (operand0, mode))
1793 rtx const_mem, xoperands[2];
1795 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1796 it in WORD_MODE regardless of what mode it was originally given
1797 to us. */
1798 scratch_reg = force_mode (word_mode, scratch_reg);
1800 /* Force the constant into memory and put the address of the
1801 memory location into scratch_reg. */
1802 const_mem = force_const_mem (mode, operand1);
1803 xoperands[0] = scratch_reg;
1804 xoperands[1] = XEXP (const_mem, 0);
1805 emit_move_sequence (xoperands, Pmode, 0);
1807 /* Now load the destination register. */
1808 emit_insn (gen_rtx_SET (mode, operand0,
1809 replace_equiv_address (const_mem, scratch_reg)));
1810 return 1;
1812 /* Handle secondary reloads for SAR. These occur when trying to load
1813 the SAR from memory, FP register, or with a constant. */
1814 else if (scratch_reg
1815 && GET_CODE (operand0) == REG
1816 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1817 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1818 && (GET_CODE (operand1) == MEM
1819 || GET_CODE (operand1) == CONST_INT
1820 || (GET_CODE (operand1) == REG
1821 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1823 /* D might not fit in 14 bits either; for such cases load D into
1824 scratch reg. */
1825 if (GET_CODE (operand1) == MEM
1826 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1828 /* We are reloading the address into the scratch register, so we
1829 want to make sure the scratch register is a full register. */
1830 scratch_reg = force_mode (word_mode, scratch_reg);
1832 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1833 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1834 0)),
1835 Pmode,
1836 XEXP (XEXP (operand1, 0),
1837 0),
1838 scratch_reg));
1840 /* Now we are going to load the scratch register from memory;
1841 we want to load it in the same width as the original MEM,
1842 which must be the same as the width of the ultimate destination,
1843 OPERAND0. */
1844 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1846 emit_move_insn (scratch_reg,
1847 replace_equiv_address (operand1, scratch_reg));
1849 else
1851 /* We want to load the scratch register using the same mode as
1852 the ultimate destination. */
1853 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1855 emit_move_insn (scratch_reg, operand1);
1858 /* And emit the insn to set the ultimate destination. We know that
1859 the scratch register has the same mode as the destination at this
1860 point. */
1861 emit_move_insn (operand0, scratch_reg);
1862 return 1;
1864 /* Handle the most common case: storing into a register. */
1865 else if (register_operand (operand0, mode))
1867 if (register_operand (operand1, mode)
1868 || (GET_CODE (operand1) == CONST_INT
1869 && cint_ok_for_move (INTVAL (operand1)))
1870 || (operand1 == CONST0_RTX (mode))
1871 || (GET_CODE (operand1) == HIGH
1872 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1873 /* Only `general_operands' can come here, so MEM is ok. */
1874 || GET_CODE (operand1) == MEM)
1876 /* Various sets are created during RTL generation which don't
1877 have the REG_POINTER flag correctly set. After the CSE pass,
1878 instruction recognition can fail if we don't consistently
1879 set this flag when performing register copies. This should
1880 also improve the opportunities for creating insns that use
1881 unscaled indexing. */
1882 if (REG_P (operand0) && REG_P (operand1))
1884 if (REG_POINTER (operand1)
1885 && !REG_POINTER (operand0)
1886 && !HARD_REGISTER_P (operand0))
1887 copy_reg_pointer (operand0, operand1);
1890 /* When MEMs are broken out, the REG_POINTER flag doesn't
1891 get set. In some cases, we can set the REG_POINTER flag
1892 from the declaration for the MEM. */
1893 if (REG_P (operand0)
1894 && GET_CODE (operand1) == MEM
1895 && !REG_POINTER (operand0))
1897 tree decl = MEM_EXPR (operand1);
1899 /* Set the register pointer flag and register alignment
1900 if the declaration for this memory reference is a
1901 pointer type. */
1902 if (decl)
1904 tree type;
1906 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1907 tree operand 1. */
1908 if (TREE_CODE (decl) == COMPONENT_REF)
1909 decl = TREE_OPERAND (decl, 1);
1911 type = TREE_TYPE (decl);
1912 type = strip_array_types (type);
1914 if (POINTER_TYPE_P (type))
1916 int align;
1918 type = TREE_TYPE (type);
1919 /* Using TYPE_ALIGN_OK is rather conservative as
1920 only the Ada front end actually sets it. */
1921 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1922 : BITS_PER_UNIT);
1923 mark_reg_pointer (operand0, align);
1928 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1929 return 1;
1932 else if (GET_CODE (operand0) == MEM)
1934 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1935 && !(reload_in_progress || reload_completed))
1937 rtx temp = gen_reg_rtx (DFmode);
1939 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1940 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1941 return 1;
1943 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1945 /* Run this case quickly. */
1946 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1947 return 1;
1949 if (! (reload_in_progress || reload_completed))
1951 operands[0] = validize_mem (operand0);
1952 operands[1] = operand1 = force_reg (mode, operand1);
1956 /* Simplify the source if we need to.
1957 Note we do have to handle function labels here, even though we do
1958 not consider them legitimate constants. Loop optimizations can
1959 call the emit_move_xxx with one as a source. */
1960 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1961 || function_label_operand (operand1, mode)
1962 || (GET_CODE (operand1) == HIGH
1963 && symbolic_operand (XEXP (operand1, 0), mode)))
1965 int ishighonly = 0;
1967 if (GET_CODE (operand1) == HIGH)
1969 ishighonly = 1;
1970 operand1 = XEXP (operand1, 0);
1972 if (symbolic_operand (operand1, mode))
1974 /* Argh. The assembler and linker can't handle arithmetic
1975 involving plabels.
1977 So we force the plabel into memory, load operand0 from
1978 the memory location, then add in the constant part. */
1979 if ((GET_CODE (operand1) == CONST
1980 && GET_CODE (XEXP (operand1, 0)) == PLUS
1981 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1982 || function_label_operand (operand1, mode))
1984 rtx temp, const_part;
1986 /* Figure out what (if any) scratch register to use. */
1987 if (reload_in_progress || reload_completed)
1989 scratch_reg = scratch_reg ? scratch_reg : operand0;
1990 /* SCRATCH_REG will hold an address and maybe the actual
1991 data. We want it in WORD_MODE regardless of what mode it
1992 was originally given to us. */
1993 scratch_reg = force_mode (word_mode, scratch_reg);
1995 else if (flag_pic)
1996 scratch_reg = gen_reg_rtx (Pmode);
1998 if (GET_CODE (operand1) == CONST)
2000 /* Save away the constant part of the expression. */
2001 const_part = XEXP (XEXP (operand1, 0), 1);
2002 gcc_assert (GET_CODE (const_part) == CONST_INT);
2004 /* Force the function label into memory. */
2005 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2007 else
2009 /* No constant part. */
2010 const_part = NULL_RTX;
2012 /* Force the function label into memory. */
2013 temp = force_const_mem (mode, operand1);
2017 /* Get the address of the memory location. PIC-ify it if
2018 necessary. */
2019 temp = XEXP (temp, 0);
2020 if (flag_pic)
2021 temp = legitimize_pic_address (temp, mode, scratch_reg);
2023 /* Put the address of the memory location into our destination
2024 register. */
2025 operands[1] = temp;
2026 emit_move_sequence (operands, mode, scratch_reg);
2028 /* Now load from the memory location into our destination
2029 register. */
2030 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2031 emit_move_sequence (operands, mode, scratch_reg);
2033 /* And add back in the constant part. */
2034 if (const_part != NULL_RTX)
2035 expand_inc (operand0, const_part);
2037 return 1;
2040 if (flag_pic)
2042 rtx temp;
2044 if (reload_in_progress || reload_completed)
2046 temp = scratch_reg ? scratch_reg : operand0;
2047 /* TEMP will hold an address and maybe the actual
2048 data. We want it in WORD_MODE regardless of what mode it
2049 was originally given to us. */
2050 temp = force_mode (word_mode, temp);
2052 else
2053 temp = gen_reg_rtx (Pmode);
2055 /* (const (plus (symbol) (const_int))) must be forced to
2056 memory during/after reload if the const_int will not fit
2057 in 14 bits. */
2058 if (GET_CODE (operand1) == CONST
2059 && GET_CODE (XEXP (operand1, 0)) == PLUS
2060 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2061 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
2062 && (reload_completed || reload_in_progress)
2063 && flag_pic)
2065 rtx const_mem = force_const_mem (mode, operand1);
2066 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
2067 mode, temp);
2068 operands[1] = replace_equiv_address (const_mem, operands[1]);
2069 emit_move_sequence (operands, mode, temp);
2071 else
2073 operands[1] = legitimize_pic_address (operand1, mode, temp);
2074 if (REG_P (operand0) && REG_P (operands[1]))
2075 copy_reg_pointer (operand0, operands[1]);
2076 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
2079 /* On the HPPA, references to data space are supposed to use dp,
2080 register 27, but showing it in the RTL inhibits various cse
2081 and loop optimizations. */
2082 else
2084 rtx temp, set;
2086 if (reload_in_progress || reload_completed)
2088 temp = scratch_reg ? scratch_reg : operand0;
2089 /* TEMP will hold an address and maybe the actual
2090 data. We want it in WORD_MODE regardless of what mode it
2091 was originally given to us. */
2092 temp = force_mode (word_mode, temp);
2094 else
2095 temp = gen_reg_rtx (mode);
2097 /* Loading a SYMBOL_REF into a register makes that register
2098 safe to be used as the base in an indexed address.
2100 Don't mark hard registers though. That loses. */
2101 if (GET_CODE (operand0) == REG
2102 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2103 mark_reg_pointer (operand0, BITS_PER_UNIT);
2104 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2105 mark_reg_pointer (temp, BITS_PER_UNIT);
2107 if (ishighonly)
2108 set = gen_rtx_SET (mode, operand0, temp);
2109 else
2110 set = gen_rtx_SET (VOIDmode,
2111 operand0,
2112 gen_rtx_LO_SUM (mode, temp, operand1));
2114 emit_insn (gen_rtx_SET (VOIDmode,
2115 temp,
2116 gen_rtx_HIGH (mode, operand1)));
2117 emit_insn (set);
2120 return 1;
2122 else if (pa_tls_referenced_p (operand1))
2124 rtx tmp = operand1;
2125 rtx addend = NULL;
2127 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2129 addend = XEXP (XEXP (tmp, 0), 1);
2130 tmp = XEXP (XEXP (tmp, 0), 0);
2133 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2134 tmp = legitimize_tls_address (tmp);
2135 if (addend)
2137 tmp = gen_rtx_PLUS (mode, tmp, addend);
2138 tmp = force_operand (tmp, operands[0]);
2140 operands[1] = tmp;
2142 else if (GET_CODE (operand1) != CONST_INT
2143 || !cint_ok_for_move (INTVAL (operand1)))
2145 rtx insn, temp;
2146 rtx op1 = operand1;
2147 HOST_WIDE_INT value = 0;
2148 HOST_WIDE_INT insv = 0;
2149 int insert = 0;
2151 if (GET_CODE (operand1) == CONST_INT)
2152 value = INTVAL (operand1);
2154 if (TARGET_64BIT
2155 && GET_CODE (operand1) == CONST_INT
2156 && HOST_BITS_PER_WIDE_INT > 32
2157 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2159 HOST_WIDE_INT nval;
2161 /* Extract the low order 32 bits of the value and sign extend.
2162 If the new value is the same as the original value, we can
2163 use the original value as-is. If the new value is
2164 different, we use it and insert the most significant 32 bits
2165 of the original value into the final result. */
2166 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2167 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2168 if (value != nval)
2170 #if HOST_BITS_PER_WIDE_INT > 32
2171 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2172 #endif
2173 insert = 1;
2174 value = nval;
2175 operand1 = GEN_INT (nval);
2179 if (reload_in_progress || reload_completed)
2180 temp = scratch_reg ? scratch_reg : operand0;
2181 else
2182 temp = gen_reg_rtx (mode);
2184 /* We don't directly split DImode constants on 32-bit targets
2185 because PLUS uses an 11-bit immediate and the insn sequence
2186 generated is not as efficient as the one using HIGH/LO_SUM. */
2187 if (GET_CODE (operand1) == CONST_INT
2188 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2189 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2190 && !insert)
2192 /* Directly break constant into high and low parts. This
2193 provides better optimization opportunities because various
2194 passes recognize constants split with PLUS but not LO_SUM.
2195 We use a 14-bit signed low part except when the addition
2196 of 0x4000 to the high part might change the sign of the
2197 high part. */
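/* Added annotation with a worked example (values are illustrative,
not from the original source): for value = 0x12345678 the low part
is 0x1678 < 0x2000, so high = 0x12344000 and the PLUS immediate is
positive. For value = 0x12347678 the low part is 0x3678 >= 0x2000,
so high is bumped to 0x12348000 and low becomes value - high =
-0x988, which still fits in the 14-bit signed immediate. */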
2198 HOST_WIDE_INT low = value & 0x3fff;
2199 HOST_WIDE_INT high = value & ~ 0x3fff;
2201 if (low >= 0x2000)
2203 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2204 high += 0x2000;
2205 else
2206 high += 0x4000;
2209 low = value - high;
2211 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2212 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2214 else
2216 emit_insn (gen_rtx_SET (VOIDmode, temp,
2217 gen_rtx_HIGH (mode, operand1)));
2218 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2221 insn = emit_move_insn (operands[0], operands[1]);
2223 /* Now insert the most significant 32 bits of the value
2224 into the register. When we don't have a second register
2225 available, it could take up to nine instructions to load
2226 a 64-bit integer constant. Prior to reload, we force
2227 constants that would take more than three instructions
2228 to load to the constant pool. During and after reload,
2229 we have to handle all possible values. */
2230 if (insert)
2232 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2233 register and the value to be inserted is outside the
2234 range that can be loaded with three depdi instructions. */
2235 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2237 operand1 = GEN_INT (insv);
2239 emit_insn (gen_rtx_SET (VOIDmode, temp,
2240 gen_rtx_HIGH (mode, operand1)));
2241 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2242 emit_insn (gen_insv (operand0, GEN_INT (32),
2243 const0_rtx, temp));
2245 else
2247 int len = 5, pos = 27;
2249 /* Insert the bits using the depdi instruction. */
2250 while (pos >= 0)
2252 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2253 HOST_WIDE_INT sign = v5 < 0;
2255 /* Left extend the insertion. */
2256 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2257 while (pos > 0 && (insv & 1) == sign)
2259 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2260 len += 1;
2261 pos -= 1;
2264 emit_insn (gen_insv (operand0, GEN_INT (len),
2265 GEN_INT (pos), GEN_INT (v5)));
2267 len = pos > 0 && pos < 5 ? pos : 5;
2268 pos -= len;
2273 set_unique_reg_note (insn, REG_EQUAL, op1);
2275 return 1;
2278 /* Now have insn-emit do whatever it normally does. */
2279 return 0;
2282 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2283 it will need a link/runtime reloc). */
2285 int
2286 reloc_needed (tree exp)
2288 int reloc = 0;
2290 switch (TREE_CODE (exp))
2292 case ADDR_EXPR:
2293 return 1;
2295 case POINTER_PLUS_EXPR:
2296 case PLUS_EXPR:
2297 case MINUS_EXPR:
2298 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2299 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2300 break;
2302 CASE_CONVERT:
2303 case NON_LVALUE_EXPR:
2304 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2305 break;
2307 case CONSTRUCTOR:
2309 tree value;
2310 unsigned HOST_WIDE_INT ix;
2312 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2313 if (value)
2314 reloc |= reloc_needed (value);
2316 break;
2318 case ERROR_MARK:
2319 break;
2321 default:
2322 break;
2324 return reloc;
2327 /* Does operand (which is a symbolic_operand) live in text space?
2328 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2329 will be true. */
2331 int
2332 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2334 if (GET_CODE (operand) == CONST)
2335 operand = XEXP (XEXP (operand, 0), 0);
2336 if (flag_pic)
2338 if (GET_CODE (operand) == SYMBOL_REF)
2339 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2341 else
2343 if (GET_CODE (operand) == SYMBOL_REF)
2344 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2346 return 1;
2350 /* Return the best assembler insn template
2351 for moving operands[1] into operands[0] as a fullword. */
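/* Added annotation, illustrative values only: INTVAL 4660 fits in
14 bits and selects "ldi"; 0x12345800 has zero low-order 11 bits and
selects "ldil"; a contiguous mask such as 0x00fff000 satisfies
zdepi_cint_p and selects the zdepi/depwi form; anything else falls
back to the two-insn ldil/ldo sequence. */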
2352 const char *
2353 singlemove_string (rtx *operands)
2355 HOST_WIDE_INT intval;
2357 if (GET_CODE (operands[0]) == MEM)
2358 return "stw %r1,%0";
2359 if (GET_CODE (operands[1]) == MEM)
2360 return "ldw %1,%0";
2361 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2363 long i;
2364 REAL_VALUE_TYPE d;
2366 gcc_assert (GET_MODE (operands[1]) == SFmode);
2368 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2369 bit pattern. */
2370 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2371 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2373 operands[1] = GEN_INT (i);
2374 /* Fall through to CONST_INT case. */
2376 if (GET_CODE (operands[1]) == CONST_INT)
2378 intval = INTVAL (operands[1]);
2380 if (VAL_14_BITS_P (intval))
2381 return "ldi %1,%0";
2382 else if ((intval & 0x7ff) == 0)
2383 return "ldil L'%1,%0";
2384 else if (zdepi_cint_p (intval))
2385 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2386 else
2387 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2389 return "copy %1,%0";
2393 /* Compute position (in OP[1]) and width (in OP[2])
2394 useful for copying IMM to a register using the zdepi
2395 instructions. Store the immediate value to insert in OP[0]. */
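/* Worked example (added annotation): imm = 0x00fff000 has its least
significant set bit at position 12, so lsb = 12 and the shifted imm
is 0xfff. Bit 4 of that value is set, so the width search yields
len = 12 and imm sign extends to -1, giving op[0] = -1,
op[1] = 31 - 12 = 19, op[2] = 12, i.e. a "zdepi -1,19,12" deposit
that rebuilds the original mask. */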
2396 static void
2397 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2399 int lsb, len;
2401 /* Find the least significant set bit in IMM. */
2402 for (lsb = 0; lsb < 32; lsb++)
2404 if ((imm & 1) != 0)
2405 break;
2406 imm >>= 1;
2409 /* Choose variants based on *sign* of the 5-bit field. */
2410 if ((imm & 0x10) == 0)
2411 len = (lsb <= 28) ? 4 : 32 - lsb;
2412 else
2414 /* Find the width of the bitstring in IMM. */
2415 for (len = 5; len < 32 - lsb; len++)
2417 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2418 break;
2421 /* Sign extend IMM as a 5-bit value. */
2422 imm = (imm & 0xf) - 0x10;
2425 op[0] = imm;
2426 op[1] = 31 - lsb;
2427 op[2] = len;
2430 /* Compute position (in OP[1]) and width (in OP[2])
2431 useful for copying IMM to a register using the depdi,z
2432 instructions. Store the immediate value to insert in OP[0]. */
2433 void
2434 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2436 int lsb, len, maxlen;
2438 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2440 /* Find the least significant set bit in IMM. */
2441 for (lsb = 0; lsb < maxlen; lsb++)
2443 if ((imm & 1) != 0)
2444 break;
2445 imm >>= 1;
2448 /* Choose variants based on *sign* of the 5-bit field. */
2449 if ((imm & 0x10) == 0)
2450 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2451 else
2453 /* Find the width of the bitstring in IMM. */
2454 for (len = 5; len < maxlen - lsb; len++)
2456 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2457 break;
2460 /* Extend length if host is narrow and IMM is negative. */
2461 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2462 len += 32;
2464 /* Sign extend IMM as a 5-bit value. */
2465 imm = (imm & 0xf) - 0x10;
2468 op[0] = imm;
2469 op[1] = 63 - lsb;
2470 op[2] = len;
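/* Worked 64-bit example (added annotation, assuming a host with
64-bit HOST_WIDE_INT): imm = 0xff00 gives lsb = 8, a shifted imm of
0xff with bit 4 set, len = 8 and a deposit value of -1, so
op = {-1, 55, 8}, matching a "depdi,z -1,55,8". */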
2473 /* Output assembler code to perform a doubleword move insn
2474 with operands OPERANDS. */
2476 const char *
2477 output_move_double (rtx *operands)
2479 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2480 rtx latehalf[2];
2481 rtx addreg0 = 0, addreg1 = 0;
2483 /* First classify both operands. */
2485 if (REG_P (operands[0]))
2486 optype0 = REGOP;
2487 else if (offsettable_memref_p (operands[0]))
2488 optype0 = OFFSOP;
2489 else if (GET_CODE (operands[0]) == MEM)
2490 optype0 = MEMOP;
2491 else
2492 optype0 = RNDOP;
2494 if (REG_P (operands[1]))
2495 optype1 = REGOP;
2496 else if (CONSTANT_P (operands[1]))
2497 optype1 = CNSTOP;
2498 else if (offsettable_memref_p (operands[1]))
2499 optype1 = OFFSOP;
2500 else if (GET_CODE (operands[1]) == MEM)
2501 optype1 = MEMOP;
2502 else
2503 optype1 = RNDOP;
2505 /* Check for the cases that the operand constraints are not
2506 supposed to allow to happen. */
2507 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2509 /* Handle copies between general and floating registers. */
2511 if (optype0 == REGOP && optype1 == REGOP
2512 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2514 if (FP_REG_P (operands[0]))
2516 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2517 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2518 return "{fldds|fldd} -16(%%sp),%0";
2520 else
2522 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2523 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2524 return "{ldws|ldw} -12(%%sp),%R0";
2528 /* Handle auto decrementing and incrementing loads and stores
2529 specifically, since the structure of the function doesn't work
2530 for them without major modification. Do it better when we teach
2531 this port about the general inc/dec addressing of the PA.
2532 (This was written by tege. Chide him if it doesn't work.) */
2534 if (optype0 == MEMOP)
2536 /* We have to output the address syntax ourselves, since print_operand
2537 doesn't deal with the addresses we want to use. Fix this later. */
2539 rtx addr = XEXP (operands[0], 0);
2540 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2542 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2544 operands[0] = XEXP (addr, 0);
2545 gcc_assert (GET_CODE (operands[1]) == REG
2546 && GET_CODE (operands[0]) == REG);
2548 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2550 /* No overlap between high target register and address
2551 register. (We do this in a non-obvious way to
2552 save a register file writeback) */
2553 if (GET_CODE (addr) == POST_INC)
2554 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2555 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2557 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2559 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2561 operands[0] = XEXP (addr, 0);
2562 gcc_assert (GET_CODE (operands[1]) == REG
2563 && GET_CODE (operands[0]) == REG);
2565 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2566 /* No overlap between high target register and address
2567 register. (We do this in a non-obvious way to save a
2568 register file writeback) */
2569 if (GET_CODE (addr) == PRE_INC)
2570 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2571 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2574 if (optype1 == MEMOP)
2576 /* We have to output the address syntax ourselves, since print_operand
2577 doesn't deal with the addresses we want to use. Fix this later. */
2579 rtx addr = XEXP (operands[1], 0);
2580 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2582 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2584 operands[1] = XEXP (addr, 0);
2585 gcc_assert (GET_CODE (operands[0]) == REG
2586 && GET_CODE (operands[1]) == REG);
2588 if (!reg_overlap_mentioned_p (high_reg, addr))
2590 /* No overlap between high target register and address
2591 register. (We do this in a non-obvious way to
2592 save a register file writeback) */
2593 if (GET_CODE (addr) == POST_INC)
2594 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2595 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2597 else
2599 /* This is an undefined situation. We should load into the
2600 address register *and* update that register. Probably
2601 we don't need to handle this at all. */
2602 if (GET_CODE (addr) == POST_INC)
2603 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2604 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2607 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2609 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2611 operands[1] = XEXP (addr, 0);
2612 gcc_assert (GET_CODE (operands[0]) == REG
2613 && GET_CODE (operands[1]) == REG);
2615 if (!reg_overlap_mentioned_p (high_reg, addr))
2617 /* No overlap between high target register and address
2618 register. (We do this in a non-obvious way to
2619 save a register file writeback) */
2620 if (GET_CODE (addr) == PRE_INC)
2621 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2622 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2624 else
2626 /* This is an undefined situation. We should load into the
2627 address register *and* update that register. Probably
2628 we don't need to handle this at all. */
2629 if (GET_CODE (addr) == PRE_INC)
2630 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2631 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2634 else if (GET_CODE (addr) == PLUS
2635 && GET_CODE (XEXP (addr, 0)) == MULT)
2637 rtx xoperands[4];
2638 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2640 if (!reg_overlap_mentioned_p (high_reg, addr))
2642 xoperands[0] = high_reg;
2643 xoperands[1] = XEXP (addr, 1);
2644 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2645 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2646 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2647 xoperands);
2648 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2650 else
2652 xoperands[0] = high_reg;
2653 xoperands[1] = XEXP (addr, 1);
2654 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2655 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2656 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2657 xoperands);
2658 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2663 /* If an operand is an unoffsettable memory ref, find a register
2664 we can increment temporarily to make it refer to the second word. */
2666 if (optype0 == MEMOP)
2667 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2669 if (optype1 == MEMOP)
2670 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2672 /* Ok, we can do one word at a time.
2673 Normally we do the low-numbered word first; sometimes the order must be swapped (see below).
2675 In either case, set up in LATEHALF the operands to use
2676 for the high-numbered word and in some cases alter the
2677 operands in OPERANDS to be suitable for the low-numbered word. */
2679 if (optype0 == REGOP)
2680 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2681 else if (optype0 == OFFSOP)
2682 latehalf[0] = adjust_address (operands[0], SImode, 4);
2683 else
2684 latehalf[0] = operands[0];
2686 if (optype1 == REGOP)
2687 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2688 else if (optype1 == OFFSOP)
2689 latehalf[1] = adjust_address (operands[1], SImode, 4);
2690 else if (optype1 == CNSTOP)
2691 split_double (operands[1], &operands[1], &latehalf[1]);
2692 else
2693 latehalf[1] = operands[1];
2695 /* If the first move would clobber the source of the second one,
2696 do them in the other order.
2698 This can happen in two cases:
2700 mem -> register where the first half of the destination register
2701 is the same register used in the memory's address. Reload
2702 can create such insns.
2704 mem in this case will be either register indirect or register
2705 indirect plus a valid offset.
2707 register -> register move where REGNO(dst) == REGNO(src + 1)
2708 someone (Tim/Tege?) claimed this can happen for parameter loads.
2710 Handle mem -> register case first. */
2711 if (optype0 == REGOP
2712 && (optype1 == MEMOP || optype1 == OFFSOP)
2713 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2714 operands[1], 0))
2716 /* Do the late half first. */
2717 if (addreg1)
2718 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2719 output_asm_insn (singlemove_string (latehalf), latehalf);
2721 /* Then clobber. */
2722 if (addreg1)
2723 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2724 return singlemove_string (operands);
2727 /* Now handle register -> register case. */
2728 if (optype0 == REGOP && optype1 == REGOP
2729 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2731 output_asm_insn (singlemove_string (latehalf), latehalf);
2732 return singlemove_string (operands);
2735 /* Normal case: do the two words, low-numbered first. */
2737 output_asm_insn (singlemove_string (operands), operands);
2739 /* Make any unoffsettable addresses point at high-numbered word. */
2740 if (addreg0)
2741 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2742 if (addreg1)
2743 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2745 /* Do that word. */
2746 output_asm_insn (singlemove_string (latehalf), latehalf);
2748 /* Undo the adds we just did. */
2749 if (addreg0)
2750 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2751 if (addreg1)
2752 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2754 return "";
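/* Illustrative example (added annotation; registers and offset are
made up): copying the offsettable doubleword at 8(%r26) into %r4
emits "ldw 8(%r26),%r4" then "ldw 12(%r26),%r5"; had %r4 (the first
half of the destination) appeared in the source address, the two
words would be copied in the other order. */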
2757 const char *
2758 output_fp_move_double (rtx *operands)
2760 if (FP_REG_P (operands[0]))
2762 if (FP_REG_P (operands[1])
2763 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2764 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2765 else
2766 output_asm_insn ("fldd%F1 %1,%0", operands);
2768 else if (FP_REG_P (operands[1]))
2770 output_asm_insn ("fstd%F0 %1,%0", operands);
2772 else
2774 rtx xoperands[2];
2776 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2778 /* This is a pain. You have to be prepared to deal with an
2779 arbitrary address here, including pre/post increment/decrement,
2781 so avoid this in the MD. */
2782 gcc_assert (GET_CODE (operands[0]) == REG);
2784 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2785 xoperands[0] = operands[0];
2786 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2788 return "";
2791 /* Return a REG that occurs in ADDR with coefficient 1.
2792 ADDR can be effectively incremented by incrementing REG. */
2794 static rtx
2795 find_addr_reg (rtx addr)
2797 while (GET_CODE (addr) == PLUS)
2799 if (GET_CODE (XEXP (addr, 0)) == REG)
2800 addr = XEXP (addr, 0);
2801 else if (GET_CODE (XEXP (addr, 1)) == REG)
2802 addr = XEXP (addr, 1);
2803 else if (CONSTANT_P (XEXP (addr, 0)))
2804 addr = XEXP (addr, 1);
2805 else if (CONSTANT_P (XEXP (addr, 1)))
2806 addr = XEXP (addr, 0);
2807 else
2808 gcc_unreachable ();
2810 gcc_assert (GET_CODE (addr) == REG);
2811 return addr;
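/* E.g. (added annotation): for the address (plus (reg %r26)
(const_int 8)), find_addr_reg returns %r26, which output_move_double
can then bump temporarily with "ldo 4(%r26),%r26". */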
2814 /* Emit code to perform a block move.
2816 OPERANDS[0] is the destination pointer as a REG, clobbered.
2817 OPERANDS[1] is the source pointer as a REG, clobbered.
2818 OPERANDS[2] is a register for temporary storage.
2819 OPERANDS[3] is a register for temporary storage.
2820 OPERANDS[4] is the size as a CONST_INT.
2821 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2822 OPERANDS[6] is another temporary register. */
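/* Illustrative walk-through (added annotation; the byte count is
made up): with align == 4 and n_bytes == 20 the GAS variant of the
code below emits

ldi 12,%2
ldw,ma 4(%1),%3
ldw,ma 4(%1),%6
stw,ma %3,4(%0)
addib,>= -8,%2,.-12
stw,ma %6,4(%0)
ldw,ma 4(%1),%3
stw,ma %3,4(%0)

i.e. two trips through the 8-byte copy loop plus a one-word
residual. */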
2824 const char *
2825 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2827 int align = INTVAL (operands[5]);
2828 unsigned long n_bytes = INTVAL (operands[4]);
2830 /* We can't move more than a word at a time because the PA
2831 has no integer move insns wider than a word. (Could use fp mem ops?) */
2832 if (align > (TARGET_64BIT ? 8 : 4))
2833 align = (TARGET_64BIT ? 8 : 4);
2835 /* Note that we know each loop below will execute at least twice
2836 (else we would have open-coded the copy). */
2837 switch (align)
2839 case 8:
2840 /* Pre-adjust the loop counter. */
2841 operands[4] = GEN_INT (n_bytes - 16);
2842 output_asm_insn ("ldi %4,%2", operands);
2844 /* Copying loop. */
2845 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2846 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2847 output_asm_insn ("std,ma %3,8(%0)", operands);
2848 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2849 output_asm_insn ("std,ma %6,8(%0)", operands);
2851 /* Handle the residual. There could be up to 15 bytes of
2852 residual to copy! */
2853 if (n_bytes % 16 != 0)
2855 operands[4] = GEN_INT (n_bytes % 8);
2856 if (n_bytes % 16 >= 8)
2857 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2858 if (n_bytes % 8 != 0)
2859 output_asm_insn ("ldd 0(%1),%6", operands);
2860 if (n_bytes % 16 >= 8)
2861 output_asm_insn ("std,ma %3,8(%0)", operands);
2862 if (n_bytes % 8 != 0)
2863 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2865 return "";
2867 case 4:
2868 /* Pre-adjust the loop counter. */
2869 operands[4] = GEN_INT (n_bytes - 8);
2870 output_asm_insn ("ldi %4,%2", operands);
2872 /* Copying loop. */
2873 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2874 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2875 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2876 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2877 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2879 /* Handle the residual. There could be up to 7 bytes of
2880 residual to copy! */
2881 if (n_bytes % 8 != 0)
2883 operands[4] = GEN_INT (n_bytes % 4);
2884 if (n_bytes % 8 >= 4)
2885 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2886 if (n_bytes % 4 != 0)
2887 output_asm_insn ("ldw 0(%1),%6", operands);
2888 if (n_bytes % 8 >= 4)
2889 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2890 if (n_bytes % 4 != 0)
2891 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2893 return "";
2895 case 2:
2896 /* Pre-adjust the loop counter. */
2897 operands[4] = GEN_INT (n_bytes - 4);
2898 output_asm_insn ("ldi %4,%2", operands);
2900 /* Copying loop. */
2901 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2902 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2903 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2904 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2905 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2907 /* Handle the residual. */
2908 if (n_bytes % 4 != 0)
2910 if (n_bytes % 4 >= 2)
2911 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2912 if (n_bytes % 2 != 0)
2913 output_asm_insn ("ldb 0(%1),%6", operands);
2914 if (n_bytes % 4 >= 2)
2915 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2916 if (n_bytes % 2 != 0)
2917 output_asm_insn ("stb %6,0(%0)", operands);
2919 return "";
2921 case 1:
2922 /* Pre-adjust the loop counter. */
2923 operands[4] = GEN_INT (n_bytes - 2);
2924 output_asm_insn ("ldi %4,%2", operands);
2926 /* Copying loop. */
2927 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2928 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2929 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2930 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2931 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2933 /* Handle the residual. */
2934 if (n_bytes % 2 != 0)
2936 output_asm_insn ("ldb 0(%1),%3", operands);
2937 output_asm_insn ("stb %3,0(%0)", operands);
2939 return "";
2941 default:
2942 gcc_unreachable ();
2946 /* Count the number of insns necessary to handle this block move.
2948 Basic structure is the same as output_block_move, except that we
2949 count insns rather than emit them. */
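/* E.g. (added annotation): align == 4 and n_bytes == 20 counts the
6-insn loop plus 2 residual insns (20 % 8 == 4 >= align), i.e.
8 insns or 32 bytes, matching the sequence output_block_move emits
for the same operands. */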
2951 static int
2952 compute_movmem_length (rtx insn)
2954 rtx pat = PATTERN (insn);
2955 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2956 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2957 unsigned int n_insns = 0;
2959 /* We can't move more than a word at a time because the PA
2960 has no integer move insns wider than a word. (Could use fp mem ops?) */
2961 if (align > (TARGET_64BIT ? 8 : 4))
2962 align = (TARGET_64BIT ? 8 : 4);
2964 /* The basic copying loop. */
2965 n_insns = 6;
2967 /* Residuals. */
2968 if (n_bytes % (2 * align) != 0)
2970 if ((n_bytes % (2 * align)) >= align)
2971 n_insns += 2;
2973 if ((n_bytes % align) != 0)
2974 n_insns += 2;
2977 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2978 return n_insns * 4;
2981 /* Emit code to perform a block clear.
2983 OPERANDS[0] is the destination pointer as a REG, clobbered.
2984 OPERANDS[1] is a register for temporary storage.
2985 OPERANDS[2] is the size as a CONST_INT.
2986 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2988 const char *
2989 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2991 int align = INTVAL (operands[3]);
2992 unsigned long n_bytes = INTVAL (operands[2]);
2994 /* We can't clear more than a word at a time because the PA
2995 has no integer move insns wider than a word. */
2996 if (align > (TARGET_64BIT ? 8 : 4))
2997 align = (TARGET_64BIT ? 8 : 4);
2999 /* Note that we know each loop below will execute at least twice
3000 (else we would have open-coded the clear). */
3001 switch (align)
3003 case 8:
3004 /* Pre-adjust the loop counter. */
3005 operands[2] = GEN_INT (n_bytes - 16);
3006 output_asm_insn ("ldi %2,%1", operands);
3008 /* Loop. */
3009 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3010 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3011 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3013 /* Handle the residual. There could be up to 15 bytes of
3014 residual to clear! */
3015 if (n_bytes % 16 != 0)
3017 operands[2] = GEN_INT (n_bytes % 8);
3018 if (n_bytes % 16 >= 8)
3019 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3020 if (n_bytes % 8 != 0)
3021 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3023 return "";
3025 case 4:
3026 /* Pre-adjust the loop counter. */
3027 operands[2] = GEN_INT (n_bytes - 8);
3028 output_asm_insn ("ldi %2,%1", operands);
3030 /* Loop. */
3031 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3032 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3033 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3035 /* Handle the residual. There could be up to 7 bytes of
3036 residual to clear! */
3037 if (n_bytes % 8 != 0)
3039 operands[2] = GEN_INT (n_bytes % 4);
3040 if (n_bytes % 8 >= 4)
3041 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3042 if (n_bytes % 4 != 0)
3043 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3045 return "";
3047 case 2:
3048 /* Pre-adjust the loop counter. */
3049 operands[2] = GEN_INT (n_bytes - 4);
3050 output_asm_insn ("ldi %2,%1", operands);
3052 /* Loop. */
3053 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3054 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3055 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3057 /* Handle the residual. */
3058 if (n_bytes % 4 != 0)
3060 if (n_bytes % 4 >= 2)
3061 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3062 if (n_bytes % 2 != 0)
3063 output_asm_insn ("stb %%r0,0(%0)", operands);
3065 return "";
3067 case 1:
3068 /* Pre-adjust the loop counter. */
3069 operands[2] = GEN_INT (n_bytes - 2);
3070 output_asm_insn ("ldi %2,%1", operands);
3072 /* Loop. */
3073 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3074 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3075 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3077 /* Handle the residual. */
3078 if (n_bytes % 2 != 0)
3079 output_asm_insn ("stb %%r0,0(%0)", operands);
3081 return "";
3083 default:
3084 gcc_unreachable ();
3088 /* Count the number of insns necessary to handle this block clear.
3090 Basic structure is the same as output_block_clear, except that we
3091 count insns rather than emit them. */
3093 static int
3094 compute_clrmem_length (rtx insn)
3096 rtx pat = PATTERN (insn);
3097 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3098 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3099 unsigned int n_insns = 0;
3101 /* We can't clear more than a word at a time because the PA
3102 has no integer move insns wider than a word. */
3103 if (align > (TARGET_64BIT ? 8 : 4))
3104 align = (TARGET_64BIT ? 8 : 4);
3106 /* The basic loop. */
3107 n_insns = 4;
3109 /* Residuals. */
3110 if (n_bytes % (2 * align) != 0)
3112 if ((n_bytes % (2 * align)) >= align)
3113 n_insns++;
3115 if ((n_bytes % align) != 0)
3116 n_insns++;
3119 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3120 return n_insns * 4;
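/* E.g. (added annotation): align == 4 and n_bytes == 10 counts the
4-insn loop plus 1 residual insn (10 % 8 == 2 < align, but
10 % 4 != 0), i.e. 5 insns or 20 bytes. */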
3124 const char *
3125 output_and (rtx *operands)
3127 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3129 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3130 int ls0, ls1, ms0, p, len;
3132 for (ls0 = 0; ls0 < 32; ls0++)
3133 if ((mask & (1 << ls0)) == 0)
3134 break;
3136 for (ls1 = ls0; ls1 < 32; ls1++)
3137 if ((mask & (1 << ls1)) != 0)
3138 break;
3140 for (ms0 = ls1; ms0 < 32; ms0++)
3141 if ((mask & (1 << ms0)) == 0)
3142 break;
3144 gcc_assert (ms0 == 32);
3146 if (ls1 == 32)
3148 len = ls0;
3150 gcc_assert (len);
3152 operands[2] = GEN_INT (len);
3153 return "{extru|extrw,u} %1,31,%2,%0";
3155 else
3157 /* We could use this `depi' for the case above as well, but `depi'
3158 requires one more register file access than an `extru'. */
3160 p = 31 - ls0;
3161 len = ls1 - ls0;
3163 operands[2] = GEN_INT (p);
3164 operands[3] = GEN_INT (len);
3165 return "{depi|depwi} 0,%2,%3,%0";
3168 else
3169 return "and %1,%2,%0";
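/* Illustrative examples (added annotation): mask 0x00ffffff (the low
24 bits) selects "extru %1,31,24,%0", while mask 0xffffff00 (a hole
in the low 8 bits) selects "depi 0,31,8,%0". */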
3172 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3173 storing the result in operands[0]. */
3174 const char *
3175 output_64bit_and (rtx *operands)
3177 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3179 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3180 int ls0, ls1, ms0, p, len;
3182 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3183 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3184 break;
3186 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3187 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3188 break;
3190 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3191 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3192 break;
3194 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3196 if (ls1 == HOST_BITS_PER_WIDE_INT)
3198 len = ls0;
3200 gcc_assert (len);
3202 operands[2] = GEN_INT (len);
3203 return "extrd,u %1,63,%2,%0";
3205 else
3207 /* We could use this `depdi' for the case above as well, but `depdi'
3208 requires one more register file access than an `extrd'. */
3210 p = 63 - ls0;
3211 len = ls1 - ls0;
3213 operands[2] = GEN_INT (p);
3214 operands[3] = GEN_INT (len);
3215 return "depdi 0,%2,%3,%0";
3218 else
3219 return "and %1,%2,%0";
3222 const char *
3223 output_ior (rtx *operands)
3225 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3226 int bs0, bs1, p, len;
3228 if (INTVAL (operands[2]) == 0)
3229 return "copy %1,%0";
3231 for (bs0 = 0; bs0 < 32; bs0++)
3232 if ((mask & (1 << bs0)) != 0)
3233 break;
3235 for (bs1 = bs0; bs1 < 32; bs1++)
3236 if ((mask & (1 << bs1)) == 0)
3237 break;
3239 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3241 p = 31 - bs0;
3242 len = bs1 - bs0;
3244 operands[2] = GEN_INT (p);
3245 operands[3] = GEN_INT (len);
3246 return "{depi|depwi} -1,%2,%3,%0";
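/* Illustrative example (added annotation): mask 0x00000ff0 is a
single run of 8 ones starting at bit 4, so p = 27 and len = 8,
producing "depi -1,27,8,%0". */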
3249 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3250 storing the result in operands[0]. */
3251 const char *
3252 output_64bit_ior (rtx *operands)
3254 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3255 int bs0, bs1, p, len;
3257 if (INTVAL (operands[2]) == 0)
3258 return "copy %1,%0";
3260 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3261 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3262 break;
3264 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3265 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3266 break;
3268 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3269 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3271 p = 63 - bs0;
3272 len = bs1 - bs0;
3274 operands[2] = GEN_INT (p);
3275 operands[3] = GEN_INT (len);
3276 return "depdi -1,%2,%3,%0";
3279 /* Target hook for assembling integer objects. This code handles
3280 aligned SI and DI integers specially since function references
3281 must be preceded by P%. */
3283 static bool
3284 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3286 if (size == UNITS_PER_WORD
3287 && aligned_p
3288 && function_label_operand (x, VOIDmode))
3290 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3291 output_addr_const (asm_out_file, x);
3292 fputc ('\n', asm_out_file);
3293 return true;
3295 return default_assemble_integer (x, size, aligned_p);
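/* E.g. (added annotation): on a 32-bit target, an aligned word-sized
reference to function foo is emitted as "\t.word\tP%foo", so the
linker resolves it to a plabel rather than a raw code address. */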
3298 /* Output an ascii string. */
3299 void
3300 output_ascii (FILE *file, const char *p, int size)
3302 int i;
3303 int chars_output;
3304 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3306 /* The HP assembler can only take strings of 256 characters at one
3307 time. This is a limitation on input line length, *not* the
3308 length of the string. Sigh. Even worse, it seems that the
3309 restriction is in number of input characters (see \xnn &
3310 \whatever). So we have to do this very carefully. */
3312 fputs ("\t.STRING \"", file);
3314 chars_output = 0;
3315 for (i = 0; i < size; i += 4)
3317 int co = 0;
3318 int io = 0;
3319 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3321 register unsigned int c = (unsigned char) p[i + io];
3323 if (c == '\"' || c == '\\')
3324 partial_output[co++] = '\\';
3325 if (c >= ' ' && c < 0177)
3326 partial_output[co++] = c;
3327 else
3329 unsigned int hexd;
3330 partial_output[co++] = '\\';
3331 partial_output[co++] = 'x';
3332 hexd = c / 16 - 0 + '0';
3333 if (hexd > '9')
3334 hexd -= '9' - 'a' + 1;
3335 partial_output[co++] = hexd;
3336 hexd = c % 16 - 0 + '0';
3337 if (hexd > '9')
3338 hexd -= '9' - 'a' + 1;
3339 partial_output[co++] = hexd;
3342 if (chars_output + co > 243)
3344 fputs ("\"\n\t.STRING \"", file);
3345 chars_output = 0;
3347 fwrite (partial_output, 1, (size_t) co, file);
3348 chars_output += co;
3349 co = 0;
3351 fputs ("\"\n", file);
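/* E.g. (added annotation): the byte 0x07 is emitted as the four
input characters \x07, and a fresh "\t.STRING" directive is started
whenever appending the next chunk would push the current line past
243 characters, keeping each line under the assembler's limit. */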
3354 /* Try to rewrite floating point comparisons & branches to avoid
3355 useless add,tr insns.
3357 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3358 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3359 first attempt to remove useless add,tr insns. It is zero
3360 for the second pass as reorg sometimes leaves bogus REG_DEAD
3361 notes lying around.
3363 When CHECK_NOTES is zero we can only eliminate add,tr insns
3364 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3365 instructions. */
3366 static void
3367 remove_useless_addtr_insns (int check_notes)
3369 rtx insn;
3370 static int pass = 0;
3372 /* This is fairly cheap, so always run it when optimizing. */
3373 if (optimize > 0)
3375 int fcmp_count = 0;
3376 int fbranch_count = 0;
3378 /* Walk all the insns in this function looking for fcmp & fbranch
3379 instructions. Keep track of how many of each we find. */
3380 for (insn = get_insns (); insn; insn = next_insn (insn))
3382 rtx tmp;
3384 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3385 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3386 continue;
3388 tmp = PATTERN (insn);
3390 /* It must be a set. */
3391 if (GET_CODE (tmp) != SET)
3392 continue;
3394 /* If the destination is CCFP, then we've found an fcmp insn. */
3395 tmp = SET_DEST (tmp);
3396 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3398 fcmp_count++;
3399 continue;
3402 tmp = PATTERN (insn);
3403 /* If this is an fbranch instruction, bump the fbranch counter. */
3404 if (GET_CODE (tmp) == SET
3405 && SET_DEST (tmp) == pc_rtx
3406 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3407 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3408 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3409 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3411 fbranch_count++;
3412 continue;
3417 /* Find all floating point compare + branch insns. If possible,
3418 reverse the comparison & the branch to avoid add,tr insns. */
3419 for (insn = get_insns (); insn; insn = next_insn (insn))
3421 rtx tmp, next;
3423 /* Ignore anything that isn't an INSN. */
3424 if (GET_CODE (insn) != INSN)
3425 continue;
3427 tmp = PATTERN (insn);
3429 /* It must be a set. */
3430 if (GET_CODE (tmp) != SET)
3431 continue;
3433 /* The destination must be CCFP, which is register zero. */
3434 tmp = SET_DEST (tmp);
3435 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3436 continue;
3438 /* INSN should be a set of CCFP.
3440 See if the result of this insn is used in a reversed FP
3441 conditional branch. If so, reverse our condition and
3442 the branch. Doing so avoids useless add,tr insns. */
3443 next = next_insn (insn);
3444 while (next)
3446 /* Jumps, calls and labels stop our search. */
3447 if (GET_CODE (next) == JUMP_INSN
3448 || GET_CODE (next) == CALL_INSN
3449 || GET_CODE (next) == CODE_LABEL)
3450 break;
3452 /* As does another fcmp insn. */
3453 if (GET_CODE (next) == INSN
3454 && GET_CODE (PATTERN (next)) == SET
3455 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3456 && REGNO (SET_DEST (PATTERN (next))) == 0)
3457 break;
3459 next = next_insn (next);
3462 /* Is NEXT_INSN a branch? */
3463 if (next
3464 && GET_CODE (next) == JUMP_INSN)
3466 rtx pattern = PATTERN (next);
3468 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3469 and CCFP dies, then reverse our conditional and the branch
3470 to avoid the add,tr. */
3471 if (GET_CODE (pattern) == SET
3472 && SET_DEST (pattern) == pc_rtx
3473 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3474 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3475 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3476 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3477 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3478 && (fcmp_count == fbranch_count
3479 || (check_notes
3480 && find_regno_note (next, REG_DEAD, 0))))
3482 /* Reverse the branch. */
3483 tmp = XEXP (SET_SRC (pattern), 1);
3484 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3485 XEXP (SET_SRC (pattern), 2) = tmp;
3486 INSN_CODE (next) = -1;
3488 /* Reverse our condition. */
3489 tmp = PATTERN (insn);
3490 PUT_CODE (XEXP (tmp, 1),
3491 (reverse_condition_maybe_unordered
3492 (GET_CODE (XEXP (tmp, 1)))));
3498 pass = !pass;
3502 /* You may have trouble believing this, but this is the 32 bit HP-PA
3503 stack layout. Wow.
3505 Offset Contents
3507 Variable arguments (optional; any number may be allocated)
3509 SP-(4*(N+9)) arg word N
3511 SP-56 arg word 5
3512 SP-52 arg word 4
3514 Fixed arguments (must be allocated; may remain unused)
3516 SP-48 arg word 3
3517 SP-44 arg word 2
3518 SP-40 arg word 1
3519 SP-36 arg word 0
3521 Frame Marker
3523 SP-32 External Data Pointer (DP)
3524 SP-28 External sr4
3525 SP-24 External/stub RP (RP')
3526 SP-20 Current RP
3527 SP-16 Static Link
3528 SP-12 Clean up
3529 SP-8 Calling Stub RP (RP'')
3530 SP-4 Previous SP
3532 Top of Frame
3534 SP-0 Stack Pointer (points to next available address)
3536 */
3538 /* This function saves registers as follows. Registers marked with ' are
3539 this function's registers (as opposed to the previous function's).
3540 If a frame_pointer isn't needed, r4 is saved as a general register;
3541 the space for the frame pointer is still allocated, though, to keep
3542 things simple.
3545 Top of Frame
3547 SP (FP') Previous FP
3548 SP + 4 Alignment filler (sigh)
3549 SP + 8 Space for locals reserved here.
3553 SP + n All call saved registers used.
3557 SP + o All call saved fp registers used.
3561 SP + p (SP') points to next available address.
3563 */
3565 /* Global variables set by output_function_prologue(). */
3566 /* Size of frame. Need to know this to emit return insns from
3567 leaf procedures. */
3568 static HOST_WIDE_INT actual_fsize, local_fsize;
3569 static int save_fregs;
3571 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3572 Handle case where DISP > 8k by using the add_high_const patterns.
3574 Note that in the DISP > 8k case, we will leave the high part of the
3575 address in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
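/* Illustrative example (added annotation): disp = 0x12345 does not
fit in 14 bits, so on a 32-bit target the fallback case below
computes %r1 = base + HIGH(0x12345) with an addil and stores through
the LO_SUM address formed from %r1 and R'0x12345. */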
3577 static void
3578 store_reg (int reg, HOST_WIDE_INT disp, int base)
3580 rtx insn, dest, src, basereg;
3582 src = gen_rtx_REG (word_mode, reg);
3583 basereg = gen_rtx_REG (Pmode, base);
3584 if (VAL_14_BITS_P (disp))
3586 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3587 insn = emit_move_insn (dest, src);
3589 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3591 rtx delta = GEN_INT (disp);
3592 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3594 emit_move_insn (tmpreg, delta);
3595 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3596 if (DO_FRAME_NOTES)
3598 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3599 gen_rtx_SET (VOIDmode, tmpreg,
3600 gen_rtx_PLUS (Pmode, basereg, delta)));
3601 RTX_FRAME_RELATED_P (insn) = 1;
3603 dest = gen_rtx_MEM (word_mode, tmpreg);
3604 insn = emit_move_insn (dest, src);
3606 else
3608 rtx delta = GEN_INT (disp);
3609 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3610 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3612 emit_move_insn (tmpreg, high);
3613 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3614 insn = emit_move_insn (dest, src);
3615 if (DO_FRAME_NOTES)
3616 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3617 gen_rtx_SET (VOIDmode,
3618 gen_rtx_MEM (word_mode,
3619 gen_rtx_PLUS (word_mode,
3620 basereg,
3621 delta)),
3622 src));
3625 if (DO_FRAME_NOTES)
3626 RTX_FRAME_RELATED_P (insn) = 1;
3629 /* Emit RTL to store REG at the memory location specified by BASE and then
3630 add MOD to BASE. MOD must be <= 8k. */
3632 static void
3633 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3635 rtx insn, basereg, srcreg, delta;
3637 gcc_assert (VAL_14_BITS_P (mod));
3639 basereg = gen_rtx_REG (Pmode, base);
3640 srcreg = gen_rtx_REG (word_mode, reg);
3641 delta = GEN_INT (mod);
3643 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3644 if (DO_FRAME_NOTES)
3646 RTX_FRAME_RELATED_P (insn) = 1;
3648 /* RTX_FRAME_RELATED_P must be set on each frame related set
3649 in a parallel with more than one element. */
3650 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3651 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3655 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3656 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3657 whether to add a frame note or not.
3659 In the DISP > 8k case, we leave the high part of the address in %r1.
3660 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3662 static void
3663 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3665 rtx insn;
3667 if (VAL_14_BITS_P (disp))
3669 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3670 plus_constant (gen_rtx_REG (Pmode, base), disp));
3672 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3674 rtx basereg = gen_rtx_REG (Pmode, base);
3675 rtx delta = GEN_INT (disp);
3676 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3678 emit_move_insn (tmpreg, delta);
3679 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3680 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3681 if (DO_FRAME_NOTES)
3682 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3683 gen_rtx_SET (VOIDmode, tmpreg,
3684 gen_rtx_PLUS (Pmode, basereg, delta)));
3686 else
3688 rtx basereg = gen_rtx_REG (Pmode, base);
3689 rtx delta = GEN_INT (disp);
3690 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3692 emit_move_insn (tmpreg,
3693 gen_rtx_PLUS (Pmode, basereg,
3694 gen_rtx_HIGH (Pmode, delta)));
3695 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3696 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3699 if (DO_FRAME_NOTES && note)
3700 RTX_FRAME_RELATED_P (insn) = 1;
3703 HOST_WIDE_INT
3704 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3706 int freg_saved = 0;
3707 int i, j;
3709 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3710 be consistent with the rounding and size calculation done here.
3711 Change them at the same time. */
3713 /* We do our own stack alignment. First, round the size of the
3714 stack locals up to a word boundary. */
3715 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3717 /* Space for previous frame pointer + filler. If any frame is
3718 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3719 waste some space here for the sake of HP compatibility. The
3720 first slot is only used when the frame pointer is needed. */
3721 if (size || frame_pointer_needed)
3722 size += STARTING_FRAME_OFFSET;
3724 /* If the current function calls __builtin_eh_return, then we need
3725 to allocate stack space for registers that will hold data for
3726 the exception handler. */
3727 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3729 unsigned int i;
3731 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3732 continue;
3733 size += i * UNITS_PER_WORD;
3736 /* Account for space used by the callee general register saves. */
3737 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3738 if (df_regs_ever_live_p (i))
3739 size += UNITS_PER_WORD;
3741 /* Account for space used by the callee floating point register saves. */
3742 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3743 if (df_regs_ever_live_p (i)
3744 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3746 freg_saved = 1;
3748 /* We always save both halves of the FP register, so always
3749 increment the frame size by 8 bytes. */
3750 size += 8;
3753 /* If any of the floating registers are saved, account for the
3754 alignment needed for the floating point register save block. */
3755 if (freg_saved)
3757 size = (size + 7) & ~7;
3758 if (fregs_live)
3759 *fregs_live = 1;
3762 /* The various ABIs include space for the outgoing parameters in the
3763 size of the current function's stack frame. We don't need to align
3764 for the outgoing arguments as their alignment is set by the final
3765 rounding for the frame as a whole. */
3766 size += crtl->outgoing_args_size;
3768 /* Allocate space for the fixed frame marker. This space must be
3769 allocated for any function that makes calls or allocates
3770 stack space. */
3771 if (!current_function_is_leaf || size)
3772 size += TARGET_64BIT ? 48 : 32;
3774 /* Finally, round to the preferred stack boundary. */
3775 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3776 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
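/* A worked example, assuming the 32-bit ABI (UNITS_PER_WORD of 4, a
   32-byte frame marker, and a 64-byte preferred stack boundary): a
   non-leaf function with 10 bytes of locals and no register saves
   rounds the locals up to 12, adds STARTING_FRAME_OFFSET, the
   outgoing argument area and the 32-byte marker, then rounds the
   total up to the next multiple of 64.  */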
3779 /* Generate the assembly code for function entry. FILE is a stdio
3780 stream to output the code to. SIZE is an int: how many units of
3781 temporary storage to allocate.
3783 Refer to the array `regs_ever_live' to determine which registers to
3784 save; `regs_ever_live[I]' is nonzero if register number I is ever
3785 used in the function. This function is responsible for knowing
3786 which registers should not be saved even if used. */
3788 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3789 of memory. If any fpu reg is used in the function, we allocate
3790 such a block here, at the bottom of the frame, just in case it's needed.
3792 If this function is a leaf procedure, then we may choose not
3793 to do a "save" insn. The decision about whether or not
3794 to do this is made in regclass.c. */
3796 static void
3797 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3799 /* The function's label and associated .PROC must never be
3800 separated and must be output *after* any profiling declarations
3801 to avoid changing spaces/subspaces within a procedure. */
3802 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3803 fputs ("\t.PROC\n", file);
3805 /* hppa_expand_prologue does the dirty work now. We just need
3806 to output the assembler directives which denote the start
3807 of a function. */
3808 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3809 if (current_function_is_leaf)
3810 fputs (",NO_CALLS", file);
3811 else
3812 fputs (",CALLS", file);
3813 if (rp_saved)
3814 fputs (",SAVE_RP", file);
3816 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3817 at the beginning of the frame and that it is used as the frame
3818 pointer for the frame. We do this because our current frame
3819 layout doesn't conform to that specified in the HP runtime
3820 documentation and we need a way to indicate to programs such as
3821 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3822 isn't used by HP compilers but is supported by the assembler.
3823 However, SAVE_SP is supposed to indicate that the previous stack
3824 pointer has been saved in the frame marker. */
3825 if (frame_pointer_needed)
3826 fputs (",SAVE_SP", file);
3828 /* Pass on information about the number of callee register saves
3829 performed in the prologue.
3831 The compiler is supposed to pass the highest register number
3832 saved; the assembler then has to adjust that number before
3833 entering it into the unwind descriptor (to account for any
3834 caller saved registers with lower register numbers than the
3835 first callee saved register). */
3836 if (gr_saved)
3837 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3839 if (fr_saved)
3840 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3842 fputs ("\n\t.ENTRY\n", file);
3844 remove_useless_addtr_insns (0);
3847 void
3848 hppa_expand_prologue (void)
3850 int merge_sp_adjust_with_store = 0;
3851 HOST_WIDE_INT size = get_frame_size ();
3852 HOST_WIDE_INT offset;
3853 int i;
3854 rtx insn, tmpreg;
3856 gr_saved = 0;
3857 fr_saved = 0;
3858 save_fregs = 0;
3860 /* Compute total size for frame pointer, filler, locals and rounding to
3861 the next word boundary. Similar code appears in compute_frame_size
3862 and must be changed in tandem with this code. */
3863 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3864 if (local_fsize || frame_pointer_needed)
3865 local_fsize += STARTING_FRAME_OFFSET;
3867 actual_fsize = compute_frame_size (size, &save_fregs);
3868 if (flag_stack_usage)
3869 current_function_static_stack_size = actual_fsize;
3871 /* Compute a few things we will use often. */
3872 tmpreg = gen_rtx_REG (word_mode, 1);
3874 /* Save RP first. The calling conventions manual states RP will
3875 always be stored into the caller's frame at sp - 20 or sp - 16
3876 depending on which ABI is in use. */
3877 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3879 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3880 rp_saved = true;
3882 else
3883 rp_saved = false;
3885 /* Allocate the local frame and set up the frame pointer if needed. */
3886 if (actual_fsize != 0)
3888 if (frame_pointer_needed)
3890 /* Copy the old frame pointer temporarily into %r1. Set up the
3891 new stack pointer, then store away the saved old frame pointer
3892 into the stack at sp and at the same time update the stack
3893 pointer by actual_fsize bytes. Two versions: the first
3894 handles small (<8k) frames; the second handles large (>=8k)
3895 frames. */
3896 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3897 if (DO_FRAME_NOTES)
3898 RTX_FRAME_RELATED_P (insn) = 1;
3900 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3901 if (DO_FRAME_NOTES)
3902 RTX_FRAME_RELATED_P (insn) = 1;
3904 if (VAL_14_BITS_P (actual_fsize))
3905 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3906 else
3908 /* It is incorrect to store the saved frame pointer at *sp,
3909 then increment sp (writes beyond the current stack boundary).
3911 So instead use stwm to store at *sp and post-increment the
3912 stack pointer as an atomic operation. Then increment sp to
3913 finish allocating the new frame. */
3914 HOST_WIDE_INT adjust1 = 8192 - 64;
3915 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3917 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3918 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3919 adjust2, 1);
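/* For example, with a 10000-byte frame: the stwm stores %r1 (holding
   the old frame pointer) at *sp and bumps sp by adjust1 = 8192 - 64
   = 8128 in one insn; the remaining adjust2 = 1872 bytes are added by
   the set_reg_plus_d call.  */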
3922 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3923 we need to store the previous stack pointer (frame pointer)
3924 into the frame marker on targets that use the HP unwind
3925 library. This allows the HP unwind library to be used to
3926 unwind GCC frames. However, we are not fully compatible
3927 with the HP library because our frame layout differs from
3928 that specified in the HP runtime specification.
3930 We don't want a frame note on this instruction as the frame
3931 marker moves during dynamic stack allocation.
3933 This instruction also serves as a blockage to prevent
3934 register spills from being scheduled before the stack
3935 pointer is raised. This is necessary as we store
3936 registers using the frame pointer as a base register,
3937 and the frame pointer is set before sp is raised. */
3938 if (TARGET_HPUX_UNWIND_LIBRARY)
3940 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3941 GEN_INT (TARGET_64BIT ? -8 : -4));
3943 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3944 hard_frame_pointer_rtx);
3946 else
3947 emit_insn (gen_blockage ());
3949 /* No frame pointer needed. */
3950 else
3952 /* In some cases we can perform the first callee register save
3953 and allocating the stack frame at the same time. If so, just
3954 make a note of it and defer allocating the frame until saving
3955 the callee registers. */
3956 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3957 merge_sp_adjust_with_store = 1;
3958 /* Cannot optimize. Adjust the stack frame by actual_fsize
3959 bytes. */
3960 else
3961 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3962 actual_fsize, 1);
3966 /* Normal register save.
3968 Do not save the frame pointer in the frame_pointer_needed case. It
3969 was done earlier. */
3970 if (frame_pointer_needed)
3972 offset = local_fsize;
3974 /* Saving the EH return data registers in the frame is the simplest
3975 way to get the frame unwind information emitted. We put them
3976 just before the general registers. */
3977 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3979 unsigned int i, regno;
3981 for (i = 0; ; ++i)
3983 regno = EH_RETURN_DATA_REGNO (i);
3984 if (regno == INVALID_REGNUM)
3985 break;
3987 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
3988 offset += UNITS_PER_WORD;
3992 for (i = 18; i >= 4; i--)
3993 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3995 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
3996 offset += UNITS_PER_WORD;
3997 gr_saved++;
3999 /* Account for %r3 which is saved in a special place. */
4000 gr_saved++;
4002 /* No frame pointer needed. */
4003 else
4005 offset = local_fsize - actual_fsize;
4007 /* Saving the EH return data registers in the frame is the simplest
4008 way to get the frame unwind information emitted. */
4009 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4011 unsigned int i, regno;
4013 for (i = 0; ; ++i)
4015 regno = EH_RETURN_DATA_REGNO (i);
4016 if (regno == INVALID_REGNUM)
4017 break;
4019 /* If merge_sp_adjust_with_store is nonzero, then we can
4020 optimize the first save. */
4021 if (merge_sp_adjust_with_store)
4023 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4024 merge_sp_adjust_with_store = 0;
4026 else
4027 store_reg (regno, offset, STACK_POINTER_REGNUM);
4028 offset += UNITS_PER_WORD;
4032 for (i = 18; i >= 3; i--)
4033 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4035 /* If merge_sp_adjust_with_store is nonzero, then we can
4036 optimize the first GR save. */
4037 if (merge_sp_adjust_with_store)
4039 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4040 merge_sp_adjust_with_store = 0;
4042 else
4043 store_reg (i, offset, STACK_POINTER_REGNUM);
4044 offset += UNITS_PER_WORD;
4045 gr_saved++;
4048 /* If we wanted to merge the SP adjustment with a GR save, but we never
4049 did any GR saves, then just emit the adjustment here. */
4050 if (merge_sp_adjust_with_store)
4051 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4052 actual_fsize, 1);
4055 /* The hppa calling conventions say that %r19, the pic offset
4056 register, is saved at sp - 32 (in this function's frame)
4057 when generating PIC code. FIXME: What is the correct thing
4058 to do for functions which make no calls and allocate no
4059 frame? Do we need to allocate a frame, or can we just omit
4060 the save? For now we'll just omit the save.
4062 We don't want a note on this insn as the frame marker can
4063 move if there is a dynamic stack allocation. */
4064 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4066 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4068 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4072 /* Align pointer properly (doubleword boundary). */
4073 offset = (offset + 7) & ~7;
4075 /* Floating point register store. */
4076 if (save_fregs)
4078 rtx base;
4080 /* First get the frame or stack pointer to the start of the FP register
4081 save area. */
4082 if (frame_pointer_needed)
4084 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4085 base = hard_frame_pointer_rtx;
4087 else
4089 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4090 base = stack_pointer_rtx;
4093 /* Now actually save the FP registers. */
4094 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4096 if (df_regs_ever_live_p (i)
4097 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4099 rtx addr, insn, reg;
4100 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4101 reg = gen_rtx_REG (DFmode, i);
4102 insn = emit_move_insn (addr, reg);
4103 if (DO_FRAME_NOTES)
4105 RTX_FRAME_RELATED_P (insn) = 1;
4106 if (TARGET_64BIT)
4108 rtx mem = gen_rtx_MEM (DFmode,
4109 plus_constant (base, offset));
4110 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4111 gen_rtx_SET (VOIDmode, mem, reg));
4113 else
4115 rtx meml = gen_rtx_MEM (SFmode,
4116 plus_constant (base, offset));
4117 rtx memr = gen_rtx_MEM (SFmode,
4118 plus_constant (base, offset + 4));
4119 rtx regl = gen_rtx_REG (SFmode, i);
4120 rtx regr = gen_rtx_REG (SFmode, i + 1);
4121 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4122 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4123 rtvec vec;
4125 RTX_FRAME_RELATED_P (setl) = 1;
4126 RTX_FRAME_RELATED_P (setr) = 1;
4127 vec = gen_rtvec (2, setl, setr);
4128 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4129 gen_rtx_SEQUENCE (VOIDmode, vec));
4132 offset += GET_MODE_SIZE (DFmode);
4133 fr_saved++;
4139 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4140 Handle case where DISP > 8k by using the add_high_const patterns. */
4142 static void
4143 load_reg (int reg, HOST_WIDE_INT disp, int base)
4145 rtx dest = gen_rtx_REG (word_mode, reg);
4146 rtx basereg = gen_rtx_REG (Pmode, base);
4147 rtx src;
4149 if (VAL_14_BITS_P (disp))
4150 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
4151 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4153 rtx delta = GEN_INT (disp);
4154 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4156 emit_move_insn (tmpreg, delta);
4157 if (TARGET_DISABLE_INDEXING)
4159 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4160 src = gen_rtx_MEM (word_mode, tmpreg);
4162 else
4163 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4165 else
4167 rtx delta = GEN_INT (disp);
4168 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4169 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4171 emit_move_insn (tmpreg, high);
4172 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4175 emit_move_insn (dest, src);
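/* Illustrative sketch of the resulting code (the exact mnemonics
   depend on word_mode and the matching pa.md patterns):

       ldw disp(base),reg              ; 14-bit displacement
       addil L'disp,base               ; large displacement, via %r1
       ldw R'disp(%r1),reg

   with further variants for TARGET_DISABLE_INDEXING and for 64-bit
   displacements wider than 32 bits, as coded above.  */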
4178 /* Update the total code bytes output to the text section. */
4180 static void
4181 update_total_code_bytes (unsigned int nbytes)
4183 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4184 && !IN_NAMED_SECTION_P (cfun->decl))
4186 unsigned int old_total = total_code_bytes;
4188 total_code_bytes += nbytes;
4190 /* Be prepared to handle overflows. */
4191 if (old_total > total_code_bytes)
4192 total_code_bytes = UINT_MAX;
4196 /* This function generates the assembly code for function exit.
4197 Args are as for output_function_prologue ().
4199 The function epilogue should not depend on the current stack
4200 pointer! It should use the frame pointer only. This is mandatory
4201 because of alloca; we also take advantage of it to omit stack
4202 adjustments before returning. */
4204 static void
4205 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4207 rtx insn = get_last_insn ();
4209 last_address = 0;
4211 /* hppa_expand_epilogue does the dirty work now. We just need
4212 to output the assembler directives which denote the end
4213 of a function.
4215 To make debuggers happy, emit a nop if the epilogue was completely
4216 eliminated due to a volatile call as the last insn in the
4217 current function. That way the return address (in %r2) will
4218 always point to a valid instruction in the current function. */
4220 /* Get the last real insn. */
4221 if (GET_CODE (insn) == NOTE)
4222 insn = prev_real_insn (insn);
4224 /* If it is a sequence, then look inside. */
4225 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4226 insn = XVECEXP (PATTERN (insn), 0, 0);
4228 /* If insn is a CALL_INSN, then it must be a call to a volatile
4229 function (otherwise there would be epilogue insns). */
4230 if (insn && GET_CODE (insn) == CALL_INSN)
4232 fputs ("\tnop\n", file);
4233 last_address += 4;
4236 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4238 if (TARGET_SOM && TARGET_GAS)
4240 /* We're done with this subspace except possibly for some additional
4241 debug information. Forget that we are in this subspace to ensure
4242 that the next function is output in its own subspace. */
4243 in_section = NULL;
4244 cfun->machine->in_nsubspa = 2;
4247 if (INSN_ADDRESSES_SET_P ())
4249 insn = get_last_nonnote_insn ();
4250 last_address += INSN_ADDRESSES (INSN_UID (insn));
4251 if (INSN_P (insn))
4252 last_address += insn_default_length (insn);
4253 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4254 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4256 else
4257 last_address = UINT_MAX;
4259 /* Finally, update the total number of code bytes output so far. */
4260 update_total_code_bytes (last_address);
4263 void
4264 hppa_expand_epilogue (void)
4266 rtx tmpreg;
4267 HOST_WIDE_INT offset;
4268 HOST_WIDE_INT ret_off = 0;
4269 int i;
4270 int merge_sp_adjust_with_load = 0;
4272 /* We will use this often. */
4273 tmpreg = gen_rtx_REG (word_mode, 1);
4275 /* Try to restore RP early to avoid load/use interlocks when
4276 RP gets used in the return (bv) instruction. This appears to still
4277 be necessary even when we schedule the prologue and epilogue. */
4278 if (rp_saved)
4280 ret_off = TARGET_64BIT ? -16 : -20;
4281 if (frame_pointer_needed)
4283 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4284 ret_off = 0;
4286 else
4288 /* No frame pointer, and stack is smaller than 8k. */
4289 if (VAL_14_BITS_P (ret_off - actual_fsize))
4291 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4292 ret_off = 0;
4297 /* General register restores. */
4298 if (frame_pointer_needed)
4300 offset = local_fsize;
4302 /* If the current function calls __builtin_eh_return, then we need
4303 to restore the saved EH data registers. */
4304 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4306 unsigned int i, regno;
4308 for (i = 0; ; ++i)
4310 regno = EH_RETURN_DATA_REGNO (i);
4311 if (regno == INVALID_REGNUM)
4312 break;
4314 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4315 offset += UNITS_PER_WORD;
4319 for (i = 18; i >= 4; i--)
4320 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4322 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4323 offset += UNITS_PER_WORD;
4326 else
4328 offset = local_fsize - actual_fsize;
4330 /* If the current function calls __builtin_eh_return, then we need
4331 to restore the saved EH data registers. */
4332 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4334 unsigned int i, regno;
4336 for (i = 0; ; ++i)
4338 regno = EH_RETURN_DATA_REGNO (i);
4339 if (regno == INVALID_REGNUM)
4340 break;
4342 /* Only for the first load.
4343 merge_sp_adjust_with_load holds the register load
4344 with which we will merge the sp adjustment. */
4345 if (merge_sp_adjust_with_load == 0
4346 && local_fsize == 0
4347 && VAL_14_BITS_P (-actual_fsize))
4348 merge_sp_adjust_with_load = regno;
4349 else
4350 load_reg (regno, offset, STACK_POINTER_REGNUM);
4351 offset += UNITS_PER_WORD;
4355 for (i = 18; i >= 3; i--)
4357 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4359 /* Only for the first load.
4360 merge_sp_adjust_with_load holds the register load
4361 with which we will merge the sp adjustment. */
4362 if (merge_sp_adjust_with_load == 0
4363 && local_fsize == 0
4364 && VAL_14_BITS_P (-actual_fsize))
4365 merge_sp_adjust_with_load = i;
4366 else
4367 load_reg (i, offset, STACK_POINTER_REGNUM);
4368 offset += UNITS_PER_WORD;
4373 /* Align pointer properly (doubleword boundary). */
4374 offset = (offset + 7) & ~7;
4376 /* FP register restores. */
4377 if (save_fregs)
4379 /* Adjust the register to index off of. */
4380 if (frame_pointer_needed)
4381 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4382 else
4383 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4385 /* Actually do the restores now. */
4386 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4387 if (df_regs_ever_live_p (i)
4388 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4390 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4391 rtx dest = gen_rtx_REG (DFmode, i);
4392 emit_move_insn (dest, src);
4396 /* Emit a blockage insn here to keep these insns from being moved to
4397 an earlier spot in the epilogue, or into the main instruction stream.
4399 This is necessary as we must not cut the stack back before all the
4400 restores are finished. */
4401 emit_insn (gen_blockage ());
4403 /* Reset stack pointer (and possibly frame pointer). The stack
4404 pointer is initially set to fp + 64 to avoid a race condition. */
4405 if (frame_pointer_needed)
4407 rtx delta = GEN_INT (-64);
4409 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4410 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4411 stack_pointer_rtx, delta));
4413 /* If we were deferring a callee register restore, do it now. */
4414 else if (merge_sp_adjust_with_load)
4416 rtx delta = GEN_INT (-actual_fsize);
4417 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4419 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4421 else if (actual_fsize != 0)
4422 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4423 - actual_fsize, 0);
4425 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4426 frame greater than 8k), do so now. */
4427 if (ret_off != 0)
4428 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4430 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4432 rtx sa = EH_RETURN_STACKADJ_RTX;
4434 emit_insn (gen_blockage ());
4435 emit_insn (TARGET_64BIT
4436 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4437 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4441 rtx
4442 hppa_pic_save_rtx (void)
4444 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4447 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4448 #define NO_DEFERRED_PROFILE_COUNTERS 0
4449 #endif
4452 /* Vector of funcdef numbers. */
4453 static VEC(int,heap) *funcdef_nos;
4455 /* Output deferred profile counters. */
4456 static void
4457 output_deferred_profile_counters (void)
4459 unsigned int i;
4460 int align, n;
4462 if (VEC_empty (int, funcdef_nos))
4463 return;
4465 switch_to_section (data_section);
4466 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4467 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4469 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4471 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4472 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4475 VEC_free (int, heap, funcdef_nos);
4478 void
4479 hppa_profile_hook (int label_no)
4481 /* We use SImode for the address of the function in both 32 and
4482 64-bit code to avoid having to provide DImode versions of the
4483 lcla2 and load_offset_label_address insn patterns. */
4484 rtx reg = gen_reg_rtx (SImode);
4485 rtx label_rtx = gen_label_rtx ();
4486 rtx begin_label_rtx, call_insn;
4487 char begin_label_name[16];
4489 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4490 label_no);
4491 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4493 if (TARGET_64BIT)
4494 emit_move_insn (arg_pointer_rtx,
4495 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4496 GEN_INT (64)));
4498 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4500 /* The address of the function is loaded into %r25 with an instruction-
4501 relative sequence that avoids the use of relocations. The sequence
4502 is split so that the load_offset_label_address instruction can
4503 occupy the delay slot of the call to _mcount. */
4504 if (TARGET_PA_20)
4505 emit_insn (gen_lcla2 (reg, label_rtx));
4506 else
4507 emit_insn (gen_lcla1 (reg, label_rtx));
4509 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4510 reg, begin_label_rtx, label_rtx));
4512 #if !NO_DEFERRED_PROFILE_COUNTERS
4514 rtx count_label_rtx, addr, r24;
4515 char count_label_name[16];
4517 VEC_safe_push (int, heap, funcdef_nos, label_no);
4518 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4519 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4521 addr = force_reg (Pmode, count_label_rtx);
4522 r24 = gen_rtx_REG (Pmode, 24);
4523 emit_move_insn (r24, addr);
4525 call_insn =
4526 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4527 gen_rtx_SYMBOL_REF (Pmode,
4528 "_mcount")),
4529 GEN_INT (TARGET_64BIT ? 24 : 12)));
4531 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4533 #else
4535 call_insn =
4536 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4537 gen_rtx_SYMBOL_REF (Pmode,
4538 "_mcount")),
4539 GEN_INT (TARGET_64BIT ? 16 : 8)));
4541 #endif
4543 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4544 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4546 /* Indicate the _mcount call cannot throw, nor will it execute a
4547 non-local goto. */
4548 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4551 /* Fetch the return address for the frame COUNT steps up from
4552 the current frame, after the prologue. FRAMEADDR is the
4553 frame pointer of the COUNT frame.
4555 We want to ignore any export stub remnants here. To handle this,
4556 we examine the code at the return address, and if it is an export
4557 stub, we return a memory rtx for the stub return address stored
4558 at frame-24.
4560 The value returned is used in two different ways:
4562 1. To find a function's caller.
4564 2. To change the return address for a function.
4566 This function handles most instances of case 1; however, it will
4567 fail if there are two levels of stubs to execute on the return
4568 path. The only way I believe that can happen is if the return value
4569 needs a parameter relocation, which never happens for C code.
4571 This function handles most instances of case 2; however, it will
4572 fail if we did not originally have stub code on the return path
4573 but will need stub code on the new return path. This can happen if
4574 the caller & callee are both in the main program, but the new
4575 return location is in a shared library. */
4577 rtx
4578 return_addr_rtx (int count, rtx frameaddr)
4580 rtx label;
4581 rtx rp;
4582 rtx saved_rp;
4583 rtx ins;
4585 /* Instruction stream at the normal return address for the export stub:
4587 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4588 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4589 0x00011820 | stub+16: mtsp r1,sr0
4590 0xe0400002 | stub+20: be,n 0(sr0,rp)
4592 0xe0400002 must be specified as -532676606 so that it won't be
4593 rejected as an invalid immediate operand on 64-bit hosts. */
4595 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4596 int i;
4598 if (count != 0)
4599 return NULL_RTX;
4601 rp = get_hard_reg_initial_val (Pmode, 2);
4603 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4604 return rp;
4606 /* If there is no export stub then just use the value saved from
4607 the return pointer register. */
4609 saved_rp = gen_reg_rtx (Pmode);
4610 emit_move_insn (saved_rp, rp);
4612 /* Get pointer to the instruction stream. We have to mask out the
4613 privilege level from the two low order bits of the return address
4614 pointer here so that ins will point to the start of the first
4615 instruction that would have been executed if we returned. */
4616 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4617 label = gen_label_rtx ();
4619 /* Check the instruction stream at the normal return address for the
4620 export stub. If it is an export stub, then our return address is
4621 really in -24[frameaddr]. */
4623 for (i = 0; i < 3; i++)
4625 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4626 rtx op1 = GEN_INT (insns[i]);
4627 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4630 /* Here we know that our return address points to an export
4631 stub. We don't want to return the address of the export stub,
4632 but rather the return address of the export stub. That return
4633 address is stored at -24[frameaddr]. */
4635 emit_move_insn (saved_rp,
4636 gen_rtx_MEM (Pmode,
4637 memory_address (Pmode,
4638 plus_constant (frameaddr,
4639 -24))));
4641 emit_label (label);
4643 return saved_rp;
4646 void
4647 emit_bcond_fp (rtx operands[])
4649 enum rtx_code code = GET_CODE (operands[0]);
4650 rtx operand0 = operands[1];
4651 rtx operand1 = operands[2];
4652 rtx label = operands[3];
4654 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4655 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4657 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4658 gen_rtx_IF_THEN_ELSE (VOIDmode,
4659 gen_rtx_fmt_ee (NE,
4660 VOIDmode,
4661 gen_rtx_REG (CCFPmode, 0),
4662 const0_rtx),
4663 gen_rtx_LABEL_REF (VOIDmode, label),
4664 pc_rtx)));
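/* Loosely, the two emissions above become a floating-point compare
   that sets the FP status condition (modelled here as CCFP register 0)
   followed by a conditional branch on it, i.e. an fcmp/ftest/branch
   style sequence. This is an informal description, not a guaranteed
   instruction mapping.  */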
4668 /* Adjust the cost of a scheduling dependency. Return the new cost of
4669 the dependency LINK of INSN on DEP_INSN. COST is the current cost. */
4671 static int
4672 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4674 enum attr_type attr_type;
4676 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4677 true dependencies as they are described with bypasses now. */
4678 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4679 return cost;
4681 if (! recog_memoized (insn))
4682 return 0;
4684 attr_type = get_attr_type (insn);
4686 switch (REG_NOTE_KIND (link))
4688 case REG_DEP_ANTI:
4689 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4690 cycles later. */
4692 if (attr_type == TYPE_FPLOAD)
4694 rtx pat = PATTERN (insn);
4695 rtx dep_pat = PATTERN (dep_insn);
4696 if (GET_CODE (pat) == PARALLEL)
4698 /* This happens for the fldXs,mb patterns. */
4699 pat = XVECEXP (pat, 0, 0);
4701 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4702 /* If this happens, we have to extend this to schedule
4703 optimally. Return 0 for now. */
4704 return 0;
4706 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4708 if (! recog_memoized (dep_insn))
4709 return 0;
4710 switch (get_attr_type (dep_insn))
4712 case TYPE_FPALU:
4713 case TYPE_FPMULSGL:
4714 case TYPE_FPMULDBL:
4715 case TYPE_FPDIVSGL:
4716 case TYPE_FPDIVDBL:
4717 case TYPE_FPSQRTSGL:
4718 case TYPE_FPSQRTDBL:
4719 /* An fpload can't be issued until one cycle before a
4720 preceding arithmetic operation has finished if
4721 the target of the fpload is any of the sources
4722 (or destination) of the arithmetic operation. */
4723 return insn_default_latency (dep_insn) - 1;
4725 default:
4726 return 0;
4730 else if (attr_type == TYPE_FPALU)
4732 rtx pat = PATTERN (insn);
4733 rtx dep_pat = PATTERN (dep_insn);
4734 if (GET_CODE (pat) == PARALLEL)
4736 /* This happens for the fldXs,mb patterns. */
4737 pat = XVECEXP (pat, 0, 0);
4739 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4740 /* If this happens, we have to extend this to schedule
4741 optimally. Return 0 for now. */
4742 return 0;
4744 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4746 if (! recog_memoized (dep_insn))
4747 return 0;
4748 switch (get_attr_type (dep_insn))
4750 case TYPE_FPDIVSGL:
4751 case TYPE_FPDIVDBL:
4752 case TYPE_FPSQRTSGL:
4753 case TYPE_FPSQRTDBL:
4754 /* An ALU flop can't be issued until two cycles before a
4755 preceding divide or sqrt operation has finished if
4756 the target of the ALU flop is any of the sources
4757 (or destination) of the divide or sqrt operation. */
4758 return insn_default_latency (dep_insn) - 2;
4760 default:
4761 return 0;
4766 /* For other anti dependencies, the cost is 0. */
4767 return 0;
4769 case REG_DEP_OUTPUT:
4770 /* Output dependency; DEP_INSN writes a register that INSN writes some
4771 cycles later. */
4772 if (attr_type == TYPE_FPLOAD)
4774 rtx pat = PATTERN (insn);
4775 rtx dep_pat = PATTERN (dep_insn);
4776 if (GET_CODE (pat) == PARALLEL)
4778 /* This happens for the fldXs,mb patterns. */
4779 pat = XVECEXP (pat, 0, 0);
4781 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4782 /* If this happens, we have to extend this to schedule
4783 optimally. Return 0 for now. */
4784 return 0;
4786 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4788 if (! recog_memoized (dep_insn))
4789 return 0;
4790 switch (get_attr_type (dep_insn))
4792 case TYPE_FPALU:
4793 case TYPE_FPMULSGL:
4794 case TYPE_FPMULDBL:
4795 case TYPE_FPDIVSGL:
4796 case TYPE_FPDIVDBL:
4797 case TYPE_FPSQRTSGL:
4798 case TYPE_FPSQRTDBL:
4799 /* An fpload can't be issued until one cycle before a
4800 preceding arithmetic operation has finished if
4801 the target of the fpload is the destination of the
4802 arithmetic operation.
4804 Exception: For PA7100LC, PA7200 and PA7300, the cost
4805 is 3 cycles, unless they bundle together. We also
4806 pay the penalty if the second insn is an fpload. */
4807 return insn_default_latency (dep_insn) - 1;
4809 default:
4810 return 0;
4814 else if (attr_type == TYPE_FPALU)
4816 rtx pat = PATTERN (insn);
4817 rtx dep_pat = PATTERN (dep_insn);
4818 if (GET_CODE (pat) == PARALLEL)
4820 /* This happens for the fldXs,mb patterns. */
4821 pat = XVECEXP (pat, 0, 0);
4823 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4824 /* If this happens, we have to extend this to schedule
4825 optimally. Return 0 for now. */
4826 return 0;
4828 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4830 if (! recog_memoized (dep_insn))
4831 return 0;
4832 switch (get_attr_type (dep_insn))
4834 case TYPE_FPDIVSGL:
4835 case TYPE_FPDIVDBL:
4836 case TYPE_FPSQRTSGL:
4837 case TYPE_FPSQRTDBL:
4838 /* An ALU flop can't be issued until two cycles before a
4839 preceding divide or sqrt operation has finished if
4840 the target of the ALU flop is also the target of
4841 the divide or sqrt operation. */
4842 return insn_default_latency (dep_insn) - 2;
4844 default:
4845 return 0;
4850 /* For other output dependencies, the cost is 0. */
4851 return 0;
4853 default:
4854 gcc_unreachable ();
4858 /* Adjust scheduling priorities. We use this to try to keep addil
4859 and the next use of %r1 close together. */
4860 static int
4861 pa_adjust_priority (rtx insn, int priority)
4863 rtx set = single_set (insn);
4864 rtx src, dest;
4865 if (set)
4867 src = SET_SRC (set);
4868 dest = SET_DEST (set);
4869 if (GET_CODE (src) == LO_SUM
4870 && symbolic_operand (XEXP (src, 1), VOIDmode)
4871 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4872 priority >>= 3;
4874 else if (GET_CODE (src) == MEM
4875 && GET_CODE (XEXP (src, 0)) == LO_SUM
4876 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4877 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4878 priority >>= 1;
4880 else if (GET_CODE (dest) == MEM
4881 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4882 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4883 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4884 priority >>= 3;
4886 return priority;
4889 /* The 700 can only issue a single insn at a time.
4890 The 7XXX processors can issue two insns at a time.
4891 The 8000 can issue 4 insns at a time. */
4892 static int
4893 pa_issue_rate (void)
4895 switch (pa_cpu)
4897 case PROCESSOR_700: return 1;
4898 case PROCESSOR_7100: return 2;
4899 case PROCESSOR_7100LC: return 2;
4900 case PROCESSOR_7200: return 2;
4901 case PROCESSOR_7300: return 2;
4902 case PROCESSOR_8000: return 4;
4904 default:
4905 gcc_unreachable ();
4911 /* Return any length adjustment needed by INSN which already has its length
4912 computed as LENGTH. Return zero if no adjustment is necessary.
4914 For the PA: function calls, millicode calls, and backwards short
4915 conditional branches with unfilled delay slots need a 4-byte adjustment
4916 (to account for the NOP which will be inserted into the instruction stream).
4918 Also compute the length of an inline block move here as it is too
4919 complicated to express as a length attribute in pa.md. */
4920 int
4921 pa_adjust_insn_length (rtx insn, int length)
4923 rtx pat = PATTERN (insn);
4925 /* Jumps inside switch tables which have unfilled delay slots need
4926 adjustment. */
4927 if (GET_CODE (insn) == JUMP_INSN
4928 && GET_CODE (pat) == PARALLEL
4929 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4930 return 4;
4931 /* Millicode insn with an unfilled delay slot. */
4932 else if (GET_CODE (insn) == INSN
4933 && GET_CODE (pat) != SEQUENCE
4934 && GET_CODE (pat) != USE
4935 && GET_CODE (pat) != CLOBBER
4936 && get_attr_type (insn) == TYPE_MILLI)
4937 return 4;
4938 /* Block move pattern. */
4939 else if (GET_CODE (insn) == INSN
4940 && GET_CODE (pat) == PARALLEL
4941 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4942 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4943 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4944 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4945 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4946 return compute_movmem_length (insn) - 4;
4947 /* Block clear pattern. */
4948 else if (GET_CODE (insn) == INSN
4949 && GET_CODE (pat) == PARALLEL
4950 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4951 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4952 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4953 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4954 return compute_clrmem_length (insn) - 4;
4955 /* Conditional branch with an unfilled delay slot. */
4956 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4958 /* Adjust a short backwards conditional with an unfilled delay slot. */
4959 if (GET_CODE (pat) == SET
4960 && length == 4
4961 && JUMP_LABEL (insn) != NULL_RTX
4962 && ! forward_branch_p (insn))
4963 return 4;
4964 else if (GET_CODE (pat) == PARALLEL
4965 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4966 && length == 4)
4967 return 4;
4968 /* Adjust dbra insn with short backwards conditional branch with
4969 unfilled delay slot -- only for case where counter is in a
4970 general register. */
4971 else if (GET_CODE (pat) == PARALLEL
4972 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4973 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4974 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4975 && length == 4
4976 && ! forward_branch_p (insn))
4977 return 4;
4978 else
4979 return 0;
4981 return 0;
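/* Example: a short backward conditional branch with an unfilled delay
   slot has a length attribute of 4; the adjustment above adds another
   4 for the nop that will fill the slot, so the branch ends up counted
   as 8 bytes (assuming the usual ADJUST_INSN_LENGTH definition in
   pa.h, which adds our return value to LENGTH).  */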
4984 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
4986 static bool
4987 pa_print_operand_punct_valid_p (unsigned char code)
4989 if (code == '@'
4990 || code == '#'
4991 || code == '*'
4992 || code == '^')
4993 return true;
4995 return false;
4998 /* Print operand X (an rtx) in assembler syntax to file FILE.
4999 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5000 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5002 void
5003 print_operand (FILE *file, rtx x, int code)
5005 switch (code)
5007 case '#':
5008 /* Output a 'nop' if there's nothing for the delay slot. */
5009 if (dbr_sequence_length () == 0)
5010 fputs ("\n\tnop", file);
5011 return;
5012 case '*':
5013 /* Output a nullification completer if there's nothing for the
5014 delay slot or nullification is requested. */
5015 if (dbr_sequence_length () == 0 ||
5016 (final_sequence &&
5017 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5018 fputs (",n", file);
5019 return;
5020 case 'R':
5021 /* Print out the second register name of a register pair.
5022 I.e., R (6) => 7. */
5023 fputs (reg_names[REGNO (x) + 1], file);
5024 return;
5025 case 'r':
5026 /* A register or zero. */
5027 if (x == const0_rtx
5028 || (x == CONST0_RTX (DFmode))
5029 || (x == CONST0_RTX (SFmode)))
5031 fputs ("%r0", file);
5032 return;
5034 else
5035 break;
5036 case 'f':
5037 /* A register or zero (floating point). */
5038 if (x == const0_rtx
5039 || (x == CONST0_RTX (DFmode))
5040 || (x == CONST0_RTX (SFmode)))
5042 fputs ("%fr0", file);
5043 return;
5045 else
5046 break;
5047 case 'A':
5049 rtx xoperands[2];
5051 xoperands[0] = XEXP (XEXP (x, 0), 0);
5052 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5053 output_global_address (file, xoperands[1], 0);
5054 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5055 return;
5058 case 'C': /* Plain (C)ondition */
5059 case 'X':
5060 switch (GET_CODE (x))
5062 case EQ:
5063 fputs ("=", file); break;
5064 case NE:
5065 fputs ("<>", file); break;
5066 case GT:
5067 fputs (">", file); break;
5068 case GE:
5069 fputs (">=", file); break;
5070 case GEU:
5071 fputs (">>=", file); break;
5072 case GTU:
5073 fputs (">>", file); break;
5074 case LT:
5075 fputs ("<", file); break;
5076 case LE:
5077 fputs ("<=", file); break;
5078 case LEU:
5079 fputs ("<<=", file); break;
5080 case LTU:
5081 fputs ("<<", file); break;
5082 default:
5083 gcc_unreachable ();
5085 return;
5086 case 'N': /* Condition, (N)egated */
5087 switch (GET_CODE (x))
5089 case EQ:
5090 fputs ("<>", file); break;
5091 case NE:
5092 fputs ("=", file); break;
5093 case GT:
5094 fputs ("<=", file); break;
5095 case GE:
5096 fputs ("<", file); break;
5097 case GEU:
5098 fputs ("<<", file); break;
5099 case GTU:
5100 fputs ("<<=", file); break;
5101 case LT:
5102 fputs (">=", file); break;
5103 case LE:
5104 fputs (">", file); break;
5105 case LEU:
5106 fputs (">>", file); break;
5107 case LTU:
5108 fputs (">>=", file); break;
5109 default:
5110 gcc_unreachable ();
5112 return;
5113 /* For floating point comparisons. Note that the output
5114 predicates are the complement of the desired condition. The
5115 conditions for GT, GE, LT, LE and LTGT cause an invalid
5116 operation exception if the result is unordered and this
5117 exception is enabled in the floating-point status register. */
5118 case 'Y':
5119 switch (GET_CODE (x))
5121 case EQ:
5122 fputs ("!=", file); break;
5123 case NE:
5124 fputs ("=", file); break;
5125 case GT:
5126 fputs ("!>", file); break;
5127 case GE:
5128 fputs ("!>=", file); break;
5129 case LT:
5130 fputs ("!<", file); break;
5131 case LE:
5132 fputs ("!<=", file); break;
5133 case LTGT:
5134 fputs ("!<>", file); break;
5135 case UNLE:
5136 fputs ("!?<=", file); break;
5137 case UNLT:
5138 fputs ("!?<", file); break;
5139 case UNGE:
5140 fputs ("!?>=", file); break;
5141 case UNGT:
5142 fputs ("!?>", file); break;
5143 case UNEQ:
5144 fputs ("!?=", file); break;
5145 case UNORDERED:
5146 fputs ("!?", file); break;
5147 case ORDERED:
5148 fputs ("?", file); break;
5149 default:
5150 gcc_unreachable ();
5152 return;
5153 case 'S': /* Condition, operands are (S)wapped. */
5154 switch (GET_CODE (x))
5156 case EQ:
5157 fputs ("=", file); break;
5158 case NE:
5159 fputs ("<>", file); break;
5160 case GT:
5161 fputs ("<", file); break;
5162 case GE:
5163 fputs ("<=", file); break;
5164 case GEU:
5165 fputs ("<<=", file); break;
5166 case GTU:
5167 fputs ("<<", file); break;
5168 case LT:
5169 fputs (">", file); break;
5170 case LE:
5171 fputs (">=", file); break;
5172 case LEU:
5173 fputs (">>=", file); break;
5174 case LTU:
5175 fputs (">>", file); break;
5176 default:
5177 gcc_unreachable ();
5179 return;
5180 case 'B': /* Condition, (B)oth swapped and negated. */
5181 switch (GET_CODE (x))
5183 case EQ:
5184 fputs ("<>", file); break;
5185 case NE:
5186 fputs ("=", file); break;
5187 case GT:
5188 fputs (">=", file); break;
5189 case GE:
5190 fputs (">", file); break;
5191 case GEU:
5192 fputs (">>", file); break;
5193 case GTU:
5194 fputs (">>=", file); break;
5195 case LT:
5196 fputs ("<=", file); break;
5197 case LE:
5198 fputs ("<", file); break;
5199 case LEU:
5200 fputs ("<<", file); break;
5201 case LTU:
5202 fputs ("<<=", file); break;
5203 default:
5204 gcc_unreachable ();
5206 return;
5207 case 'k':
5208 gcc_assert (GET_CODE (x) == CONST_INT);
5209 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5210 return;
5211 case 'Q':
5212 gcc_assert (GET_CODE (x) == CONST_INT);
5213 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5214 return;
5215 case 'L':
5216 gcc_assert (GET_CODE (x) == CONST_INT);
5217 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5218 return;
5219 case 'O':
5220 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5221 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5222 return;
5223 case 'p':
5224 gcc_assert (GET_CODE (x) == CONST_INT);
5225 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5226 return;
5227 case 'P':
5228 gcc_assert (GET_CODE (x) == CONST_INT);
5229 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5230 return;
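/* The four cases above appear to convert a shift count into the
   length (%Q, %L) or bit-position (%p, %P) operand of the PA deposit
   and extract instructions, which number bits from the left (0 is
   the MSB): e.g., %P maps a shift count of 3 to 31 - 3 = 28.  */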
5231 case 'I':
5232 if (GET_CODE (x) == CONST_INT)
5233 fputs ("i", file);
5234 return;
5235 case 'M':
5236 case 'F':
5237 switch (GET_CODE (XEXP (x, 0)))
5239 case PRE_DEC:
5240 case PRE_INC:
5241 if (ASSEMBLER_DIALECT == 0)
5242 fputs ("s,mb", file);
5243 else
5244 fputs (",mb", file);
5245 break;
5246 case POST_DEC:
5247 case POST_INC:
5248 if (ASSEMBLER_DIALECT == 0)
5249 fputs ("s,ma", file);
5250 else
5251 fputs (",ma", file);
5252 break;
5253 case PLUS:
5254 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5255 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5257 if (ASSEMBLER_DIALECT == 0)
5258 fputs ("x", file);
5260 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5261 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5263 if (ASSEMBLER_DIALECT == 0)
5264 fputs ("x,s", file);
5265 else
5266 fputs (",s", file);
5268 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5269 fputs ("s", file);
5270 break;
5271 default:
5272 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5273 fputs ("s", file);
5274 break;
5276 return;
5277 case 'G':
5278 output_global_address (file, x, 0);
5279 return;
5280 case 'H':
5281 output_global_address (file, x, 1);
5282 return;
5283 case 0: /* Don't do anything special */
5284 break;
5285 case 'Z':
5287 unsigned op[3];
5288 compute_zdepwi_operands (INTVAL (x), op);
5289 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5290 return;
5292 case 'z':
5294 unsigned op[3];
5295 compute_zdepdi_operands (INTVAL (x), op);
5296 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5297 return;
5299 case 'c':
5300 /* We can get here from a .vtable_inherit due to our
5301 CONSTANT_ADDRESS_P rejecting perfectly good constant
5302 addresses. */
5303 break;
5304 default:
5305 gcc_unreachable ();
5307 if (GET_CODE (x) == REG)
5309 fputs (reg_names [REGNO (x)], file);
5310 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5312 fputs ("R", file);
5313 return;
5315 if (FP_REG_P (x)
5316 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5317 && (REGNO (x) & 1) == 0)
5318 fputs ("L", file);
5320 else if (GET_CODE (x) == MEM)
5322 int size = GET_MODE_SIZE (GET_MODE (x));
5323 rtx base = NULL_RTX;
5324 switch (GET_CODE (XEXP (x, 0)))
5326 case PRE_DEC:
5327 case POST_DEC:
5328 base = XEXP (XEXP (x, 0), 0);
5329 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5330 break;
5331 case PRE_INC:
5332 case POST_INC:
5333 base = XEXP (XEXP (x, 0), 0);
5334 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5335 break;
5336 case PLUS:
5337 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5338 fprintf (file, "%s(%s)",
5339 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5340 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5341 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5342 fprintf (file, "%s(%s)",
5343 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5344 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5345 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5346 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5348 /* Because the REG_POINTER flag can get lost during reload,
5349 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5350 index and base registers in the combined move patterns. */
5351 rtx base = XEXP (XEXP (x, 0), 1);
5352 rtx index = XEXP (XEXP (x, 0), 0);
5354 fprintf (file, "%s(%s)",
5355 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5357 else
5358 output_address (XEXP (x, 0));
5359 break;
5360 default:
5361 output_address (XEXP (x, 0));
5362 break;
5365 else
5366 output_addr_const (file, x);
5369 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5371 void
5372 output_global_address (FILE *file, rtx x, int round_constant)
5375 /* Imagine (high (const (plus ...))). */
5376 if (GET_CODE (x) == HIGH)
5377 x = XEXP (x, 0);
5379 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5380 output_addr_const (file, x);
5381 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5383 output_addr_const (file, x);
5384 fputs ("-$global$", file);
5386 else if (GET_CODE (x) == CONST)
5388 const char *sep = "";
5389 int offset = 0; /* assembler wants -$global$ at end */
5390 rtx base = NULL_RTX;
5392 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5394 case SYMBOL_REF:
5395 base = XEXP (XEXP (x, 0), 0);
5396 output_addr_const (file, base);
5397 break;
5398 case CONST_INT:
5399 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5400 break;
5401 default:
5402 gcc_unreachable ();
5405 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5407 case SYMBOL_REF:
5408 base = XEXP (XEXP (x, 0), 1);
5409 output_addr_const (file, base);
5410 break;
5411 case CONST_INT:
5412 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5413 break;
5414 default:
5415 gcc_unreachable ();
5418 /* How bogus. The compiler is apparently responsible for
5419 rounding the constant if it uses an LR field selector.
5421 The linker and/or assembler seem like a better place since
5422 they have to do this kind of thing already.
5424 If we fail to do this, HP's optimizing linker may eliminate
5425 an addil, but not update the ldw/stw/ldo instruction that
5426 uses the result of the addil. */
5427 if (round_constant)
5428 offset = ((offset + 0x1000) & ~0x1fff);
5430 switch (GET_CODE (XEXP (x, 0)))
5432 case PLUS:
5433 if (offset < 0)
5435 offset = -offset;
5436 sep = "-";
5438 else
5439 sep = "+";
5440 break;
5442 case MINUS:
5443 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5444 sep = "-";
5445 break;
5447 default:
5448 gcc_unreachable ();
5451 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5452 fputs ("-$global$", file);
5453 if (offset)
5454 fprintf (file, "%s%d", sep, offset);
5456 else
5457 output_addr_const (file, x);
5460 /* Output boilerplate text to appear at the beginning of the file.
5461 There are several possible versions. */
5462 #define aputs(x) fputs(x, asm_out_file)
5463 static inline void
5464 pa_file_start_level (void)
5466 if (TARGET_64BIT)
5467 aputs ("\t.LEVEL 2.0w\n");
5468 else if (TARGET_PA_20)
5469 aputs ("\t.LEVEL 2.0\n");
5470 else if (TARGET_PA_11)
5471 aputs ("\t.LEVEL 1.1\n");
5472 else
5473 aputs ("\t.LEVEL 1.0\n");
5476 static inline void
5477 pa_file_start_space (int sortspace)
5479 aputs ("\t.SPACE $PRIVATE$");
5480 if (sortspace)
5481 aputs (",SORT=16");
5482 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5483 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5484 "\n\t.SPACE $TEXT$");
5485 if (sortspace)
5486 aputs (",SORT=8");
5487 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5488 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5491 static inline void
5492 pa_file_start_file (int want_version)
5494 if (write_symbols != NO_DEBUG)
5496 output_file_directive (asm_out_file, main_input_filename);
5497 if (want_version)
5498 aputs ("\t.version\t\"01.01\"\n");
5502 static inline void
5503 pa_file_start_mcount (const char *aswhat)
5505 if (profile_flag)
5506 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5509 static void
5510 pa_elf_file_start (void)
5512 pa_file_start_level ();
5513 pa_file_start_mcount ("ENTRY");
5514 pa_file_start_file (0);
5517 static void
5518 pa_som_file_start (void)
5520 pa_file_start_level ();
5521 pa_file_start_space (0);
5522 aputs ("\t.IMPORT $global$,DATA\n"
5523 "\t.IMPORT $$dyncall,MILLICODE\n");
5524 pa_file_start_mcount ("CODE");
5525 pa_file_start_file (0);
5528 static void
5529 pa_linux_file_start (void)
5531 pa_file_start_file (1);
5532 pa_file_start_level ();
5533 pa_file_start_mcount ("CODE");
5536 static void
5537 pa_hpux64_gas_file_start (void)
5539 pa_file_start_level ();
5540 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5541 if (profile_flag)
5542 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5543 #endif
5544 pa_file_start_file (1);
5547 static void
5548 pa_hpux64_hpas_file_start (void)
5550 pa_file_start_level ();
5551 pa_file_start_space (1);
5552 pa_file_start_mcount ("CODE");
5553 pa_file_start_file (0);
5555 #undef aputs
5557 /* Search the deferred plabel list for SYMBOL and return its internal
5558 label. If an entry for SYMBOL is not found, a new entry is created. */
5560 static rtx
5561 get_deferred_plabel (rtx symbol)
5563 const char *fname = XSTR (symbol, 0);
5564 size_t i;
5566 /* See if we have already put this function on the list of deferred
5567 plabels. This list is generally small, so a linear search is not
5568 too ugly. If it proves too slow, replace it with something faster. */
5569 for (i = 0; i < n_deferred_plabels; i++)
5570 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5571 break;
5573 /* If the deferred plabel list is empty, or this entry was not found
5574 on the list, create a new entry on the list. */
5575 if (deferred_plabels == NULL || i == n_deferred_plabels)
5577 tree id;
5579 if (deferred_plabels == 0)
5580 deferred_plabels = ggc_alloc_deferred_plabel ();
5581 else
5582 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5583 deferred_plabels,
5584 n_deferred_plabels + 1);
5586 i = n_deferred_plabels++;
5587 deferred_plabels[i].internal_label = gen_label_rtx ();
5588 deferred_plabels[i].symbol = symbol;
5590 /* Gross. We have just implicitly taken the address of this
5591 function. Mark it in the same manner as assemble_name. */
5592 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5593 if (id)
5594 mark_referenced (id);
5597 return deferred_plabels[i].internal_label;
5600 static void
5601 output_deferred_plabels (void)
5603 size_t i;
5605 /* If we have some deferred plabels, then we need to switch into the
5606 data or readonly data section, and align it to a 4-byte (8-byte
5607 for TARGET_64BIT) boundary before outputting the deferred plabels. */
5608 if (n_deferred_plabels)
5610 switch_to_section (flag_pic ? data_section : readonly_data_section);
5611 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5614 /* Now output the deferred plabels. */
5615 for (i = 0; i < n_deferred_plabels; i++)
5617 targetm.asm_out.internal_label (asm_out_file, "L",
5618 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5619 assemble_integer (deferred_plabels[i].symbol,
5620 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5624 #if HPUX_LONG_DOUBLE_LIBRARY
5625 /* Initialize optabs to point to HPUX long double emulation routines. */
5626 static void
5627 pa_hpux_init_libfuncs (void)
5629 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5630 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5631 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5632 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5633 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5634 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5635 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5636 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5637 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5639 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5640 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5641 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5642 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5643 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5644 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5645 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5647 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5648 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5649 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5650 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5652 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5653 ? "__U_Qfcnvfxt_quad_to_sgl"
5654 : "_U_Qfcnvfxt_quad_to_sgl");
5655 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5656 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5657 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5659 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5660 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5661 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5662 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5664 #endif
5666 /* HP's millicode routines mean something special to the assembler.
5667 Keep track of which ones we have used. */
5669 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5670 static void import_milli (enum millicodes);
5671 static char imported[(int) end1000];
5672 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5673 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5674 #define MILLI_START 10
5676 static void
5677 import_milli (enum millicodes code)
5679 char str[sizeof (import_string)];
5681 if (!imported[(int) code])
5683 imported[(int) code] = 1;
5684 strcpy (str, import_string);
5685 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5686 output_asm_insn (str, 0);
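/* For example, import_milli (mulI) outputs ".IMPORT $$mulI,MILLICODE";
   the four-character routine name is spliced over the "...." in
   import_string at offset MILLI_START.  */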
5690 /* The register constraints have put the operands and return value in
5691 the proper registers. */
5693 const char *
5694 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5696 import_milli (mulI);
5697 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5700 /* Emit the rtl for doing a division by a constant. */
5702 /* Do magic division millicodes exist for this value? */
5703 const int magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5705 /* We'll use an array to keep track of the magic millicodes and
5706 whether or not we've used them already. [n][0] is signed, [n][1] is
5707 unsigned. */
5709 static int div_milli[16][2];
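/* Reading the table above: magic_milli[n] is nonzero exactly for
   n in {3, 5, 6, 7, 9, 10, 12, 14, 15}, so only those divisors get a
   dedicated $$divI_n/$$divU_n millicode.  All other divisors fall
   back to the generic $$divI/$$divU routines in output_div_insn.  */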
5711 int
5712 emit_hpdiv_const (rtx *operands, int unsignedp)
5714 if (GET_CODE (operands[2]) == CONST_INT
5715 && INTVAL (operands[2]) > 0
5716 && INTVAL (operands[2]) < 16
5717 && magic_milli[INTVAL (operands[2])])
5719 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5721 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5722 emit
5723 (gen_rtx_PARALLEL
5724 (VOIDmode,
5725 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5726 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5727 SImode,
5728 gen_rtx_REG (SImode, 26),
5729 operands[2])),
5730 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5731 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5732 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5733 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5734 gen_rtx_CLOBBER (VOIDmode, ret))));
5735 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5736 return 1;
5738 return 0;
5741 const char *
5742 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5744 int divisor;
5746 /* If the divisor is a constant, try to use one of the special
5747 opcodes. */
5748 if (GET_CODE (operands[0]) == CONST_INT)
5750 static char buf[100];
5751 divisor = INTVAL (operands[0]);
5752 if (!div_milli[divisor][unsignedp])
5754 div_milli[divisor][unsignedp] = 1;
5755 if (unsignedp)
5756 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5757 else
5758 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5760 if (unsignedp)
5762 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5763 INTVAL (operands[0]));
5764 return output_millicode_call (insn,
5765 gen_rtx_SYMBOL_REF (SImode, buf));
5767 else
5769 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5770 INTVAL (operands[0]));
5771 return output_millicode_call (insn,
5772 gen_rtx_SYMBOL_REF (SImode, buf));
5775 /* Divisor isn't a special constant. */
5776 else
5778 if (unsignedp)
5780 import_milli (divU);
5781 return output_millicode_call (insn,
5782 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5784 else
5786 import_milli (divI);
5787 return output_millicode_call (insn,
5788 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5793 /* Output a $$rem millicode to do mod. */
5795 const char *
5796 output_mod_insn (int unsignedp, rtx insn)
5798 if (unsignedp)
5800 import_milli (remU);
5801 return output_millicode_call (insn,
5802 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5804 else
5806 import_milli (remI);
5807 return output_millicode_call (insn,
5808 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5812 void
5813 output_arg_descriptor (rtx call_insn)
5815 const char *arg_regs[4];
5816 enum machine_mode arg_mode;
5817 rtx link;
5818 int i, output_flag = 0;
5819 int regno;
5821 /* We neither need nor want argument location descriptors for the
5822 64-bit runtime environment or the ELF32 environment. */
5823 if (TARGET_64BIT || TARGET_ELF32)
5824 return;
5826 for (i = 0; i < 4; i++)
5827 arg_regs[i] = 0;
5829 /* Specify explicitly that no argument relocations should take place
5830 if using the portable runtime calling conventions. */
5831 if (TARGET_PORTABLE_RUNTIME)
5833 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5834 asm_out_file);
5835 return;
5838 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5839 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5840 link; link = XEXP (link, 1))
5842 rtx use = XEXP (link, 0);
5844 if (! (GET_CODE (use) == USE
5845 && GET_CODE (XEXP (use, 0)) == REG
5846 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5847 continue;
5849 arg_mode = GET_MODE (XEXP (use, 0));
5850 regno = REGNO (XEXP (use, 0));
5851 if (regno >= 23 && regno <= 26)
5853 arg_regs[26 - regno] = "GR";
5854 if (arg_mode == DImode)
5855 arg_regs[25 - regno] = "GR";
5857 else if (regno >= 32 && regno <= 39)
5859 if (arg_mode == SFmode)
5860 arg_regs[(regno - 32) / 2] = "FR";
5861 else
5863 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5864 arg_regs[(regno - 34) / 2] = "FR";
5865 arg_regs[(regno - 34) / 2 + 1] = "FU";
5866 #else
5867 arg_regs[(regno - 34) / 2] = "FU";
5868 arg_regs[(regno - 34) / 2 + 1] = "FR";
5869 #endif
5873 fputs ("\t.CALL ", asm_out_file);
5874 for (i = 0; i < 4; i++)
5876 if (arg_regs[i])
5878 if (output_flag++)
5879 fputc (',', asm_out_file);
5880 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5883 fputc ('\n', asm_out_file);
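/* An illustrative (hypothetical) case: a call whose USEs name an int
   in %r26 and a double in FP register 36 under the 32-bit SOM runtime
   produces, by the index arithmetic above,

	.CALL ARGW0=GR,ARGW1=FR,ARGW2=FU

   where the relative order of FR/FU for the double's two words flips
   when HP_FP_ARG_DESCRIPTOR_REVERSED is defined.  */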
5886 static reg_class_t
5887 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5888 enum machine_mode mode, secondary_reload_info *sri)
5890 int regno;
5891 enum reg_class rclass = (enum reg_class) rclass_i;
5893 /* Handle the easy stuff first. */
5894 if (rclass == R1_REGS)
5895 return NO_REGS;
5897 if (REG_P (x))
5899 regno = REGNO (x);
5900 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5901 return NO_REGS;
5903 else
5904 regno = -1;
5906 /* If we have something like (mem (mem (...))), we can safely assume the
5907 inner MEM will end up in a general register after reloading, so there's
5908 no need for a secondary reload. */
5909 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5910 return NO_REGS;
5912 /* Trying to load a constant into a FP register during PIC code
5913 generation requires %r1 as a scratch register. */
5914 if (flag_pic
5915 && (mode == SImode || mode == DImode)
5916 && FP_REG_CLASS_P (rclass)
5917 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5919 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5920 : CODE_FOR_reload_indi_r1);
5921 return NO_REGS;
5924 /* Secondary reloads of symbolic operands require %r1 as a scratch
5925 register when we're generating PIC code and when the operand isn't
5926 readonly. */
5927 if (symbolic_expression_p (x))
5929 if (GET_CODE (x) == HIGH)
5930 x = XEXP (x, 0);
5932 if (flag_pic || !read_only_operand (x, VOIDmode))
5934 gcc_assert (mode == SImode || mode == DImode);
5935 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5936 : CODE_FOR_reload_indi_r1);
5937 return NO_REGS;
5941 /* Profiling showed the PA port spends about 1.3% of its compilation
5942 time in true_regnum from calls inside pa_secondary_reload_class. */
5943 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5944 regno = true_regnum (x);
5946 /* In order to allow 14-bit displacements in integer loads and stores,
5947 we need to prevent reload from generating out of range integer mode
5948 loads and stores to the floating point registers. Previously, we
5949 used to call for a secondary reload and have emit_move_sequence()
5950 fix the instruction sequence. However, reload occasionally wouldn't
5951 generate the reload and we would end up with an invalid REG+D memory
5952 address. So, now we use an intermediate general register for most
5953 memory loads and stores. */
5954 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5955 && GET_MODE_CLASS (mode) == MODE_INT
5956 && FP_REG_CLASS_P (rclass))
5958 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5959 the secondary reload needed for a pseudo. It never passes a
5960 REG+D address. */
5961 if (GET_CODE (x) == MEM)
5963 x = XEXP (x, 0);
5965 /* We don't need an intermediate for indexed and LO_SUM DLT
5966 memory addresses. When INT14_OK_STRICT is true, it might
5967 appear that we could directly allow register indirect
5968 memory addresses. However, this doesn't work because we
5969 don't support SUBREGs in floating-point register copies
5970 and reload doesn't tell us when it's going to use a SUBREG. */
5971 if (IS_INDEX_ADDR_P (x)
5972 || IS_LO_SUM_DLT_ADDR_P (x))
5973 return NO_REGS;
5975 /* Otherwise, we need an intermediate general register. */
5976 return GENERAL_REGS;
5979 /* Request a secondary reload with a general scratch register
5980 for everything else. ??? Could symbolic operands be handled
5981 directly when generating non-PIC PA 2.0 code? */
5982 sri->icode = (in_p
5983 ? direct_optab_handler (reload_in_optab, mode)
5984 : direct_optab_handler (reload_out_optab, mode));
5985 return NO_REGS;
5988 /* We need a secondary register (GPR) for copies between the SAR
5989 and anything other than a general register. */
5990 if (rclass == SHIFT_REGS && (regno <= 0 || regno >= 32))
5992 sri->icode = (in_p
5993 ? direct_optab_handler (reload_in_optab, mode)
5994 : direct_optab_handler (reload_out_optab, mode));
5995 return NO_REGS;
5998 /* A SAR<->FP register copy requires a secondary register (GPR) as
5999 well as secondary memory. */
6000 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6001 && (REGNO_REG_CLASS (regno) == SHIFT_REGS
6002 && FP_REG_CLASS_P (rclass)))
6003 sri->icode = (in_p
6004 ? direct_optab_handler (reload_in_optab, mode)
6005 : direct_optab_handler (reload_out_optab, mode));
6007 return NO_REGS;
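/* To make the FP/integer case above concrete: for a hypothetical
   reload of (mem:SI (plus:SI (reg:SI %r30) (const_int 4096))) into an
   FP register, the address is neither indexed nor a LO_SUM DLT
   reference, so GENERAL_REGS is returned and the value travels
   through a general register rather than risking an out-of-range
   REG+D floating-point access.  */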
6010 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6011 is only marked as live on entry by df-scan when it is a fixed
6012 register. It isn't a fixed register in the 64-bit runtime,
6013 so we need to mark it here. */
6015 static void
6016 pa_extra_live_on_entry (bitmap regs)
6018 if (TARGET_64BIT)
6019 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6022 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6023 to prevent it from being deleted. */
6025 rtx
6026 pa_eh_return_handler_rtx (void)
6028 rtx tmp;
6030 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6031 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6032 tmp = gen_rtx_MEM (word_mode, tmp);
6033 tmp->volatil = 1;
6034 return tmp;
6037 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6038 by invisible reference. As a GCC extension, we also pass anything
6039 with a zero or variable size by reference.
6041 The 64-bit runtime does not describe passing any types by invisible
6042 reference. The internals of GCC can't currently handle passing
6043 empty structures, and zero or variable length arrays when they are
6044 not passed entirely on the stack or by reference. Thus, as a GCC
6045 extension, we pass these types by reference. The HP compiler doesn't
6046 support these types, so hopefully there shouldn't be any compatibility
6047 issues. This may have to be revisited when HP releases a C99 compiler
6048 or updates the ABI. */
6050 static bool
6051 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
6052 enum machine_mode mode, const_tree type,
6053 bool named ATTRIBUTE_UNUSED)
6055 HOST_WIDE_INT size;
6057 if (type)
6058 size = int_size_in_bytes (type);
6059 else
6060 size = GET_MODE_SIZE (mode);
6062 if (TARGET_64BIT)
6063 return size <= 0;
6064 else
6065 return size <= 0 || size > 8;
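/* Concretely: under the 32-bit runtime a 12-byte struct (size > 8)
   or an empty struct (size <= 0) is passed by reference, while an
   8-byte double is passed by value; under the 64-bit runtime only
   the zero and variable size cases are passed by reference.  */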
6068 enum direction
6069 function_arg_padding (enum machine_mode mode, const_tree type)
6071 if (mode == BLKmode
6072 || (TARGET_64BIT
6073 && type
6074 && (AGGREGATE_TYPE_P (type)
6075 || TREE_CODE (type) == COMPLEX_TYPE
6076 || TREE_CODE (type) == VECTOR_TYPE)))
6078 /* Return none if justification is not required. */
6079 if (type
6080 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6081 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6082 return none;
6084 /* The directions set here are ignored when a BLKmode argument larger
6085 than a word is placed in a register. Different code is used for
6086 the stack and registers. This makes it difficult to have a
6087 consistent data representation for both the stack and registers.
6088 For both runtimes, the justification and padding for arguments on
6089 the stack and in registers should be identical. */
6090 if (TARGET_64BIT)
6091 /* The 64-bit runtime specifies left justification for aggregates. */
6092 return upward;
6093 else
6094 /* The 32-bit runtime architecture specifies right justification.
6095 When the argument is passed on the stack, the argument is padded
6096 with garbage on the left. The HP compiler pads with zeros. */
6097 return downward;
6100 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6101 return downward;
6102 else
6103 return none;
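/* Example: a 3-byte BLKmode struct is 24 bits, not a multiple of
   PARM_BOUNDARY, so the 32-bit runtime right-justifies it (downward)
   and the 64-bit runtime left-justifies it (upward); a 4-byte struct
   on the 32-bit runtime needs no justification and gets none.  */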
6107 /* Do what is necessary for `va_start'. We look at the current function
6108 to determine if stdargs or varargs is used and fill in an initial
6109 va_list. A pointer to this constructor is returned. */
6111 static rtx
6112 hppa_builtin_saveregs (void)
6114 rtx offset, dest;
6115 tree fntype = TREE_TYPE (current_function_decl);
6116 int argadj = ((!stdarg_p (fntype))
6117 ? UNITS_PER_WORD : 0);
6119 if (argadj)
6120 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
6121 else
6122 offset = crtl->args.arg_offset_rtx;
6124 if (TARGET_64BIT)
6126 int i, off;
6128 /* Adjust for varargs/stdarg differences. */
6129 if (argadj)
6130 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
6131 else
6132 offset = crtl->args.arg_offset_rtx;
6134 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6135 from the incoming arg pointer and growing to larger addresses. */
6136 for (i = 26, off = -64; i >= 19; i--, off += 8)
6137 emit_move_insn (gen_rtx_MEM (word_mode,
6138 plus_constant (arg_pointer_rtx, off)),
6139 gen_rtx_REG (word_mode, i));
6141 /* The incoming args pointer points just beyond the flushback area;
6142 normally this is not a serious concern. However, when we are doing
6143 varargs/stdargs we want to make the arg pointer point to the start
6144 of the incoming argument area. */
6145 emit_move_insn (virtual_incoming_args_rtx,
6146 plus_constant (arg_pointer_rtx, -64));
6148 /* Now return a pointer to the first anonymous argument. */
6149 return copy_to_reg (expand_binop (Pmode, add_optab,
6150 virtual_incoming_args_rtx,
6151 offset, 0, 0, OPTAB_LIB_WIDEN));
6154 /* Store general registers on the stack. */
6155 dest = gen_rtx_MEM (BLKmode,
6156 plus_constant (crtl->args.internal_arg_pointer,
6157 -16));
6158 set_mem_alias_set (dest, get_varargs_alias_set ());
6159 set_mem_align (dest, BITS_PER_WORD);
6160 move_block_from_reg (23, dest, 4);
6162 /* move_block_from_reg will emit code to store the argument registers
6163 individually as scalar stores.
6165 However, other insns may later load from the same addresses for
6166 a structure load (passing a struct to a varargs routine).
6168 The alias code assumes that such aliasing can never happen, so we
6169 have to keep memory referencing insns from moving up beyond the
6170 last argument register store. So we emit a blockage insn here. */
6171 emit_insn (gen_blockage ());
6173 return copy_to_reg (expand_binop (Pmode, add_optab,
6174 crtl->args.internal_arg_pointer,
6175 offset, 0, 0, OPTAB_LIB_WIDEN));
6178 static void
6179 hppa_va_start (tree valist, rtx nextarg)
6181 nextarg = expand_builtin_saveregs ();
6182 std_expand_builtin_va_start (valist, nextarg);
6185 static tree
6186 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6187 gimple_seq *post_p)
6189 if (TARGET_64BIT)
6191 /* Args grow upward. We can use the generic routines. */
6192 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6194 else /* !TARGET_64BIT */
6196 tree ptr = build_pointer_type (type);
6197 tree valist_type;
6198 tree t, u;
6199 unsigned int size, ofs;
6200 bool indirect;
6202 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6203 if (indirect)
6205 type = ptr;
6206 ptr = build_pointer_type (type);
6208 size = int_size_in_bytes (type);
6209 valist_type = TREE_TYPE (valist);
6211 /* Args grow down. Not handled by generic routines. */
6213 u = fold_convert (sizetype, size_in_bytes (type));
6214 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6215 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
6217 /* Align to a 4- or 8-byte boundary depending on argument size. */
6219 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6220 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6221 t = fold_convert (valist_type, t);
6223 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6225 ofs = (8 - size) % 4;
6226 if (ofs != 0)
6228 u = size_int (ofs);
6229 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6232 t = fold_convert (ptr, t);
6233 t = build_va_arg_indirect_ref (t);
6235 if (indirect)
6236 t = build_va_arg_indirect_ref (t);
6238 return t;
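/* A host-side sketch (hypothetical helper, not used by the compiler)
   of the 32-bit pointer arithmetic built above: step the pointer
   down, align it, then right-justify small arguments in their slot.  */

static char *
pa32_va_arg_sketch (char *valist, unsigned long size)
{
  valist -= size;			/* args grow down */
  valist = (char *) ((unsigned long) valist
		     & (size > 4 ? -8 : -4));	/* 8- or 4-byte align */
  return valist + (8 - size) % 4;	/* right-justify small args */
}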
6242 /* True if MODE is valid for the target. By "valid", we mean able to
6243 be manipulated in non-trivial ways. In particular, this means all
6244 the arithmetic is supported.
6246 Currently, TImode is not valid as the HP 64-bit runtime documentation
6247 doesn't document the alignment and calling conventions for this type.
6248 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6249 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6251 static bool
6252 pa_scalar_mode_supported_p (enum machine_mode mode)
6254 int precision = GET_MODE_PRECISION (mode);
6256 switch (GET_MODE_CLASS (mode))
6258 case MODE_PARTIAL_INT:
6259 case MODE_INT:
6260 if (precision == CHAR_TYPE_SIZE)
6261 return true;
6262 if (precision == SHORT_TYPE_SIZE)
6263 return true;
6264 if (precision == INT_TYPE_SIZE)
6265 return true;
6266 if (precision == LONG_TYPE_SIZE)
6267 return true;
6268 if (precision == LONG_LONG_TYPE_SIZE)
6269 return true;
6270 return false;
6272 case MODE_FLOAT:
6273 if (precision == FLOAT_TYPE_SIZE)
6274 return true;
6275 if (precision == DOUBLE_TYPE_SIZE)
6276 return true;
6277 if (precision == LONG_DOUBLE_TYPE_SIZE)
6278 return true;
6279 return false;
6281 case MODE_DECIMAL_FLOAT:
6282 return false;
6284 default:
6285 gcc_unreachable ();
6289 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6290 it branches into the delay slot. Otherwise, return FALSE. */
6292 static bool
6293 branch_to_delay_slot_p (rtx insn)
6295 rtx jump_insn;
6297 if (dbr_sequence_length ())
6298 return FALSE;
6300 jump_insn = next_active_insn (JUMP_LABEL (insn));
6301 while (insn)
6303 insn = next_active_insn (insn);
6304 if (jump_insn == insn)
6305 return TRUE;
6307 /* We can't rely on the length of asms. So, we return FALSE when
6308 the branch is followed by an asm. */
6309 if (!insn
6310 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6311 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6312 || get_attr_length (insn) > 0)
6313 break;
6316 return FALSE;
6319 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6321 This occurs when INSN has an unfilled delay slot and is followed
6322 by an asm. Disaster can occur if the asm is empty and the jump
6323 branches into the delay slot. So, we add a nop in the delay slot
6324 when this occurs. */
6326 static bool
6327 branch_needs_nop_p (rtx insn)
6329 rtx jump_insn;
6331 if (dbr_sequence_length ())
6332 return FALSE;
6334 jump_insn = next_active_insn (JUMP_LABEL (insn));
6335 while (insn)
6337 insn = next_active_insn (insn);
6338 if (!insn || jump_insn == insn)
6339 return TRUE;
6341 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6342 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6343 && get_attr_length (insn) > 0)
6344 break;
6347 return FALSE;
6350 /* Return TRUE if INSN, a forward jump insn, can use nullification
6351 to skip the following instruction. This avoids an extra cycle due
6352 to a mis-predicted branch when we fall through. */
6354 static bool
6355 use_skip_p (rtx insn)
6357 rtx jump_insn = next_active_insn (JUMP_LABEL (insn));
6359 while (insn)
6361 insn = next_active_insn (insn);
6363 /* We can't rely on the length of asms, so we can't skip asms. */
6364 if (!insn
6365 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6366 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6367 break;
6368 if (get_attr_length (insn) == 4
6369 && jump_insn == next_active_insn (insn))
6370 return TRUE;
6371 if (get_attr_length (insn) > 0)
6372 break;
6375 return FALSE;
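/* For instance, a forward branch over one 4-byte insn can become

	cmpclr,<> %r26,%r25,%r0	; nullify the next insn when taken
	add %r4,%r5,%r6		; runs only on fall-through

   trading a possibly mispredicted branch for a one-insn skip.  The
   registers and condition here are schematic; output_cbranch and
   friends build the real template when use_skip_p holds.  */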
6378 /* This routine handles all the normal conditional branch sequences we
6379 might need to generate. It handles compare immediate vs compare
6380 register, nullification of delay slots, varying length branches,
6381 negated branches, and all combinations of the above. It returns the
6382 output appropriate to emit the branch corresponding to all given
6383 parameters. */
6385 const char *
6386 output_cbranch (rtx *operands, int negated, rtx insn)
6388 static char buf[100];
6389 bool useskip;
6390 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6391 int length = get_attr_length (insn);
6392 int xdelay;
6394 /* A conditional branch to the following instruction (e.g. the delay slot)
6395 is asking for a disaster. This can happen when not optimizing and
6396 when jump optimization fails.
6398 While it is usually safe to emit nothing, this can fail if the
6399 preceding instruction is a nullified branch with an empty delay
6400 slot and the same branch target as this branch. We could check
6401 for this but jump optimization should eliminate nop jumps. It
6402 is always safe to emit a nop. */
6403 if (branch_to_delay_slot_p (insn))
6404 return "nop";
6406 /* The doubleword form of the cmpib instruction doesn't have the LEU
6407 and GTU conditions while the cmpb instruction does. Since we accept
6408 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6409 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6410 operands[2] = gen_rtx_REG (DImode, 0);
6411 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6412 operands[1] = gen_rtx_REG (DImode, 0);
6414 /* If this is a long branch with its delay slot unfilled, set `nullify'
6415 as it can nullify the delay slot and save a nop. */
6416 if (length == 8 && dbr_sequence_length () == 0)
6417 nullify = 1;
6419 /* If this is a short forward conditional branch which did not get
6420 its delay slot filled, the delay slot can still be nullified. */
6421 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6422 nullify = forward_branch_p (insn);
6424 /* A forward branch over a single nullified insn can be done with a
6425 comclr instruction. This avoids a single cycle penalty due to
6426 mis-predicted branch if we fall through (branch not taken). */
6427 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6429 switch (length)
6431 /* All short conditional branches except backwards with an unfilled
6432 delay slot. */
6433 case 4:
6434 if (useskip)
6435 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6436 else
6437 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6438 if (GET_MODE (operands[1]) == DImode)
6439 strcat (buf, "*");
6440 if (negated)
6441 strcat (buf, "%B3");
6442 else
6443 strcat (buf, "%S3");
6444 if (useskip)
6445 strcat (buf, " %2,%r1,%%r0");
6446 else if (nullify)
6448 if (branch_needs_nop_p (insn))
6449 strcat (buf, ",n %2,%r1,%0%#");
6450 else
6451 strcat (buf, ",n %2,%r1,%0");
6453 else
6454 strcat (buf, " %2,%r1,%0");
6455 break;
6457 /* All long conditionals. Note a short backward branch with an
6458 unfilled delay slot is treated just like a long backward branch
6459 with an unfilled delay slot. */
6460 case 8:
6461 /* Handle weird backwards branch with a filled delay slot
6462 which is nullified. */
6463 if (dbr_sequence_length () != 0
6464 && ! forward_branch_p (insn)
6465 && nullify)
6467 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6468 if (GET_MODE (operands[1]) == DImode)
6469 strcat (buf, "*");
6470 if (negated)
6471 strcat (buf, "%S3");
6472 else
6473 strcat (buf, "%B3");
6474 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6476 /* Handle short backwards branch with an unfilled delay slot.
6477 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6478 taken and untaken branches. */
6479 else if (dbr_sequence_length () == 0
6480 && ! forward_branch_p (insn)
6481 && INSN_ADDRESSES_SET_P ()
6482 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6483 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6485 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6486 if (GET_MODE (operands[1]) == DImode)
6487 strcat (buf, "*");
6488 if (negated)
6489 strcat (buf, "%B3 %2,%r1,%0%#");
6490 else
6491 strcat (buf, "%S3 %2,%r1,%0%#");
6493 else
6495 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6496 if (GET_MODE (operands[1]) == DImode)
6497 strcat (buf, "*");
6498 if (negated)
6499 strcat (buf, "%S3");
6500 else
6501 strcat (buf, "%B3");
6502 if (nullify)
6503 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6504 else
6505 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6507 break;
6509 default:
6510 /* The reversed conditional branch must branch over one additional
6511 instruction if the delay slot is filled and needs to be extracted
6512 by output_lbranch. If the delay slot is empty or this is a
6513 nullified forward branch, the instruction after the reversed
6514 condition branch must be nullified. */
6515 if (dbr_sequence_length () == 0
6516 || (nullify && forward_branch_p (insn)))
6518 nullify = 1;
6519 xdelay = 0;
6520 operands[4] = GEN_INT (length);
6522 else
6524 xdelay = 1;
6525 operands[4] = GEN_INT (length + 4);
6528 /* Create a reversed conditional branch which branches around
6529 the following insns. */
6530 if (GET_MODE (operands[1]) != DImode)
6532 if (nullify)
6534 if (negated)
6535 strcpy (buf,
6536 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6537 else
6538 strcpy (buf,
6539 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6541 else
6543 if (negated)
6544 strcpy (buf,
6545 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6546 else
6547 strcpy (buf,
6548 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6551 else
6553 if (nullify)
6555 if (negated)
6556 strcpy (buf,
6557 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6558 else
6559 strcpy (buf,
6560 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6562 else
6564 if (negated)
6565 strcpy (buf,
6566 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6567 else
6568 strcpy (buf,
6569 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6573 output_asm_insn (buf, operands);
6574 return output_lbranch (operands[0], insn, xdelay);
6576 return buf;
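/* For the short, non-nullified case above this builds, e.g.,

	cmpb,= %2,%r1,%0

   from "{com%I2b,|cmp%I2b,}" + "%S3" + " %2,%r1,%0"; the {comb|cmpb}
   alternation lets the same template serve the PA 1.x and PA 2.0
   assembler spellings.  */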
6579 /* This routine handles output of long unconditional branches that
6580 exceed the maximum range of a simple branch instruction. Since
6581 we don't have a register available for the branch, we save register
6582 %r1 in the frame marker, load the branch destination DEST into %r1,
6583 execute the branch, and restore %r1 in the delay slot of the branch.
6585 Since long branches may have an insn in the delay slot and the
6586 delay slot is used to restore %r1, we in general need to extract
6587 this insn and execute it before the branch. However, to facilitate
6588 use of this function by conditional branches, we also provide an
6589 option to not extract the delay insn so that it will be emitted
6590 after the long branch. So, if there is an insn in the delay slot,
6591 it is extracted if XDELAY is nonzero.
6593 The lengths of the various long-branch sequences are 20, 16 and 24
6594 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6596 const char *
6597 output_lbranch (rtx dest, rtx insn, int xdelay)
6599 rtx xoperands[2];
6601 xoperands[0] = dest;
6603 /* First, free up the delay slot. */
6604 if (xdelay && dbr_sequence_length () != 0)
6606 /* We can't handle a jump in the delay slot. */
6607 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6609 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6610 optimize, 0, NULL);
6612 /* Now delete the delay insn. */
6613 SET_INSN_DELETED (NEXT_INSN (insn));
6616 /* Output an insn to save %r1. The runtime documentation doesn't
6617 specify whether the "Clean Up" slot in the caller's frame can
6618 be clobbered by the callee. It isn't copied by HP's builtin
6619 alloca, so this suggests that it can be clobbered if necessary.
6620 The "Static Link" location is copied by HP builtin alloca, so
6621 we avoid using it. Using the cleanup slot might be a problem
6622 if we have to interoperate with languages that pass cleanup
6623 information. However, it should be possible to handle these
6624 situations with GCC's asm feature.
6626 The "Current RP" slot is reserved for the called procedure, so
6627 we try to use it when we don't have a frame of our own. It's
6628 rather unlikely that we won't have a frame when we need to emit
6629 a very long branch.
6631 Really the way to go long term is a register scavenger; go to
6632 the target of the jump and find a register which we can use
6633 as a scratch to hold the value in %r1. Then, we wouldn't have
6634 to free up the delay slot or clobber a slot that may be needed
6635 for other purposes. */
6636 if (TARGET_64BIT)
6638 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6639 /* Use the return pointer slot in the frame marker. */
6640 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6641 else
6642 /* Use the slot at -40 in the frame marker since HP builtin
6643 alloca doesn't copy it. */
6644 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6646 else
6648 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6649 /* Use the return pointer slot in the frame marker. */
6650 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6651 else
6652 /* Use the "Clean Up" slot in the frame marker. In GCC,
6653 the only other use of this location is for copying a
6654 floating point double argument from a floating-point
6655 register to two general registers. The copy is done
6656 as an "atomic" operation when outputting a call, so it
6657 won't interfere with our using the location here. */
6658 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6661 if (TARGET_PORTABLE_RUNTIME)
6663 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6664 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6665 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6667 else if (flag_pic)
6669 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6670 if (TARGET_SOM || !TARGET_GAS)
6672 xoperands[1] = gen_label_rtx ();
6673 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6674 targetm.asm_out.internal_label (asm_out_file, "L",
6675 CODE_LABEL_NUMBER (xoperands[1]));
6676 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6678 else
6680 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6681 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6683 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6685 else
6686 /* Now output a very long branch to the original target. */
6687 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6689 /* Now restore the value of %r1 in the delay slot. */
6690 if (TARGET_64BIT)
6692 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6693 return "ldd -16(%%r30),%%r1";
6694 else
6695 return "ldd -40(%%r30),%%r1";
6697 else
6699 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6700 return "ldw -20(%%r30),%%r1";
6701 else
6702 return "ldw -12(%%r30),%%r1";
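/* The portable-runtime case above is thus a 20-byte sequence, e.g.

	stw %r1,-12(%r30)	; save %r1
	ldil L'dest,%r1
	ldo R'dest(%r1),%r1
	bv %r0(%r1)
	ldw -12(%r30),%r1	; restored in the delay slot

   while the non-PIC be form is 16 bytes and the PIC form 24 bytes,
   matching the lengths quoted before the function.  "dest" is a
   placeholder label.  */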
6706 /* This routine handles all the branch-on-bit conditional branch sequences we
6707 might need to generate. It handles nullification of delay slots,
6708 varying length branches, negated branches and all combinations of the
6709 above. It returns the appropriate output template to emit the branch. */
6711 const char *
6712 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6714 static char buf[100];
6715 bool useskip;
6716 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6717 int length = get_attr_length (insn);
6718 int xdelay;
6720 /* A conditional branch to the following instruction (e.g. the delay slot) is
6721 asking for a disaster. I do not think this can happen as this pattern
6722 is only used when optimizing; jump optimization should eliminate the
6723 jump. But be prepared just in case. */
6725 if (branch_to_delay_slot_p (insn))
6726 return "nop";
6728 /* If this is a long branch with its delay slot unfilled, set `nullify'
6729 as it can nullify the delay slot and save a nop. */
6730 if (length == 8 && dbr_sequence_length () == 0)
6731 nullify = 1;
6733 /* If this is a short forward conditional branch which did not get
6734 its delay slot filled, the delay slot can still be nullified. */
6735 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6736 nullify = forward_branch_p (insn);
6738 /* A forward branch over a single nullified insn can be done with an
6739 extrs instruction. This avoids a single cycle penalty due to
6740 mis-predicted branch if we fall through (branch not taken). */
6741 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6743 switch (length)
6746 /* All short conditional branches except backwards with an unfilled
6747 delay slot. */
6748 case 4:
6749 if (useskip)
6750 strcpy (buf, "{extrs,|extrw,s,}");
6751 else
6752 strcpy (buf, "bb,");
6753 if (useskip && GET_MODE (operands[0]) == DImode)
6754 strcpy (buf, "extrd,s,*");
6755 else if (GET_MODE (operands[0]) == DImode)
6756 strcpy (buf, "bb,*");
6757 if ((which == 0 && negated)
6758 || (which == 1 && ! negated))
6759 strcat (buf, ">=");
6760 else
6761 strcat (buf, "<");
6762 if (useskip)
6763 strcat (buf, " %0,%1,1,%%r0");
6764 else if (nullify && negated)
6766 if (branch_needs_nop_p (insn))
6767 strcat (buf, ",n %0,%1,%3%#");
6768 else
6769 strcat (buf, ",n %0,%1,%3");
6771 else if (nullify && ! negated)
6773 if (branch_needs_nop_p (insn))
6774 strcat (buf, ",n %0,%1,%2%#");
6775 else
6776 strcat (buf, ",n %0,%1,%2");
6778 else if (! nullify && negated)
6779 strcat (buf, " %0,%1,%3");
6780 else if (! nullify && ! negated)
6781 strcat (buf, " %0,%1,%2");
6782 break;
6784 /* All long conditionals. Note a short backward branch with an
6785 unfilled delay slot is treated just like a long backward branch
6786 with an unfilled delay slot. */
6787 case 8:
6788 /* Handle weird backwards branch with a filled delay slot
6789 which is nullified. */
6790 if (dbr_sequence_length () != 0
6791 && ! forward_branch_p (insn)
6792 && nullify)
6794 strcpy (buf, "bb,");
6795 if (GET_MODE (operands[0]) == DImode)
6796 strcat (buf, "*");
6797 if ((which == 0 && negated)
6798 || (which == 1 && ! negated))
6799 strcat (buf, "<");
6800 else
6801 strcat (buf, ">=");
6802 if (negated)
6803 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6804 else
6805 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6807 /* Handle short backwards branch with an unfilled delay slot.
6808 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6809 taken and untaken branches. */
6810 else if (dbr_sequence_length () == 0
6811 && ! forward_branch_p (insn)
6812 && INSN_ADDRESSES_SET_P ()
6813 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6814 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6816 strcpy (buf, "bb,");
6817 if (GET_MODE (operands[0]) == DImode)
6818 strcat (buf, "*");
6819 if ((which == 0 && negated)
6820 || (which == 1 && ! negated))
6821 strcat (buf, ">=");
6822 else
6823 strcat (buf, "<");
6824 if (negated)
6825 strcat (buf, " %0,%1,%3%#");
6826 else
6827 strcat (buf, " %0,%1,%2%#");
6829 else
6831 if (GET_MODE (operands[0]) == DImode)
6832 strcpy (buf, "extrd,s,*");
6833 else
6834 strcpy (buf, "{extrs,|extrw,s,}");
6835 if ((which == 0 && negated)
6836 || (which == 1 && ! negated))
6837 strcat (buf, "<");
6838 else
6839 strcat (buf, ">=");
6840 if (nullify && negated)
6841 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6842 else if (nullify && ! negated)
6843 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6844 else if (negated)
6845 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6846 else
6847 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6849 break;
6851 default:
6852 /* The reversed conditional branch must branch over one additional
6853 instruction if the delay slot is filled and needs to be extracted
6854 by output_lbranch. If the delay slot is empty or this is a
6855 nullified forward branch, the instruction after the reversed
6856 condition branch must be nullified. */
6857 if (dbr_sequence_length () == 0
6858 || (nullify && forward_branch_p (insn)))
6860 nullify = 1;
6861 xdelay = 0;
6862 operands[4] = GEN_INT (length);
6864 else
6866 xdelay = 1;
6867 operands[4] = GEN_INT (length + 4);
6870 if (GET_MODE (operands[0]) == DImode)
6871 strcpy (buf, "bb,*");
6872 else
6873 strcpy (buf, "bb,");
6874 if ((which == 0 && negated)
6875 || (which == 1 && !negated))
6876 strcat (buf, "<");
6877 else
6878 strcat (buf, ">=");
6879 if (nullify)
6880 strcat (buf, ",n %0,%1,.+%4");
6881 else
6882 strcat (buf, " %0,%1,.+%4");
6883 output_asm_insn (buf, operands);
6884 return output_lbranch (negated ? operands[3] : operands[2],
6885 insn, xdelay);
6887 return buf;
6890 /* This routine handles all the branch-on-variable-bit conditional branch
6891 sequences we might need to generate. It handles nullification of delay
6892 slots, varying length branches, negated branches and all combinations
6893 of the above. It returns the appropriate output template to emit the
6894 branch. */
6896 const char *
6897 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6899 static char buf[100];
6900 bool useskip;
6901 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6902 int length = get_attr_length (insn);
6903 int xdelay;
6905 /* A conditional branch to the following instruction (e.g. the delay slot) is
6906 asking for a disaster. I do not think this can happen as this pattern
6907 is only used when optimizing; jump optimization should eliminate the
6908 jump. But be prepared just in case. */
6910 if (branch_to_delay_slot_p (insn))
6911 return "nop";
6913 /* If this is a long branch with its delay slot unfilled, set `nullify'
6914 as it can nullify the delay slot and save a nop. */
6915 if (length == 8 && dbr_sequence_length () == 0)
6916 nullify = 1;
6918 /* If this is a short forward conditional branch which did not get
6919 its delay slot filled, the delay slot can still be nullified. */
6920 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6921 nullify = forward_branch_p (insn);
6923 /* A forward branch over a single nullified insn can be done with an
6924 extrs instruction. This avoids a single cycle penalty due to
6925 mis-predicted branch if we fall through (branch not taken). */
6926 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6928 switch (length)
6931 /* All short conditional branches except backwards with an unfilled
6932 delay slot. */
6933 case 4:
6934 if (useskip)
6935 strcpy (buf, "{vextrs,|extrw,s,}");
6936 else
6937 strcpy (buf, "{bvb,|bb,}");
6938 if (useskip && GET_MODE (operands[0]) == DImode)
6939 strcpy (buf, "extrd,s,*");
6940 else if (GET_MODE (operands[0]) == DImode)
6941 strcpy (buf, "bb,*");
6942 if ((which == 0 && negated)
6943 || (which == 1 && ! negated))
6944 strcat (buf, ">=");
6945 else
6946 strcat (buf, "<");
6947 if (useskip)
6948 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6949 else if (nullify && negated)
6951 if (branch_needs_nop_p (insn))
6952 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6953 else
6954 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6956 else if (nullify && ! negated)
6958 if (branch_needs_nop_p (insn))
6959 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6960 else
6961 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6963 else if (! nullify && negated)
6964 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6965 else if (! nullify && ! negated)
6966 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6967 break;
6969 /* All long conditionals. Note a short backward branch with an
6970 unfilled delay slot is treated just like a long backward branch
6971 with an unfilled delay slot. */
6972 case 8:
6973 /* Handle weird backwards branch with a filled delay slot
6974 which is nullified. */
6975 if (dbr_sequence_length () != 0
6976 && ! forward_branch_p (insn)
6977 && nullify)
6979 strcpy (buf, "{bvb,|bb,}");
6980 if (GET_MODE (operands[0]) == DImode)
6981 strcat (buf, "*");
6982 if ((which == 0 && negated)
6983 || (which == 1 && ! negated))
6984 strcat (buf, "<");
6985 else
6986 strcat (buf, ">=");
6987 if (negated)
6988 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6989 else
6990 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6992 /* Handle short backwards branch with an unfilled delay slot.
6993 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6994 taken and untaken branches. */
6995 else if (dbr_sequence_length () == 0
6996 && ! forward_branch_p (insn)
6997 && INSN_ADDRESSES_SET_P ()
6998 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6999 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7001 strcpy (buf, "{bvb,|bb,}");
7002 if (GET_MODE (operands[0]) == DImode)
7003 strcat (buf, "*");
7004 if ((which == 0 && negated)
7005 || (which == 1 && ! negated))
7006 strcat (buf, ">=");
7007 else
7008 strcat (buf, "<");
7009 if (negated)
7010 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7011 else
7012 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7014 else
7016 strcpy (buf, "{vextrs,|extrw,s,}");
7017 if (GET_MODE (operands[0]) == DImode)
7018 strcpy (buf, "extrd,s,*");
7019 if ((which == 0 && negated)
7020 || (which == 1 && ! negated))
7021 strcat (buf, "<");
7022 else
7023 strcat (buf, ">=");
7024 if (nullify && negated)
7025 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7026 else if (nullify && ! negated)
7027 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7028 else if (negated)
7029 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7030 else
7031 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7033 break;
7035 default:
7036 /* The reversed conditional branch must branch over one additional
7037 instruction if the delay slot is filled and needs to be extracted
7038 by output_lbranch. If the delay slot is empty or this is a
7039 nullified forward branch, the instruction after the reversed
7040 condition branch must be nullified. */
7041 if (dbr_sequence_length () == 0
7042 || (nullify && forward_branch_p (insn)))
7044 nullify = 1;
7045 xdelay = 0;
7046 operands[4] = GEN_INT (length);
7048 else
7050 xdelay = 1;
7051 operands[4] = GEN_INT (length + 4);
7054 if (GET_MODE (operands[0]) == DImode)
7055 strcpy (buf, "bb,*");
7056 else
7057 strcpy (buf, "{bvb,|bb,}");
7058 if ((which == 0 && negated)
7059 || (which == 1 && !negated))
7060 strcat (buf, "<");
7061 else
7062 strcat (buf, ">=");
7063 if (nullify)
7064 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7065 else
7066 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7067 output_asm_insn (buf, operands);
7068 return output_lbranch (negated ? operands[3] : operands[2],
7069 insn, xdelay);
7071 return buf;
7074 /* Return the output template for emitting a dbra type insn.
7076 Note it may perform some output operations on its own before
7077 returning the final output string. */
7078 const char *
7079 output_dbra (rtx *operands, rtx insn, int which_alternative)
7081 int length = get_attr_length (insn);
7083 /* A conditional branch to the following instruction (e.g. the delay slot) is
7084 asking for a disaster. Be prepared! */
7086 if (branch_to_delay_slot_p (insn))
7088 if (which_alternative == 0)
7089 return "ldo %1(%0),%0";
7090 else if (which_alternative == 1)
7092 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7093 output_asm_insn ("ldw -16(%%r30),%4", operands);
7094 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7095 return "{fldws|fldw} -16(%%r30),%0";
7097 else
7099 output_asm_insn ("ldw %0,%4", operands);
7100 return "ldo %1(%4),%4\n\tstw %4,%0";
7104 if (which_alternative == 0)
7106 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7107 int xdelay;
7109 /* If this is a long branch with its delay slot unfilled, set `nullify'
7110 as it can nullify the delay slot and save a nop. */
7111 if (length == 8 && dbr_sequence_length () == 0)
7112 nullify = 1;
7114 /* If this is a short forward conditional branch which did not get
7115 its delay slot filled, the delay slot can still be nullified. */
7116 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7117 nullify = forward_branch_p (insn);
7119 switch (length)
7121 case 4:
7122 if (nullify)
7124 if (branch_needs_nop_p (insn))
7125 return "addib,%C2,n %1,%0,%3%#";
7126 else
7127 return "addib,%C2,n %1,%0,%3";
7129 else
7130 return "addib,%C2 %1,%0,%3";
7132 case 8:
7133 /* Handle weird backwards branch with a filled delay slot
7134 which is nullified. */
7135 if (dbr_sequence_length () != 0
7136 && ! forward_branch_p (insn)
7137 && nullify)
7138 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7139 /* Handle short backwards branch with an unfilled delay slot.
7140 Using an addb;nop rather than addi;bl saves 1 cycle for both
7141 taken and untaken branches. */
7142 else if (dbr_sequence_length () == 0
7143 && ! forward_branch_p (insn)
7144 && INSN_ADDRESSES_SET_P ()
7145 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7146 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7147 return "addib,%C2 %1,%0,%3%#";
7149 /* Handle normal cases. */
7150 if (nullify)
7151 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7152 else
7153 return "addi,%N2 %1,%0,%0\n\tb %3";
7155 default:
7156 /* The reversed conditional branch must branch over one additional
7157 instruction if the delay slot is filled and needs to be extracted
7158 by output_lbranch. If the delay slot is empty or this is a
7159 nullified forward branch, the instruction after the reversed
7160 condition branch must be nullified. */
7161 if (dbr_sequence_length () == 0
7162 || (nullify && forward_branch_p (insn)))
7164 nullify = 1;
7165 xdelay = 0;
7166 operands[4] = GEN_INT (length);
7168 else
7170 xdelay = 1;
7171 operands[4] = GEN_INT (length + 4);
7174 if (nullify)
7175 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7176 else
7177 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7179 return output_lbranch (operands[3], insn, xdelay);
7183 /* Deal with gross reload from FP register case. */
7184 else if (which_alternative == 1)
7186 /* Move loop counter from FP register to MEM then into a GR,
7187 increment the GR, store the GR into MEM, and finally reload
7188 the FP register from MEM from within the branch's delay slot. */
7189 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7190 operands);
7191 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7192 if (length == 24)
7193 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7194 else if (length == 28)
7195 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7196 else
7198 operands[5] = GEN_INT (length - 16);
7199 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7200 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7201 return output_lbranch (operands[3], insn, 0);
7204 /* Deal with gross reload from memory case. */
7205 else
7207 /* Reload loop counter from memory; the store back to memory
7208 happens in the branch's delay slot. */
7209 output_asm_insn ("ldw %0,%4", operands);
7210 if (length == 12)
7211 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7212 else if (length == 16)
7213 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7214 else
7216 operands[5] = GEN_INT (length - 4);
7217 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7218 return output_lbranch (operands[3], insn, 0);
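/* In the common register case this is the classic decrement-and-branch
   idiom, e.g. a counter in %r3 stepping by -1:

	addib,<> -1,%r3,L$loop

   a single insn that adds, tests and branches.  Register, step and
   label are illustrative.  */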
7223 /* Return the output template for emitting a movb type insn.
7225 Note it may perform some output operations on its own before
7226 returning the final output string. */
7227 const char *
7228 output_movb (rtx *operands, rtx insn, int which_alternative,
7229 int reverse_comparison)
7231 int length = get_attr_length (insn);
7233 /* A conditional branch to the following instruction (e.g. the delay slot) is
7234 asking for a disaster. Be prepared! */
7236 if (branch_to_delay_slot_p (insn))
7238 if (which_alternative == 0)
7239 return "copy %1,%0";
7240 else if (which_alternative == 1)
7242 output_asm_insn ("stw %1,-16(%%r30)", operands);
7243 return "{fldws|fldw} -16(%%r30),%0";
7245 else if (which_alternative == 2)
7246 return "stw %1,%0";
7247 else
7248 return "mtsar %r1";
7251 /* Support the second variant. */
7252 if (reverse_comparison)
7253 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7255 if (which_alternative == 0)
7257 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7258 int xdelay;
7260 /* If this is a long branch with its delay slot unfilled, set `nullify'
7261 as it can nullify the delay slot and save a nop. */
7262 if (length == 8 && dbr_sequence_length () == 0)
7263 nullify = 1;
7265 /* If this is a short forward conditional branch which did not get
7266 its delay slot filled, the delay slot can still be nullified. */
7267 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7268 nullify = forward_branch_p (insn);
7270 switch (length)
7272 case 4:
7273 if (nullify)
7275 if (branch_needs_nop_p (insn))
7276 return "movb,%C2,n %1,%0,%3%#";
7277 else
7278 return "movb,%C2,n %1,%0,%3";
7280 else
7281 return "movb,%C2 %1,%0,%3";
7283 case 8:
7284 /* Handle weird backwards branch with a filled delay slot
7285 which is nullified. */
7286 if (dbr_sequence_length () != 0
7287 && ! forward_branch_p (insn)
7288 && nullify)
7289 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7291 /* Handle short backwards branch with an unfilled delay slot.
7292 Using a movb;nop rather than or;bl saves 1 cycle for both
7293 taken and untaken branches. */
7294 else if (dbr_sequence_length () == 0
7295 && ! forward_branch_p (insn)
7296 && INSN_ADDRESSES_SET_P ()
7297 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7298 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7299 return "movb,%C2 %1,%0,%3%#";
7300 /* Handle normal cases. */
7301 if (nullify)
7302 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7303 else
7304 return "or,%N2 %1,%%r0,%0\n\tb %3";
7306 default:
7307 /* The reversed conditional branch must branch over one additional
7308 instruction if the delay slot is filled and needs to be extracted
7309 by output_lbranch. If the delay slot is empty or this is a
7310 nullified forward branch, the instruction after the reversed
7311 condition branch must be nullified. */
7312 if (dbr_sequence_length () == 0
7313 || (nullify && forward_branch_p (insn)))
7315 nullify = 1;
7316 xdelay = 0;
7317 operands[4] = GEN_INT (length);
7319 else
7321 xdelay = 1;
7322 operands[4] = GEN_INT (length + 4);
7325 if (nullify)
7326 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7327 else
7328 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7330 return output_lbranch (operands[3], insn, xdelay);
7333 /* Deal with gross reload for FP destination register case. */
7334 else if (which_alternative == 1)
7336 /* Move source register to MEM, perform the branch test, then
7337 finally load the FP register from MEM from within the branch's
7338 delay slot. */
7339 output_asm_insn ("stw %1,-16(%%r30)", operands);
7340 if (length == 12)
7341 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7342 else if (length == 16)
7343 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7344 else
7346 operands[4] = GEN_INT (length - 4);
7347 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7348 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7349 return output_lbranch (operands[3], insn, 0);
7352 /* Deal with gross reload from memory case. */
7353 else if (which_alternative == 2)
7355 /* Reload loop counter from memory; the store back to memory
7356 happens in the branch's delay slot. */
7357 if (length == 8)
7358 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7359 else if (length == 12)
7360 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7361 else
7363 operands[4] = GEN_INT (length);
7364 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7365 operands);
7366 return output_lbranch (operands[3], insn, 0);
7369 /* Handle SAR as a destination. */
7370 else
7372 if (length == 8)
7373 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7374 else if (length == 12)
7375 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7376 else
7378 operands[4] = GEN_INT (length);
7379 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7380 operands);
7381 return output_lbranch (operands[3], insn, 0);
7386 /* Copy any FP arguments in INSN into integer registers. */
7387 static void
7388 copy_fp_args (rtx insn)
7390 rtx link;
7391 rtx xoperands[2];
7393 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7395 int arg_mode, regno;
7396 rtx use = XEXP (link, 0);
7398 if (! (GET_CODE (use) == USE
7399 && GET_CODE (XEXP (use, 0)) == REG
7400 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7401 continue;
7403 arg_mode = GET_MODE (XEXP (use, 0));
7404 regno = REGNO (XEXP (use, 0));
7406 /* Is it a floating point register? */
7407 if (regno >= 32 && regno <= 39)
7409 /* Copy the FP register into an integer register via memory. */
7410 if (arg_mode == SFmode)
7412 xoperands[0] = XEXP (use, 0);
7413 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7414 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7415 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7417 else
7419 xoperands[0] = XEXP (use, 0);
7420 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7421 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7422 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7423 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7429 /* Compute length of the FP argument copy sequence for INSN. */
7430 static int
7431 length_fp_args (rtx insn)
7433 int length = 0;
7434 rtx link;
7436 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7438 int arg_mode, regno;
7439 rtx use = XEXP (link, 0);
7441 if (! (GET_CODE (use) == USE
7442 && GET_CODE (XEXP (use, 0)) == REG
7443 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7444 continue;
7446 arg_mode = GET_MODE (XEXP (use, 0));
7447 regno = REGNO (XEXP (use, 0));
7449 /* Is it a floating point register? */
7450 if (regno >= 32 && regno <= 39)
7452 if (arg_mode == SFmode)
7453 length += 8;
7454 else
7455 length += 12;
7459 return length;
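/* E.g. a call with one SFmode and one DFmode FP argument costs
   8 + 12 = 20 bytes of copy code: two insns (fstw/ldw) per single
   and three (fstd/ldw/ldw) per double, as emitted by copy_fp_args.  */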
7462 /* Return the attribute length for the millicode call instruction INSN.
7463 The length must match the code generated by output_millicode_call.
7464 We include the delay slot in the returned length as it is better to
7465 overestimate the length than to underestimate it. */
7467 int
7468 attr_length_millicode_call (rtx insn)
7470 unsigned long distance = -1;
7471 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7473 if (INSN_ADDRESSES_SET_P ())
7475 distance = (total + insn_current_reference_address (insn));
7476 if (distance < total)
7477 distance = -1;
7480 if (TARGET_64BIT)
7482 if (!TARGET_LONG_CALLS && distance < 7600000)
7483 return 8;
7485 return 20;
7487 else if (TARGET_PORTABLE_RUNTIME)
7488 return 24;
7489 else
7491 if (!TARGET_LONG_CALLS && distance < 240000)
7492 return 8;
7494 if (TARGET_LONG_ABS_CALL && !flag_pic)
7495 return 12;
7497 return 24;
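/* Schematically: a 32-bit millicode call known to be within the
   240000-byte reach estimate is budgeted 8 bytes (branch plus delay
   slot), while an out-of-range PIC call gets the full 24 bytes.  */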
7501 /* INSN is a function call. It may have an unconditional jump
7502 in its delay slot.
7504 CALL_DEST is the routine we are calling. */
7506 const char *
7507 output_millicode_call (rtx insn, rtx call_dest)
7509 int attr_length = get_attr_length (insn);
7510 int seq_length = dbr_sequence_length ();
7511 int distance;
7512 rtx seq_insn;
7513 rtx xoperands[3];
7515 xoperands[0] = call_dest;
7516 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7518 /* Handle the common case where we are sure that the branch will
7519 reach the beginning of the $CODE$ subspace. The within-reach
7520 form of the $$sh_func_adrs call has a length of 28. Because
7521 it has an attribute type of multi, it never has a nonzero
7522 sequence length. The length of the $$sh_func_adrs is the same
7523 as certain out of reach PIC calls to other routines. */
7524 if (!TARGET_LONG_CALLS
7525 && ((seq_length == 0
7526 && (attr_length == 12
7527 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7528 || (seq_length != 0 && attr_length == 8)))
7530 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7532 else
7534 if (TARGET_64BIT)
7536 /* It might seem that one insn could be saved by accessing
7537 the millicode function using the linkage table. However,
7538 this doesn't work in shared libraries and other dynamically
7539 loaded objects. Using a pc-relative sequence also avoids
7540 problems related to the implicit use of the gp register. */
7541 output_asm_insn ("b,l .+8,%%r1", xoperands);
7543 if (TARGET_GAS)
7545 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7546 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7548 else
7550 xoperands[1] = gen_label_rtx ();
7551 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7552 targetm.asm_out.internal_label (asm_out_file, "L",
7553 CODE_LABEL_NUMBER (xoperands[1]));
7554 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7557 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7559 else if (TARGET_PORTABLE_RUNTIME)
7561 /* Pure portable runtime doesn't allow be/ble; we also don't
7562 have PIC support in the assembler/linker, so this sequence
7563 is needed. */
7565 /* Get the address of our target into %r1. */
7566 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7567 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7569 /* Get our return address into %r31. */
7570 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7571 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7573 /* Jump to our target address in %r1. */
7574 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7576 else if (!flag_pic)
7578 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7579 if (TARGET_PA_20)
7580 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7581 else
7582 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7584 else
7586 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7587 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7589 if (TARGET_SOM || !TARGET_GAS)
7591 /* The HP assembler can generate relocations for the
7592 difference of two symbols. GAS can do this for a
7593 millicode symbol but not an arbitrary external
7594 symbol when generating SOM output. */
7595 xoperands[1] = gen_label_rtx ();
7596 targetm.asm_out.internal_label (asm_out_file, "L",
7597 CODE_LABEL_NUMBER (xoperands[1]));
7598 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7599 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7601 else
7603 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7604 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7605 xoperands);
7608 /* Jump to our target address in %r1. */
7609 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7613 if (seq_length == 0)
7614 output_asm_insn ("nop", xoperands);
7616 /* We are done if there isn't a jump in the delay slot. */
7617 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7618 return "";
7620 /* This call has an unconditional jump in its delay slot. */
7621 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7623 /* See if the return address can be adjusted. Use the containing
7624 sequence insn's address. */
7625 if (INSN_ADDRESSES_SET_P ())
7627 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7628 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7629 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7631 if (VAL_14_BITS_P (distance))
7633 xoperands[1] = gen_label_rtx ();
7634 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7635 targetm.asm_out.internal_label (asm_out_file, "L",
7636 CODE_LABEL_NUMBER (xoperands[1]));
7638 else
7639 /* ??? This branch may not reach its target. */
7640 output_asm_insn ("nop\n\tb,n %0", xoperands);
7642 else
7643 /* ??? This branch may not reach its target. */
7644 output_asm_insn ("nop\n\tb,n %0", xoperands);
7646 /* Delete the jump. */
7647 SET_INSN_DELETED (NEXT_INSN (insn));
7649 return "";
7652 /* Return the attribute length of the call instruction INSN. The SIBCALL
7653 flag indicates whether INSN is a regular call or a sibling call. The
7654 length returned must be longer than the code actually generated by
7655 output_call. Since branch shortening is done before delay branch
7656 sequencing, there is no way to determine whether or not the delay
7657 slot will be filled during branch shortening. Even when the delay
7658 slot is filled, we may have to add a nop if the delay slot contains
7659 a branch that can't reach its target. Thus, we always have to include
7660 the delay slot in the length estimate. This used to be done in
7661 pa_adjust_insn_length but we do it here now as some sequences always
7662 fill the delay slot and we can save four bytes in the estimate for
7663 these sequences. */
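/* In outline, the cases below yield: 8 bytes for a reachable
   pc-relative call; 24 (28 for a sibcall) for the 64-bit plabel
   sequence; 12 for the long absolute call; 20 (plus 8 when a space
   register must be set up) for the long pc-relative sequences; and 32
   or more for the 32-bit plabel sequence.  */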
7666 attr_length_call (rtx insn, int sibcall)
7668 int local_call;
7669 rtx call, call_dest;
7670 tree call_decl;
7671 int length = 0;
7672 rtx pat = PATTERN (insn);
7673 unsigned long distance = -1;
7675 gcc_assert (GET_CODE (insn) == CALL_INSN);
7677 if (INSN_ADDRESSES_SET_P ())
7679 unsigned long total;
7681 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7682 distance = (total + insn_current_reference_address (insn));
7683 if (distance < total)
7684 distance = -1;
7687 gcc_assert (GET_CODE (pat) == PARALLEL);
7689 /* Get the call rtx. */
7690 call = XVECEXP (pat, 0, 0);
7691 if (GET_CODE (call) == SET)
7692 call = SET_SRC (call);
7694 gcc_assert (GET_CODE (call) == CALL);
7696 /* Determine if this is a local call. */
7697 call_dest = XEXP (XEXP (call, 0), 0);
7698 call_decl = SYMBOL_REF_DECL (call_dest);
7699 local_call = call_decl && targetm.binds_local_p (call_decl);
7701 /* pc-relative branch. */
7702 if (!TARGET_LONG_CALLS
7703 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7704 || distance < 240000))
7705 length += 8;
7707 /* 64-bit plabel sequence. */
7708 else if (TARGET_64BIT && !local_call)
7709 length += sibcall ? 28 : 24;
7711 /* non-pic long absolute branch sequence. */
7712 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7713 length += 12;
7715 /* long pc-relative branch sequence. */
7716 else if (TARGET_LONG_PIC_SDIFF_CALL
7717 || (TARGET_GAS && !TARGET_SOM
7718 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7720 length += 20;
7722 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7723 length += 8;
7726 /* 32-bit plabel sequence. */
7727 else
7729 length += 32;
7731 if (TARGET_SOM)
7732 length += length_fp_args (insn);
7734 if (flag_pic)
7735 length += 4;
7737 if (!TARGET_PA_20)
7739 if (!sibcall)
7740 length += 8;
7742 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7743 length += 8;
7747 return length;
7750 /* INSN is a function call. It may have an unconditional jump
7751 in its delay slot.
7753 CALL_DEST is the routine we are calling. */
7755 const char *
7756 output_call (rtx insn, rtx call_dest, int sibcall)
7758 int delay_insn_deleted = 0;
7759 int delay_slot_filled = 0;
7760 int seq_length = dbr_sequence_length ();
7761 tree call_decl = SYMBOL_REF_DECL (call_dest);
7762 int local_call = call_decl && targetm.binds_local_p (call_decl);
7763 rtx xoperands[2];
7765 xoperands[0] = call_dest;
7767 /* Handle the common case where we're sure that the branch will reach
7768 the beginning of the "$CODE$" subspace. This is the beginning of
7769 the current function if we are in a named section. */
7770 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7772 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7773 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7775 else
7777 if (TARGET_64BIT && !local_call)
7779 /* ??? As far as I can tell, the HP linker doesn't support the
7780 long pc-relative sequence described in the 64-bit runtime
7781 architecture. So, we use a slightly longer indirect call. */
7782 xoperands[0] = get_deferred_plabel (call_dest);
7783 xoperands[1] = gen_label_rtx ();
7785 /* If this isn't a sibcall, we put the load of %r27 into the
7786 delay slot. We can't do this in a sibcall as we don't
7787 have a second call-clobbered scratch register available. */
7788 if (seq_length != 0
7789 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7790 && !sibcall)
7792 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7793 optimize, 0, NULL);
7795 /* Now delete the delay insn. */
7796 SET_INSN_DELETED (NEXT_INSN (insn));
7797 delay_insn_deleted = 1;
7800 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7801 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7802 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7804 if (sibcall)
7806 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7807 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7808 output_asm_insn ("bve (%%r1)", xoperands);
7810 else
7812 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7813 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7814 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7815 delay_slot_filled = 1;
7818 else
7820 int indirect_call = 0;
7822 /* Emit a long call. There are several different sequences
7823 of increasing length and complexity. In most cases,
7824 they don't allow an instruction in the delay slot. */
7825 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7826 && !TARGET_LONG_PIC_SDIFF_CALL
7827 && !(TARGET_GAS && !TARGET_SOM
7828 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7829 && !TARGET_64BIT)
7830 indirect_call = 1;
7832 if (seq_length != 0
7833 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7834 && !sibcall
7835 && (!TARGET_PA_20
7836 || indirect_call
7837 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7839 /* A non-jump insn in the delay slot. By definition we can
7840 emit this insn before the call (and in fact before argument
7841 relocation). */
7842 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7843 NULL);
7845 /* Now delete the delay insn. */
7846 SET_INSN_DELETED (NEXT_INSN (insn));
7847 delay_insn_deleted = 1;
7850 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7852 /* This is the best sequence for making long calls in
7853 non-pic code. Unfortunately, GNU ld doesn't provide
7854 the stub needed for external calls, and GAS's support
7855 for this with the SOM linker is buggy. It is safe
7856 to use this for local calls. */
7857 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7858 if (sibcall)
7859 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7860 else
7862 if (TARGET_PA_20)
7863 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7864 xoperands);
7865 else
7866 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7868 output_asm_insn ("copy %%r31,%%r2", xoperands);
7869 delay_slot_filled = 1;
7872 else
7874 if (TARGET_LONG_PIC_SDIFF_CALL)
7876 /* The HP assembler and linker can handle relocations
7877 for the difference of two symbols. The HP assembler
7878 recognizes the sequence as a pc-relative call and
7879 the linker provides stubs when needed. */
7880 xoperands[1] = gen_label_rtx ();
7881 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7882 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7883 targetm.asm_out.internal_label (asm_out_file, "L",
7884 CODE_LABEL_NUMBER (xoperands[1]));
7885 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7887 else if (TARGET_GAS && !TARGET_SOM
7888 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7890 /* GAS currently can't generate the relocations that
7891 are needed for the SOM linker under HP-UX using this
7892 sequence. The GNU linker doesn't generate the stubs
7893 that are needed for external calls on TARGET_ELF32
7894 with this sequence. For now, we have to use a
7895 longer plabel sequence when using GAS. */
7896 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7897 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7898 xoperands);
7899 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7900 xoperands);
7902 else
7904 /* Emit a long plabel-based call sequence. This is
7905 essentially an inline implementation of $$dyncall.
7906 We don't actually try to call $$dyncall as this is
7907 as difficult as calling the function itself. */
7908 xoperands[0] = get_deferred_plabel (call_dest);
7909 xoperands[1] = gen_label_rtx ();
7911 /* Since the call is indirect, FP arguments in registers
7912 need to be copied to the general registers. Then, the
7913 argument relocation stub will copy them back. */
7914 if (TARGET_SOM)
7915 copy_fp_args (insn);
7917 if (flag_pic)
7919 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7920 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7921 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7923 else
7925 output_asm_insn ("addil LR'%0-$global$,%%r27",
7926 xoperands);
7927 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7928 xoperands);
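	      /* %r1 now holds the function pointer (plabel).  Following
		 the convention used by $$dyncall (of which this is an
		 inline version): if bit 30 is set, the pointer addresses
		 a descriptor, so clear the two low-order bits, then load
		 the new global pointer (%r19) from word 1 and the entry
		 point from word 0.  */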
7931 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7932 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7933 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7934 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7936 if (!sibcall && !TARGET_PA_20)
7938 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7939 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7940 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7941 else
7942 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7946 if (TARGET_PA_20)
7948 if (sibcall)
7949 output_asm_insn ("bve (%%r1)", xoperands);
7950 else
7952 if (indirect_call)
7954 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7955 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7956 delay_slot_filled = 1;
7958 else
7959 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7962 else
7964 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7965 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7966 xoperands);
7968 if (sibcall)
7970 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7971 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7972 else
7973 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7975 else
7977 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7978 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7979 else
7980 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7982 if (indirect_call)
7983 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7984 else
7985 output_asm_insn ("copy %%r31,%%r2", xoperands);
7986 delay_slot_filled = 1;
7993 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7994 output_asm_insn ("nop", xoperands);
7996 /* We are done if there isn't a jump in the delay slot. */
7997 if (seq_length == 0
7998 || delay_insn_deleted
7999 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
8000 return "";
8002 /* A sibcall should never have a branch in the delay slot. */
8003 gcc_assert (!sibcall);
8005 /* This call has an unconditional jump in its delay slot. */
8006 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
8008 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
8010 /* See if the return address can be adjusted. Use the containing
8011 sequence insn's address. This would break the regular call/return
8012 relationship assumed by the table based eh unwinder, so only do that
8013 if the call is not possibly throwing. */
8014 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
8015 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
8016 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
8018 if (VAL_14_BITS_P (distance)
8019 && !(can_throw_internal (insn) || can_throw_external (insn)))
8021 xoperands[1] = gen_label_rtx ();
8022 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
8023 targetm.asm_out.internal_label (asm_out_file, "L",
8024 CODE_LABEL_NUMBER (xoperands[1]));
8026 else
8027 output_asm_insn ("nop\n\tb,n %0", xoperands);
8029 else
8030 output_asm_insn ("b,n %0", xoperands);
8032 /* Delete the jump. */
8033 SET_INSN_DELETED (NEXT_INSN (insn));
8035 return "";
8038 /* Return the attribute length of the indirect call instruction INSN.
8039 The length must match the code generated by output_indirect_call.
8040 The returned length includes the delay slot. Currently, the delay
8041 slot of an indirect call sequence is not exposed and it is used by
8042 the sequence itself. */
8045 attr_length_indirect_call (rtx insn)
8047 unsigned long distance = -1;
8048 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8050 if (INSN_ADDRESSES_SET_P ())
8052 distance = (total + insn_current_reference_address (insn));
8053 if (distance < total)
8054 distance = -1;
8057 if (TARGET_64BIT)
8058 return 12;
8060 if (TARGET_FAST_INDIRECT_CALLS
8061 || (!TARGET_PORTABLE_RUNTIME
8062 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8063 || distance < 240000)))
8064 return 8;
8066 if (flag_pic)
8067 return 24;
8069 if (TARGET_PORTABLE_RUNTIME)
8070 return 20;
8072 /* Out of reach, can use ble. */
8073 return 12;
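/* For reference, in the 32-bit cases these lengths select the
   sequences emitted by output_indirect_call below: 8 for a direct
   bl/b,l to $$dyncall, 12 for the ldil/ble absolute form, 20 for the
   portable runtime form, and 24 for the long PIC form.  */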
8076 const char *
8077 output_indirect_call (rtx insn, rtx call_dest)
8079 rtx xoperands[1];
8081 if (TARGET_64BIT)
8083 xoperands[0] = call_dest;
8084 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8085 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8086 return "";
8089 /* First the special case for kernels, level 0 systems, etc. */
8090 if (TARGET_FAST_INDIRECT_CALLS)
8091 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8093 /* Now the normal case -- we can reach $$dyncall directly or
8094 we're sure that we can get there via a long-branch stub.
8096 No need to check target flags as the length uniquely identifies
8097 the remaining cases. */
8098 if (attr_length_indirect_call (insn) == 8)
8100 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8101 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8102 variant of the B,L instruction can't be used on the SOM target. */
8103 if (TARGET_PA_20 && !TARGET_SOM)
8104 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8105 else
8106 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8109 /* Long millicode call, but we are not generating PIC or portable runtime
8110 code. */
8111 if (attr_length_indirect_call (insn) == 12)
8112 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8114 /* Long millicode call for portable runtime. */
8115 if (attr_length_indirect_call (insn) == 20)
8116 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
8118 /* We need a long PIC call to $$dyncall. */
8119 xoperands[0] = NULL_RTX;
8120 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8121 if (TARGET_SOM || !TARGET_GAS)
8123 xoperands[0] = gen_label_rtx ();
8124 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
8125 targetm.asm_out.internal_label (asm_out_file, "L",
8126 CODE_LABEL_NUMBER (xoperands[0]));
8127 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8129 else
8131 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
8132 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8133 xoperands);
8135 output_asm_insn ("blr %%r0,%%r2", xoperands);
8136 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
8137 return "";
8140 /* Return the total length of the save and restore instructions needed for
8141 the data linkage table pointer (i.e., the PIC register) across the call
8142 instruction INSN. No-return calls do not require a save and restore.
8143 In addition, we may be able to avoid the save and restore for calls
8144 within the same translation unit. */
8147 attr_length_save_restore_dltp (rtx insn)
8149 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
8150 return 0;
8152 return 8;
8155 /* In HPUX 8.0's shared library scheme, special relocations are needed
8156 for function labels if they might be passed to a function
8157 in a shared library (because shared libraries don't live in code
8158 space), and special magic is needed to construct their address. */
8160 void
8161 hppa_encode_label (rtx sym)
8163 const char *str = XSTR (sym, 0);
8164 int len = strlen (str) + 1;
8165 char *newstr, *p;
8167 p = newstr = XALLOCAVEC (char, len + 1);
8168 *p++ = '@';
8169 strcpy (p, str);
8171 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
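/* For example, a function symbol "foo" becomes "@foo".  The '@'
   prefix marks it as a function label (see FUNCTION_NAME_P) and is
   stripped again by pa_strip_name_encoding below.  */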
8174 static void
8175 pa_encode_section_info (tree decl, rtx rtl, int first)
8177 int old_referenced = 0;
8179 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8180 old_referenced
8181 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8183 default_encode_section_info (decl, rtl, first);
8185 if (first && TEXT_SPACE_P (decl))
8187 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8188 if (TREE_CODE (decl) == FUNCTION_DECL)
8189 hppa_encode_label (XEXP (rtl, 0));
8191 else if (old_referenced)
8192 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8195 /* This is sort of inverse to pa_encode_section_info. */
8197 static const char *
8198 pa_strip_name_encoding (const char *str)
8200 str += (*str == '@');
8201 str += (*str == '*');
8202 return str;
8206 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8208 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
8211 /* Returns 1 if OP is a function label involved in a simple addition
8212 with a constant. Used to keep certain patterns from matching
8213 during instruction combination. */
8215 is_function_label_plus_const (rtx op)
8217 /* Strip off any CONST. */
8218 if (GET_CODE (op) == CONST)
8219 op = XEXP (op, 0);
8221 return (GET_CODE (op) == PLUS
8222 && function_label_operand (XEXP (op, 0), Pmode)
8223 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8226 /* Output assembly code for a thunk to FUNCTION. */
8228 static void
8229 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8230 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8231 tree function)
8233 static unsigned int current_thunk_number;
8234 int val_14 = VAL_14_BITS_P (delta);
8235 unsigned int old_last_address = last_address, nbytes = 0;
8236 char label[16];
8237 rtx xoperands[4];
8239 xoperands[0] = XEXP (DECL_RTL (function), 0);
8240 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8241 xoperands[2] = GEN_INT (delta);
8243 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8244 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8246 /* Output the thunk. We know that the function is in the same
8247 translation unit (i.e., the same space) as the thunk, and that
8248 thunks are output after their method. Thus, we don't need an
8249 external branch to reach the function. With SOM and GAS,
8250 functions and thunks are effectively in different sections.
8251 Thus, we can always use an IA-relative branch and the linker
8252 will add a long branch stub if necessary.
8254 However, we have to be careful when generating PIC code on the
8255 SOM port to ensure that the sequence does not transfer to an
8256 import stub for the target function as this could clobber the
8257 return value saved at SP-24. This would also apply to the
8258 32-bit linux port if the multi-space model is implemented. */
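/* Illustratively, in the simplest short-reach case below the entire
   thunk body is just

	b	<function>
	ldo	<delta>(%r26),%r26

   adjusting the this pointer in %r26 in the delay slot of the
   branch.  */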
8259 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8260 && !(flag_pic && TREE_PUBLIC (function))
8261 && (TARGET_GAS || last_address < 262132))
8262 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8263 && ((targetm.have_named_sections
8264 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8265 /* The GNU 64-bit linker has rather poor stub management.
8266 So, we use a long branch from thunks that aren't in
8267 the same section as the target function. */
8268 && ((!TARGET_64BIT
8269 && (DECL_SECTION_NAME (thunk_fndecl)
8270 != DECL_SECTION_NAME (function)))
8271 || ((DECL_SECTION_NAME (thunk_fndecl)
8272 == DECL_SECTION_NAME (function))
8273 && last_address < 262132)))
8274 || (targetm.have_named_sections
8275 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8276 && DECL_SECTION_NAME (function) == NULL
8277 && last_address < 262132)
8278 || (!targetm.have_named_sections && last_address < 262132))))
8280 if (!val_14)
8281 output_asm_insn ("addil L'%2,%%r26", xoperands);
8283 output_asm_insn ("b %0", xoperands);
8285 if (val_14)
8287 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8288 nbytes += 8;
8290 else
8292 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8293 nbytes += 12;
8296 else if (TARGET_64BIT)
8298 /* We only have one call-clobbered scratch register, so we can't
8299 make use of the delay slot if delta doesn't fit in 14 bits. */
8300 if (!val_14)
8302 output_asm_insn ("addil L'%2,%%r26", xoperands);
8303 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8306 output_asm_insn ("b,l .+8,%%r1", xoperands);
8308 if (TARGET_GAS)
8310 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8311 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8313 else
8315 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8316 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8317 output_asm_insn ("ldo R'%0-%1-%3(%%r1),%%r1", xoperands);
8319 if (val_14)
8321 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8322 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8323 nbytes += 20;
8325 else
8327 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8328 nbytes += 24;
8331 else if (TARGET_PORTABLE_RUNTIME)
8333 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8334 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8336 if (!val_14)
8337 output_asm_insn ("addil L'%2,%%r26", xoperands);
8339 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8341 if (val_14)
8343 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8344 nbytes += 16;
8346 else
8348 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8349 nbytes += 20;
8352 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8354 /* The function is accessible from outside this module. The only
8355 way to avoid an import stub between the thunk and function is to
8356 call the function directly with an indirect sequence similar to
8357 that used by $$dyncall. This is possible because $$dyncall acts
8358 as the import stub in an indirect call. */
8359 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8360 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8361 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8362 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8363 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8364 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8365 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8366 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8367 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8369 if (!val_14)
8371 output_asm_insn ("addil L'%2,%%r26", xoperands);
8372 nbytes += 4;
8375 if (TARGET_PA_20)
8377 output_asm_insn ("bve (%%r22)", xoperands);
8378 nbytes += 36;
8380 else if (TARGET_NO_SPACE_REGS)
8382 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8383 nbytes += 36;
8385 else
8387 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8388 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8389 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8390 nbytes += 44;
8393 if (val_14)
8394 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8395 else
8396 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8398 else if (flag_pic)
8400 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8402 if (TARGET_SOM || !TARGET_GAS)
8404 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8405 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8407 else
8409 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8410 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8413 if (!val_14)
8414 output_asm_insn ("addil L'%2,%%r26", xoperands);
8416 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8418 if (val_14)
8420 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8421 nbytes += 20;
8423 else
8425 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8426 nbytes += 24;
8429 else
8431 if (!val_14)
8432 output_asm_insn ("addil L'%2,%%r26", xoperands);
8434 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8435 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8437 if (val_14)
8439 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8440 nbytes += 12;
8442 else
8444 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8445 nbytes += 16;
8449 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8451 if (TARGET_SOM && TARGET_GAS)
8453 /* We're done with this subspace except possibly for some additional
8454 debug information. Forget that we are in this subspace to ensure
8455 that the next function is output in its own subspace. */
8456 in_section = NULL;
8457 cfun->machine->in_nsubspa = 2;
8460 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8462 switch_to_section (data_section);
8463 output_asm_insn (".align 4", xoperands);
8464 ASM_OUTPUT_LABEL (file, label);
8465 output_asm_insn (".word P'%0", xoperands);
8468 current_thunk_number++;
8469 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8470 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8471 last_address += nbytes;
8472 if (old_last_address > last_address)
8473 last_address = UINT_MAX;
8474 update_total_code_bytes (nbytes);
8477 /* Only direct calls to static functions are allowed to be sibling (tail)
8478 call optimized.
8480 This restriction is necessary because some linker generated stubs will
8481 store return pointers into rp' in some cases which might clobber a
8482 live value already in rp'.
8484 In a sibcall the current function and the target function share stack
8485 space. Thus if the path to the current function and the path to the
8486 target function save a value in rp', they save the value into the
8487 same stack slot, which has undesirable consequences.
8489 Because of the deferred binding nature of shared libraries any function
8490 with external scope could be in a different load module and thus require
8491 rp' to be saved when calling that function. So sibcall optimizations
8492 can only be safe for static functions.
8494 Note that GCC never needs return value relocations, so we don't have to
8495 worry about static calls with return value relocations (which require
8496 saving rp').
8498 It is safe to perform a sibcall optimization when the target function
8499 will never return. */
8500 static bool
8501 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8503 if (TARGET_PORTABLE_RUNTIME)
8504 return false;
8506 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8507 single subspace mode and the call is not indirect. As far as I know,
8508 there is no operating system support for the multiple subspace mode.
8509 It might be possible to support indirect calls if we didn't use
8510 $$dyncall (see the indirect sequence generated in output_call). */
8511 if (TARGET_ELF32)
8512 return (decl != NULL_TREE);
8514 /* Sibcalls are not ok because the arg pointer register is not a fixed
8515 register. This prevents the sibcall optimization from occurring. In
8516 addition, there are problems with stub placement using GNU ld. This
8517 is because a normal sibcall branch uses a 17-bit relocation while
8518 a regular call branch uses a 22-bit relocation. As a result, more
8519 care needs to be taken in the placement of long-branch stubs. */
8520 if (TARGET_64BIT)
8521 return false;
8523 /* Sibcalls are only ok within a translation unit. */
8524 return (decl && !TREE_PUBLIC (decl));
8527 /* ??? Addition is not commutative on the PA due to the weird implicit
8528 space register selection rules for memory addresses. Therefore, we
8529 don't consider a + b == b + a, as this might be inside a MEM. */
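/* For instance, in (mem (plus (reg A) (reg B))) the space register is
   selected implicitly from the base operand, so (mem (plus B A)) may
   refer to a different space.  Hence a PLUS that might appear inside
   a MEM is not treated as commutative below.  */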
8530 static bool
8531 pa_commutative_p (const_rtx x, int outer_code)
8533 return (COMMUTATIVE_P (x)
8534 && (TARGET_NO_SPACE_REGS
8535 || (outer_code != UNKNOWN && outer_code != MEM)
8536 || GET_CODE (x) != PLUS));
8539 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8540 use in fmpyadd instructions. */
8542 fmpyaddoperands (rtx *operands)
8544 enum machine_mode mode = GET_MODE (operands[0]);
8546 /* Must be a floating point mode. */
8547 if (mode != SFmode && mode != DFmode)
8548 return 0;
8550 /* All modes must be the same. */
8551 if (! (mode == GET_MODE (operands[1])
8552 && mode == GET_MODE (operands[2])
8553 && mode == GET_MODE (operands[3])
8554 && mode == GET_MODE (operands[4])
8555 && mode == GET_MODE (operands[5])))
8556 return 0;
8558 /* All operands must be registers. */
8559 if (! (GET_CODE (operands[1]) == REG
8560 && GET_CODE (operands[2]) == REG
8561 && GET_CODE (operands[3]) == REG
8562 && GET_CODE (operands[4]) == REG
8563 && GET_CODE (operands[5]) == REG))
8564 return 0;
8566 /* Only 2 real operands to the addition. One of the input operands must
8567 be the same as the output operand. */
8568 if (! rtx_equal_p (operands[3], operands[4])
8569 && ! rtx_equal_p (operands[3], operands[5]))
8570 return 0;
8572 /* Inout operand of add cannot conflict with any operands from multiply. */
8573 if (rtx_equal_p (operands[3], operands[0])
8574 || rtx_equal_p (operands[3], operands[1])
8575 || rtx_equal_p (operands[3], operands[2]))
8576 return 0;
8578 /* multiply cannot feed into addition operands. */
8579 if (rtx_equal_p (operands[4], operands[0])
8580 || rtx_equal_p (operands[5], operands[0]))
8581 return 0;
8583 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8584 if (mode == SFmode
8585 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8586 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8587 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8588 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8589 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8590 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8591 return 0;
8593 /* Passed. Operands are suitable for fmpyadd. */
8594 return 1;
8597 #if !defined(USE_COLLECT2)
8598 static void
8599 pa_asm_out_constructor (rtx symbol, int priority)
8601 if (!function_label_operand (symbol, VOIDmode))
8602 hppa_encode_label (symbol);
8604 #ifdef CTORS_SECTION_ASM_OP
8605 default_ctor_section_asm_out_constructor (symbol, priority);
8606 #else
8607 # ifdef TARGET_ASM_NAMED_SECTION
8608 default_named_section_asm_out_constructor (symbol, priority);
8609 # else
8610 default_stabs_asm_out_constructor (symbol, priority);
8611 # endif
8612 #endif
8615 static void
8616 pa_asm_out_destructor (rtx symbol, int priority)
8618 if (!function_label_operand (symbol, VOIDmode))
8619 hppa_encode_label (symbol);
8621 #ifdef DTORS_SECTION_ASM_OP
8622 default_dtor_section_asm_out_destructor (symbol, priority);
8623 #else
8624 # ifdef TARGET_ASM_NAMED_SECTION
8625 default_named_section_asm_out_destructor (symbol, priority);
8626 # else
8627 default_stabs_asm_out_destructor (symbol, priority);
8628 # endif
8629 #endif
8631 #endif
8633 /* This function places uninitialized global data in the bss section.
8634 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8635 function on the SOM port to prevent uninitialized global data from
8636 being placed in the data section. */
8638 void
8639 pa_asm_output_aligned_bss (FILE *stream,
8640 const char *name,
8641 unsigned HOST_WIDE_INT size,
8642 unsigned int align)
8644 switch_to_section (bss_section);
8645 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8647 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8648 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8649 #endif
8651 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8652 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8653 #endif
8655 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8656 ASM_OUTPUT_LABEL (stream, name);
8657 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8660 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8661 that doesn't allow the alignment of global common storage to be directly
8662 specified. The SOM linker aligns common storage based on the rounded
8663 value of the NUM_BYTES parameter in the .comm directive. It's not
8664 possible to use the .align directive as it doesn't affect the alignment
8665 of the label associated with a .comm directive. */
8667 void
8668 pa_asm_output_aligned_common (FILE *stream,
8669 const char *name,
8670 unsigned HOST_WIDE_INT size,
8671 unsigned int align)
8673 unsigned int max_common_align;
8675 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8676 if (align > max_common_align)
8678 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8679 "for global common data. Using %u",
8680 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8681 align = max_common_align;
8684 switch_to_section (bss_section);
8686 assemble_name (stream, name);
8687 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8688 MAX (size, align / BITS_PER_UNIT));
8691 /* We can't use .comm for local common storage as the SOM linker effectively
8692 treats the symbol as universal and uses the same storage for local symbols
8693 with the same name in different object files. The .block directive
8694 reserves an uninitialized block of storage. However, it's not common
8695 storage. Fortunately, GCC never requests common storage with the same
8696 name in any given translation unit. */
8698 void
8699 pa_asm_output_aligned_local (FILE *stream,
8700 const char *name,
8701 unsigned HOST_WIDE_INT size,
8702 unsigned int align)
8704 switch_to_section (bss_section);
8705 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8707 #ifdef LOCAL_ASM_OP
8708 fprintf (stream, "%s", LOCAL_ASM_OP);
8709 assemble_name (stream, name);
8710 fprintf (stream, "\n");
8711 #endif
8713 ASM_OUTPUT_LABEL (stream, name);
8714 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8717 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8718 use in fmpysub instructions. */
8720 fmpysuboperands (rtx *operands)
8722 enum machine_mode mode = GET_MODE (operands[0]);
8724 /* Must be a floating point mode. */
8725 if (mode != SFmode && mode != DFmode)
8726 return 0;
8728 /* All modes must be the same. */
8729 if (! (mode == GET_MODE (operands[1])
8730 && mode == GET_MODE (operands[2])
8731 && mode == GET_MODE (operands[3])
8732 && mode == GET_MODE (operands[4])
8733 && mode == GET_MODE (operands[5])))
8734 return 0;
8736 /* All operands must be registers. */
8737 if (! (GET_CODE (operands[1]) == REG
8738 && GET_CODE (operands[2]) == REG
8739 && GET_CODE (operands[3]) == REG
8740 && GET_CODE (operands[4]) == REG
8741 && GET_CODE (operands[5]) == REG))
8742 return 0;
8744 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8745 operation, so operands[4] must be the same as operands[3]. */
8746 if (! rtx_equal_p (operands[3], operands[4]))
8747 return 0;
8749 /* multiply cannot feed into subtraction. */
8750 if (rtx_equal_p (operands[5], operands[0]))
8751 return 0;
8753 /* Inout operand of sub cannot conflict with any operands from multiply. */
8754 if (rtx_equal_p (operands[3], operands[0])
8755 || rtx_equal_p (operands[3], operands[1])
8756 || rtx_equal_p (operands[3], operands[2]))
8757 return 0;
8759 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8760 if (mode == SFmode
8761 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8762 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8763 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8764 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8765 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8766 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8767 return 0;
8769 /* Passed. Operands are suitable for fmpysub. */
8770 return 1;
8773 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8774 constants for shadd instructions. */
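/* For example, sh2add %r25,%r26,%r28 computes %r28 = 4 * %r25 + %r26,
   so scaling an index by 2, 4 or 8 folds into a single shift-and-add
   instruction.  */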
8776 shadd_constant_p (int val)
8778 if (val == 2 || val == 4 || val == 8)
8779 return 1;
8780 else
8781 return 0;
8784 /* Return 1 if OP is valid as a base or index register in a
8785 REG+REG address. */
8788 borx_reg_operand (rtx op, enum machine_mode mode)
8790 if (GET_CODE (op) != REG)
8791 return 0;
8793 /* We must reject virtual registers as the only expressions that
8794 can be instantiated are REG and REG+CONST. */
8795 if (op == virtual_incoming_args_rtx
8796 || op == virtual_stack_vars_rtx
8797 || op == virtual_stack_dynamic_rtx
8798 || op == virtual_outgoing_args_rtx
8799 || op == virtual_cfa_rtx)
8800 return 0;
8802 /* While it's always safe to index off the frame pointer, it's not
8803 profitable to do so when the frame pointer is being eliminated. */
8804 if (!reload_completed
8805 && flag_omit_frame_pointer
8806 && !cfun->calls_alloca
8807 && op == frame_pointer_rtx)
8808 return 0;
8810 return register_operand (op, mode);
8813 /* Return 1 if this operand is anything other than a hard register. */
8816 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8818 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8821 /* Return TRUE if INSN branches forward. */
8823 static bool
8824 forward_branch_p (rtx insn)
8826 rtx lab = JUMP_LABEL (insn);
8828 /* The INSN must have a jump label. */
8829 gcc_assert (lab != NULL_RTX);
8831 if (INSN_ADDRESSES_SET_P ())
8832 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8834 while (insn)
8836 if (insn == lab)
8837 return true;
8838 else
8839 insn = NEXT_INSN (insn);
8842 return false;
8845 /* Return 1 if OP is an equality comparison, else return 0. */
8847 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8849 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8852 /* Return 1 if INSN is in the delay slot of a call instruction. */
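/* A filled delay slot is represented in the insn stream as a SEQUENCE
   whose element 0 is the branch or call and whose element 1 is the
   delay insn; the XVECEXP (..., 0, 1) test below relies on this.  */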
8854 jump_in_call_delay (rtx insn)
8857 if (GET_CODE (insn) != JUMP_INSN)
8858 return 0;
8860 if (PREV_INSN (insn)
8861 && PREV_INSN (PREV_INSN (insn))
8862 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8864 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8866 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8867 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8870 else
8871 return 0;
8874 /* Output an unconditional move and branch insn. */
8876 const char *
8877 output_parallel_movb (rtx *operands, rtx insn)
8879 int length = get_attr_length (insn);
8881 /* These are the cases in which we win. */
8882 if (length == 4)
8883 return "mov%I1b,tr %1,%0,%2";
8885 /* None of the following cases win, but they don't lose either. */
8886 if (length == 8)
8888 if (dbr_sequence_length () == 0)
8890 /* Nothing in the delay slot, fake it by putting the combined
8891 insn (the copy or add) in the delay slot of a bl. */
8892 if (GET_CODE (operands[1]) == CONST_INT)
8893 return "b %2\n\tldi %1,%0";
8894 else
8895 return "b %2\n\tcopy %1,%0";
8897 else
8899 /* Something in the delay slot, but we've got a long branch. */
8900 if (GET_CODE (operands[1]) == CONST_INT)
8901 return "ldi %1,%0\n\tb %2";
8902 else
8903 return "copy %1,%0\n\tb %2";
8907 if (GET_CODE (operands[1]) == CONST_INT)
8908 output_asm_insn ("ldi %1,%0", operands);
8909 else
8910 output_asm_insn ("copy %1,%0", operands);
8911 return output_lbranch (operands[2], insn, 1);
8914 /* Output an unconditional add and branch insn. */
8916 const char *
8917 output_parallel_addb (rtx *operands, rtx insn)
8919 int length = get_attr_length (insn);
8921 /* To make life easy we want operand0 to be the shared input/output
8922 operand and operand1 to be the readonly operand. */
8923 if (operands[0] == operands[1])
8924 operands[1] = operands[2];
8926 /* These are the cases in which we win. */
8927 if (length == 4)
8928 return "add%I1b,tr %1,%0,%3";
8930 /* None of the following cases win, but they don't lose either. */
8931 if (length == 8)
8933 if (dbr_sequence_length () == 0)
8934 /* Nothing in the delay slot, fake it by putting the combined
8935 insn (the copy or add) in the delay slot of a bl. */
8936 return "b %3\n\tadd%I1 %1,%0,%0";
8937 else
8938 /* Something in the delay slot, but we've got a long branch. */
8939 return "add%I1 %1,%0,%0\n\tb %3";
8942 output_asm_insn ("add%I1 %1,%0,%0", operands);
8943 return output_lbranch (operands[3], insn, 1);
8946 /* Return nonzero if INSN (a jump insn) immediately follows a call
8947 to a named function. This is used to avoid filling the delay slot
8948 of the jump since it can usually be eliminated by modifying RP in
8949 the delay slot of the call. */
8952 following_call (rtx insn)
8954 if (! TARGET_JUMP_IN_DELAY)
8955 return 0;
8957 /* Find the previous real insn, skipping NOTEs. */
8958 insn = PREV_INSN (insn);
8959 while (insn && GET_CODE (insn) == NOTE)
8960 insn = PREV_INSN (insn);
8962 /* Check for CALL_INSNs and millicode calls. */
8963 if (insn
8964 && ((GET_CODE (insn) == CALL_INSN
8965 && get_attr_type (insn) != TYPE_DYNCALL)
8966 || (GET_CODE (insn) == INSN
8967 && GET_CODE (PATTERN (insn)) != SEQUENCE
8968 && GET_CODE (PATTERN (insn)) != USE
8969 && GET_CODE (PATTERN (insn)) != CLOBBER
8970 && get_attr_type (insn) == TYPE_MILLI)))
8971 return 1;
8973 return 0;
8976 /* We use this hook to perform a PA specific optimization which is difficult
8977 to do in earlier passes.
8979 We want the delay slots of branches within jump tables to be filled.
8980 None of the compiler passes at the moment even has the notion that a
8981 PA jump table doesn't contain addresses, but instead contains actual
8982 instructions!
8984 Because we actually jump into the table, the addresses of each entry
8985 must stay constant in relation to the beginning of the table (which
8986 itself must stay constant relative to the instruction to jump into
8987 it). I don't believe we can guarantee earlier passes of the compiler
8988 will adhere to those rules.
8990 So, late in the compilation process we find all the jump tables, and
8991 expand them into real code -- e.g. each entry in the jump table vector
8992 will get an appropriate label followed by a jump to the final target.
8994 Reorg and the final jump pass can then optimize these branches and
8995 fill their delay slots. We end up with smaller, more efficient code.
8997 The jump instructions within the table are special; we must be able
8998 to identify them during assembly output (if the jumps don't get filled
8999 we need to emit a nop rather than nullifying the delay slot). We
9000 identify jumps in switch tables by using insns with the attribute
9001 type TYPE_BTABLE_BRANCH.
9003 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
9004 insns. This serves two purposes: first, it prevents jump.c from
9005 noticing that the last N entries in the table jump to the instruction
9006 immediately after the table and deleting the jumps. Second, those
9007 insns mark where we should emit .begin_brtab and .end_brtab directives
9008 when using GAS (allows for better link time optimizations). */
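/* Illustratively, a three-entry ADDR_VEC is rewritten by the loop
   below into something like

	L$0:	b	<target0>	; delay slot fillable later
	L$1:	b	<target1>
	L$2:	b	<target2>

   bracketed by begin_brtab/end_brtab markers (labels shown are
   placeholders).  */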
9010 static void
9011 pa_reorg (void)
9013 rtx insn;
9015 remove_useless_addtr_insns (1);
9017 if (pa_cpu < PROCESSOR_8000)
9018 pa_combine_instructions ();
9021 /* This is fairly cheap, so always run it if optimizing. */
9022 if (optimize > 0 && !TARGET_BIG_SWITCH)
9024 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
9025 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9027 rtx pattern, tmp, location, label;
9028 unsigned int length, i;
9030 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
9031 if (GET_CODE (insn) != JUMP_INSN
9032 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
9033 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
9034 continue;
9036 /* Emit marker for the beginning of the branch table. */
9037 emit_insn_before (gen_begin_brtab (), insn);
9039 pattern = PATTERN (insn);
9040 location = PREV_INSN (insn);
9041 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
9043 for (i = 0; i < length; i++)
9045 /* Emit a label before each jump to keep jump.c from
9046 removing this code. */
9047 tmp = gen_label_rtx ();
9048 LABEL_NUSES (tmp) = 1;
9049 emit_label_after (tmp, location);
9050 location = NEXT_INSN (location);
9052 if (GET_CODE (pattern) == ADDR_VEC)
9053 label = XEXP (XVECEXP (pattern, 0, i), 0);
9054 else
9055 label = XEXP (XVECEXP (pattern, 1, i), 0);
9057 tmp = gen_short_jump (label);
9059 /* Emit the jump itself. */
9060 tmp = emit_jump_insn_after (tmp, location);
9061 JUMP_LABEL (tmp) = label;
9062 LABEL_NUSES (label)++;
9063 location = NEXT_INSN (location);
9065 /* Emit a BARRIER after the jump. */
9066 emit_barrier_after (location);
9067 location = NEXT_INSN (location);
9070 /* Emit marker for the end of the branch table. */
9071 emit_insn_before (gen_end_brtab (), location);
9072 location = NEXT_INSN (location);
9073 emit_barrier_after (location);
9075 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
9076 delete_insn (insn);
9079 else
9081 /* Still need brtab marker insns. FIXME: the presence of these
9082 markers disables output of the branch table to readonly memory,
9083 and any alignment directives that might be needed. Possibly,
9084 the begin_brtab insn should be output before the label for the
9085 table. This doesn't matter at the moment since the tables are
9086 always output in the text section. */
9087 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9089 /* Find an ADDR_VEC insn. */
9090 if (GET_CODE (insn) != JUMP_INSN
9091 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
9092 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
9093 continue;
9095 /* Now generate markers for the beginning and end of the
9096 branch table. */
9097 emit_insn_before (gen_begin_brtab (), insn);
9098 emit_insn_after (gen_end_brtab (), insn);
9103 /* The PA has a number of odd instructions which can perform multiple
9104 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9105 it may be profitable to combine two instructions into one instruction
9106 with two outputs. It's not profitable on PA2.0 machines because the
9107 two outputs would take two slots in the reorder buffers.
9109 This routine finds instructions which can be combined and combines
9110 them. We only support some of the potential combinations, and we
9111 only try common ways to find suitable instructions.
9113 * addb can add two registers or a register and a small integer
9114 and jump to a nearby (+-8k) location. Normally the jump to the
9115 nearby location is conditional on the result of the add, but by
9116 using the "true" condition we can make the jump unconditional.
9117 Thus addb can perform two independent operations in one insn.
9119 * movb is similar to addb in that it can perform a reg->reg
9120 or small immediate->reg copy and jump to a nearby (+-8k) location.
9122 * fmpyadd and fmpysub can perform a FP multiply and either an
9123 FP add or FP sub if the operands of the multiply and add/sub are
9124 independent (there are other minor restrictions). Note both
9125 the fmpy and fadd/fsub can in theory move to better spots according
9126 to data dependencies, but for now we require the fmpy stay at a
9127 fixed location.
9129 * Many of the memory operations can perform pre & post updates
9130 of index registers. GCC's pre/post increment/decrement addressing
9131 is far too simple to take advantage of all the possibilities. This
9132 pass may not be suitable since those insns may not be independent.
9134 * comclr can compare two ints or an int and a register, nullify
9135 the following instruction and zero some other register. This
9136 is more difficult to use as it's harder to find an insn which
9137 will generate a comclr than finding something like an unconditional
9138 branch. (conditional moves & long branches create comclr insns).
9140 * Most arithmetic operations can conditionally skip the next
9141 instruction. They can be viewed as "perform this operation
9142 and conditionally jump to this nearby location" (where nearby
9143 is a few insns away). These are difficult to use due to the
9144 branch length restrictions. */
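/* An illustrative movb combination (registers and label are
   placeholders):

	copy %r4,%r5		; floater: reg->reg move
	b L$0017		; anchor: unconditional branch

   becomes the single insn

	movb,tr %r4,%r5,L$0017

   using the always-true condition, as emitted by
   output_parallel_movb.  */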
9146 static void
9147 pa_combine_instructions (void)
9149 rtx anchor, new_rtx;
9151 /* This can get expensive since the basic algorithm is on the
9152 order of O(n^2) (or worse). Only do it for -O2 or higher
9153 levels of optimization. */
9154 if (optimize < 2)
9155 return;
9157 /* Walk down the list of insns looking for "anchor" insns which
9158 may be combined with "floating" insns. As the name implies,
9159 "anchor" instructions don't move, while "floating" insns may
9160 move around. */
9161 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9162 new_rtx = make_insn_raw (new_rtx);
9164 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9166 enum attr_pa_combine_type anchor_attr;
9167 enum attr_pa_combine_type floater_attr;
9169 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9170 Also ignore any special USE insns. */
9171 if ((GET_CODE (anchor) != INSN
9172 && GET_CODE (anchor) != JUMP_INSN
9173 && GET_CODE (anchor) != CALL_INSN)
9174 || GET_CODE (PATTERN (anchor)) == USE
9175 || GET_CODE (PATTERN (anchor)) == CLOBBER
9176 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
9177 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
9178 continue;
9180 anchor_attr = get_attr_pa_combine_type (anchor);
9181 /* See if anchor is an insn suitable for combination. */
9182 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9183 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9184 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9185 && ! forward_branch_p (anchor)))
9187 rtx floater;
9189 for (floater = PREV_INSN (anchor);
9190 floater;
9191 floater = PREV_INSN (floater))
9193 if (GET_CODE (floater) == NOTE
9194 || (GET_CODE (floater) == INSN
9195 && (GET_CODE (PATTERN (floater)) == USE
9196 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9197 continue;
9199 /* Anything except a regular INSN will stop our search. */
9200 if (GET_CODE (floater) != INSN
9201 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9202 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9204 floater = NULL_RTX;
9205 break;
9208 /* See if FLOATER is suitable for combination with the
9209 anchor. */
9210 floater_attr = get_attr_pa_combine_type (floater);
9211 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9212 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9213 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9214 && floater_attr == PA_COMBINE_TYPE_FMPY))
9216 /* If ANCHOR and FLOATER can be combined, then we're
9217 done with this pass. */
9218 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9219 SET_DEST (PATTERN (floater)),
9220 XEXP (SET_SRC (PATTERN (floater)), 0),
9221 XEXP (SET_SRC (PATTERN (floater)), 1)))
9222 break;
9225 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9226 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9228 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9230 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9231 SET_DEST (PATTERN (floater)),
9232 XEXP (SET_SRC (PATTERN (floater)), 0),
9233 XEXP (SET_SRC (PATTERN (floater)), 1)))
9234 break;
9236 else
9238 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9239 SET_DEST (PATTERN (floater)),
9240 SET_SRC (PATTERN (floater)),
9241 SET_SRC (PATTERN (floater))))
9242 break;
9247 /* If we didn't find anything on the backwards scan try forwards. */
9248 if (!floater
9249 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9250 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9252 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9254 if (GET_CODE (floater) == NOTE
9255 || (GET_CODE (floater) == INSN
9256 && (GET_CODE (PATTERN (floater)) == USE
9257 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9259 continue;
9261 /* Anything except a regular INSN will stop our search. */
9262 if (GET_CODE (floater) != INSN
9263 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9264 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9266 floater = NULL_RTX;
9267 break;
9270 /* See if FLOATER is suitable for combination with the
9271 anchor. */
9272 floater_attr = get_attr_pa_combine_type (floater);
9273 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9274 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9275 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9276 && floater_attr == PA_COMBINE_TYPE_FMPY))
9278 /* If ANCHOR and FLOATER can be combined, then we're
9279 done with this pass. */
9280 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9281 SET_DEST (PATTERN (floater)),
9282 XEXP (SET_SRC (PATTERN (floater)), 0),
9284 XEXP (SET_SRC (PATTERN (floater)),
9285 1)))
9286 break;
9291 /* FLOATER will be nonzero if we found a suitable floating
9292 insn for combination with ANCHOR. */
9293 if (floater
9294 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9295 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9297 /* Emit the new instruction and delete the old anchor. */
9298 emit_insn_before (gen_rtx_PARALLEL
9299 (VOIDmode,
9300 gen_rtvec (2, PATTERN (anchor),
9301 PATTERN (floater))),
9302 anchor);
9304 SET_INSN_DELETED (anchor);
9306 /* Emit a special USE insn for FLOATER, then delete
9307 the floating insn. */
9308 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9309 delete_insn (floater);
9311 continue;
9313 else if (floater
9314 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9316 rtx temp;
9317 /* Emit the new_jump instruction and delete the old anchor. */
9318 temp
9319 = emit_jump_insn_before (gen_rtx_PARALLEL
9320 (VOIDmode,
9321 gen_rtvec (2, PATTERN (anchor),
9322 PATTERN (floater))),
9323 anchor);
9325 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9326 SET_INSN_DELETED (anchor);
9328 /* Emit a special USE insn for FLOATER, then delete
9329 the floating insn. */
9330 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9331 delete_insn (floater);
9332 continue;
9338 static int
9339 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9340 rtx src1, rtx src2)
9342 int insn_code_number;
9343 rtx start, end;
9345 /* Create a PARALLEL with the patterns of ANCHOR and
9346 FLOATER, try to recognize it, then test constraints
9347 for the resulting pattern.
9349 If the pattern doesn't match or the constraints
9350 aren't met keep searching for a suitable floater
9351 insn. */
9352 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9353 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9354 INSN_CODE (new_rtx) = -1;
9355 insn_code_number = recog_memoized (new_rtx);
9356 if (insn_code_number < 0
9357 || (extract_insn (new_rtx), ! constrain_operands (1)))
9358 return 0;
9360 if (reversed)
9362 start = anchor;
9363 end = floater;
9365 else
9367 start = floater;
9368 end = anchor;
9371 /* There are up to three operands to consider: one
9372 output and two inputs.
9374 The output must not be used between FLOATER & ANCHOR
9375 exclusive. The inputs must not be set between
9376 FLOATER and ANCHOR exclusive. */
9378 if (reg_used_between_p (dest, start, end))
9379 return 0;
9381 if (reg_set_between_p (src1, start, end))
9382 return 0;
9384 if (reg_set_between_p (src2, start, end))
9385 return 0;
9387 /* If we get here, then everything is good. */
9388 return 1;
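/* Illustrative example (a schematic sketch, not from the original
   sources; register numbers are arbitrary): if ANCHOR is a
   floating-point multiply such as

     (set (reg:DF 100) (mult:DF (reg:DF 101) (reg:DF 102)))

   and FLOATER is an add such as

     (set (reg:DF 103) (plus:DF (reg:DF 104) (reg:DF 105)))

   then the candidate built above is a two-element PARALLEL of the two
   SETs, which the machine description can recognize as a single
   fmpyadd-type instruction when the operand constraints are met.  */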
9391 /* Return nonzero if references for INSN are delayed.
9393 Millicode insns are actually function calls with some special
9394 constraints on arguments and register usage.
9396 Millicode calls always expect their arguments in the integer argument
9397 registers, and always return their result in %r29 (ret1). They
9398 are expected to clobber their arguments, %r1, %r29, and the return
9399 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9401 This function tells reorg that the references to arguments and
9402 millicode calls do not appear to happen until after the millicode call.
9403 This allows reorg to put insns which set the argument registers into the
9404 delay slot of the millicode call -- thus they act more like traditional
9405 CALL_INSNs.
9407 Note we cannot consider side effects of the insn to be delayed because
9408 the branch and link insn will clobber the return pointer. If we happened
9409 to use the return pointer in the delay slot of the call, then we lose.
9411 get_attr_type will try to recognize the given insn, so make sure to
9412 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9413 in particular. */
9414 int
9415 insn_refs_are_delayed (rtx insn)
9417 return ((GET_CODE (insn) == INSN
9418 && GET_CODE (PATTERN (insn)) != SEQUENCE
9419 && GET_CODE (PATTERN (insn)) != USE
9420 && GET_CODE (PATTERN (insn)) != CLOBBER
9421 && get_attr_type (insn) == TYPE_MILLI));
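/* Example (illustrative): on the 32-bit port a signed integer divide
   is a branch and link to the $$divI millicode routine with the
   dividend in %r26 and the divisor in %r25; the quotient comes back
   in %r29 and %r31 holds the return pointer.  Because of the hook
   above, an insn that loads %r26 may be scheduled into the call's
   delay slot, just as for an ordinary CALL_INSN.  */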
9424 /* Promote the return value, but not the arguments. */
9426 static enum machine_mode
9427 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9428 enum machine_mode mode,
9429 int *punsignedp ATTRIBUTE_UNUSED,
9430 const_tree fntype ATTRIBUTE_UNUSED,
9431 int for_return)
9433 if (for_return == 0)
9434 return mode;
9435 return promote_mode (type, mode, punsignedp);
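/* For example (illustrative): a function returning "short" has its
   return value promoted to word_mode by promote_mode, while a "short"
   argument is left in HImode because for_return is 0 for arguments.  */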
9438 /* On the HP-PA the value is found in register(s) 28(-29), unless
9439 the mode is SF or DF. Then the value is returned in fr4 (32).
9441 This must perform the same promotions as PROMOTE_MODE, else promoting
9442 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9444 Small structures must be returned in a PARALLEL on PA64 in order
9445 to match the HP Compiler ABI. */
9447 static rtx
9448 pa_function_value (const_tree valtype,
9449 const_tree func ATTRIBUTE_UNUSED,
9450 bool outgoing ATTRIBUTE_UNUSED)
9452 enum machine_mode valmode;
9454 if (AGGREGATE_TYPE_P (valtype)
9455 || TREE_CODE (valtype) == COMPLEX_TYPE
9456 || TREE_CODE (valtype) == VECTOR_TYPE)
9458 if (TARGET_64BIT)
9460 /* Aggregates with a size less than or equal to 128 bits are
9461 returned in GR 28(-29). They are left justified. The pad
9462 bits are undefined. Larger aggregates are returned in
9463 memory. */
9464 rtx loc[2];
9465 int i, offset = 0;
9466 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9468 for (i = 0; i < ub; i++)
9470 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9471 gen_rtx_REG (DImode, 28 + i),
9472 GEN_INT (offset));
9473 offset += 8;
9476 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
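/* Illustrative result: for a 16-byte struct on PA64, ub is 2 and the
   loop above builds

     (parallel:BLK [(expr_list (reg:DI 28) (const_int 0))
                    (expr_list (reg:DI 29) (const_int 8))])

   i.e. the value is left justified in GRs 28 and 29.  */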
9478 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9480 /* Aggregates 5 to 8 bytes in size are returned in general
9481 registers r28-r29 in the same manner as other
9482 non-floating-point objects. The data is right-justified and
9483 zero-extended to 64 bits. This is opposite to the normal
9484 justification used on big endian targets and requires
9485 special treatment. */
9486 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9487 gen_rtx_REG (DImode, 28), const0_rtx);
9488 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9492 if ((INTEGRAL_TYPE_P (valtype)
9493 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9494 || POINTER_TYPE_P (valtype))
9495 valmode = word_mode;
9496 else
9497 valmode = TYPE_MODE (valtype);
9499 if (TREE_CODE (valtype) == REAL_TYPE
9500 && !AGGREGATE_TYPE_P (valtype)
9501 && TYPE_MODE (valtype) != TFmode
9502 && !TARGET_SOFT_FLOAT)
9503 return gen_rtx_REG (valmode, 32);
9505 return gen_rtx_REG (valmode, 28);
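/* Examples (illustrative): an "int" is returned in %r28, a "double"
   in fr4 (register 32) unless TARGET_SOFT_FLOAT, and on PA64 a
   "long double" (TFmode) falls through to the general registers
   starting at %r28.  */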
9508 /* Implement the TARGET_LIBCALL_VALUE hook. */
9510 static rtx
9511 pa_libcall_value (enum machine_mode mode,
9512 const_rtx fun ATTRIBUTE_UNUSED)
9514 if (! TARGET_SOFT_FLOAT
9515 && (mode == SFmode || mode == DFmode))
9516 return gen_rtx_REG (mode, 32);
9517 else
9518 return gen_rtx_REG (mode, 28);
9521 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9523 static bool
9524 pa_function_value_regno_p (const unsigned int regno)
9526 if (regno == 28
9527 || (! TARGET_SOFT_FLOAT && regno == 32))
9528 return true;
9530 return false;
9533 /* Update the data in CUM to advance over an argument
9534 of mode MODE and data type TYPE.
9535 (TYPE is null for libcalls where that information may not be available.) */
9537 static void
9538 pa_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9539 const_tree type, bool named ATTRIBUTE_UNUSED)
9541 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9543 cum->nargs_prototype--;
9544 cum->words += (arg_size
9545 + ((cum->words & 01)
9546 && type != NULL_TREE
9547 && arg_size > 1));
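/* Worked example (illustrative): on the 32-bit port a double has
   FUNCTION_ARG_SIZE 2.  If cum->words is odd, the expression above
   advances by 2 + 1 = 3 words, consuming the pad slot that keeps the
   argument double-word aligned; if cum->words is even, it advances by
   exactly 2.  */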
9550 /* Return the location of a parameter that is passed in a register or NULL
9551 if the parameter has any component that is passed in memory.
9553 This is new code and will be pushed into the net sources after
9554 further testing.
9556 ??? We might want to restructure this so that it looks more like other
9557 ports. */
9558 static rtx
9559 pa_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9560 const_tree type, bool named ATTRIBUTE_UNUSED)
9562 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9563 int alignment = 0;
9564 int arg_size;
9565 int fpr_reg_base;
9566 int gpr_reg_base;
9567 rtx retval;
9569 if (mode == VOIDmode)
9570 return NULL_RTX;
9572 arg_size = FUNCTION_ARG_SIZE (mode, type);
9574 /* If this arg would be passed partially or totally on the stack, then
9575 this routine should return zero. pa_arg_partial_bytes will
9576 handle arguments which are split between regs and stack slots if
9577 the ABI mandates split arguments. */
9578 if (!TARGET_64BIT)
9580 /* The 32-bit ABI does not split arguments. */
9581 if (cum->words + arg_size > max_arg_words)
9582 return NULL_RTX;
9584 else
9586 if (arg_size > 1)
9587 alignment = cum->words & 1;
9588 if (cum->words + alignment >= max_arg_words)
9589 return NULL_RTX;
9592 /* The 32-bit and 64-bit ABIs are rather different,
9593 particularly in their handling of FP registers. We might
9594 be able to cleverly share code between them, but I'm not
9595 going to bother in the hope that splitting them up results
9596 in code that is more easily understood. */
9598 if (TARGET_64BIT)
9600 /* Advance the base registers to their current locations.
9602 Remember, gprs grow towards smaller register numbers while
9603 fprs grow towards higher register numbers. Also remember that
9604 although FP regs are 32-bit addressable, we pretend that
9605 the registers are 64 bits wide. */
9606 gpr_reg_base = 26 - cum->words;
9607 fpr_reg_base = 32 + cum->words;
9609 /* Arguments wider than one word and small aggregates need special
9610 treatment. */
9611 if (arg_size > 1
9612 || mode == BLKmode
9613 || (type && (AGGREGATE_TYPE_P (type)
9614 || TREE_CODE (type) == COMPLEX_TYPE
9615 || TREE_CODE (type) == VECTOR_TYPE)))
9617 /* Double-extended precision (80-bit), quad-precision (128-bit)
9618 and aggregates including complex numbers are aligned on
9619 128-bit boundaries. The first eight 64-bit argument slots
9620 are associated one-to-one with general registers r26
9621 through r19, and also with floating-point registers fr4
9622 through fr11. Arguments larger than one word are always
9623 passed in general registers.
9625 Using a PARALLEL with a word mode register results in left
9626 justified data on a big-endian target. */
9628 rtx loc[8];
9629 int i, offset = 0, ub = arg_size;
9631 /* Align the base register. */
9632 gpr_reg_base -= alignment;
9634 ub = MIN (ub, max_arg_words - cum->words - alignment);
9635 for (i = 0; i < ub; i++)
9637 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9638 gen_rtx_REG (DImode, gpr_reg_base),
9639 GEN_INT (offset));
9640 gpr_reg_base -= 1;
9641 offset += 8;
9644 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
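/* Illustrative case: with cum->words == 1, a 16-byte aggregate is
   first aligned to an even slot (alignment == 1), so the PARALLEL
   built above uses (reg:DI 24) at offset 0 and (reg:DI 23) at
   offset 8, i.e. argument slots 2 and 3.  */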
9647 else
9649 /* If the argument is larger than a word, then we know precisely
9650 which registers we must use. */
9651 if (arg_size > 1)
9653 if (cum->words)
9655 gpr_reg_base = 23;
9656 fpr_reg_base = 38;
9658 else
9660 gpr_reg_base = 25;
9661 fpr_reg_base = 34;
9664 /* Structures 5 to 8 bytes in size are passed in the general
9665 registers in the same manner as other non-floating-point
9666 objects. The data is right-justified and zero-extended
9667 to 64 bits. This is opposite to the normal justification
9668 used on big endian targets and requires special treatment.
9669 We now define BLOCK_REG_PADDING to pad these objects.
9670 Aggregates, complex and vector types are passed in the same
9671 manner as structures. */
9672 if (mode == BLKmode
9673 || (type && (AGGREGATE_TYPE_P (type)
9674 || TREE_CODE (type) == COMPLEX_TYPE
9675 || TREE_CODE (type) == VECTOR_TYPE)))
9677 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9678 gen_rtx_REG (DImode, gpr_reg_base),
9679 const0_rtx);
9680 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9683 else
9685 /* We have a single word (32 bits). A simple computation
9686 will get us the register #s we need. */
9687 gpr_reg_base = 26 - cum->words;
9688 fpr_reg_base = 32 + 2 * cum->words;
9692 /* Determine if the argument needs to be passed in both general and
9693 floating point registers. */
9694 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9695 /* If we are doing soft-float with portable runtime, then there
9696 is no need to worry about FP regs. */
9697 && !TARGET_SOFT_FLOAT
9698 /* The parameter must be some kind of scalar float, else we just
9699 pass it in integer registers. */
9700 && GET_MODE_CLASS (mode) == MODE_FLOAT
9701 /* The target function must not have a prototype. */
9702 && cum->nargs_prototype <= 0
9703 /* libcalls do not need to pass items in both FP and general
9704 registers. */
9705 && type != NULL_TREE
9706 /* All this hair applies to "outgoing" args only. This includes
9707 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
9708 && !cum->incoming)
9709 /* Also pass outgoing floating arguments in both registers in indirect
9710 calls with the 32-bit ABI and the HP assembler since there is no
9711 way to specify the argument locations in static functions. */
9712 || (!TARGET_64BIT
9713 && !TARGET_GAS
9714 && !cum->incoming
9715 && cum->indirect
9716 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9718 retval
9719 = gen_rtx_PARALLEL
9720 (mode,
9721 gen_rtvec (2,
9722 gen_rtx_EXPR_LIST (VOIDmode,
9723 gen_rtx_REG (mode, fpr_reg_base),
9724 const0_rtx),
9725 gen_rtx_EXPR_LIST (VOIDmode,
9726 gen_rtx_REG (mode, gpr_reg_base),
9727 const0_rtx)));
9729 else
9731 /* See if we should pass this parameter in a general register. */
9732 if (TARGET_SOFT_FLOAT
9733 /* Indirect calls in the normal 32-bit ABI require all arguments
9734 to be passed in general registers. */
9735 || (!TARGET_PORTABLE_RUNTIME
9736 && !TARGET_64BIT
9737 && !TARGET_ELF32
9738 && cum->indirect)
9739 /* If the parameter is not a scalar floating-point parameter,
9740 then it belongs in GPRs. */
9741 || GET_MODE_CLASS (mode) != MODE_FLOAT
9742 /* Structure with single SFmode field belongs in GPR. */
9743 || (type && AGGREGATE_TYPE_P (type)))
9744 retval = gen_rtx_REG (mode, gpr_reg_base);
9745 else
9746 retval = gen_rtx_REG (mode, fpr_reg_base);
9748 return retval;
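/* Example (illustrative): for a double passed as the first argument
   of an unprototyped call on a 32-bit ELF target with hardware
   floats, arg_size is 2, so gpr_reg_base is 25 and fpr_reg_base is
   34; the PARALLEL built above names both registers and the caller
   sets the value up in the FP and general banks simultaneously.  */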
9751 /* Arguments larger than one word are double-word aligned. */
9753 static unsigned int
9754 pa_function_arg_boundary (enum machine_mode mode, const_tree type)
9756 bool singleword = (type
9757 ? (integer_zerop (TYPE_SIZE (type))
9758 || !TREE_CONSTANT (TYPE_SIZE (type))
9759 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9760 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9762 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
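/* For instance (illustrative): an "int" fits in one word and gets
   PARM_BOUNDARY, while a "double" exceeds UNITS_PER_WORD on the
   32-bit port and gets MAX_PARM_BOUNDARY.  A type whose size is not
   constant is treated as a single word.  */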
9765 /* If this arg would be passed totally in registers or totally on the stack,
9766 then this routine should return zero. */
9768 static int
9769 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9770 tree type, bool named ATTRIBUTE_UNUSED)
9772 unsigned int max_arg_words = 8;
9773 unsigned int offset = 0;
9775 if (!TARGET_64BIT)
9776 return 0;
9778 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9779 offset = 1;
9781 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9782 /* Arg fits fully into registers. */
9783 return 0;
9784 else if (cum->words + offset >= max_arg_words)
9785 /* Arg fully on the stack. */
9786 return 0;
9787 else
9788 /* Arg is split. */
9789 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
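/* Worked example (illustrative): on PA64 with cum->words == 6 and a
   32-byte argument (FUNCTION_ARG_SIZE 4), offset is 0; since
   6 + 4 > 8 and 6 < 8 the argument is split, and
   (8 - 6) * UNITS_PER_WORD = 16 bytes are passed in registers with
   the remainder on the stack.  */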
9793 /* A get_unnamed_section callback for switching to the text section.
9795 This function is only used with SOM. Because we don't support
9796 named subspaces, we can only create a new subspace or switch back
9797 to the default text subspace. */
9799 static void
9800 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9802 gcc_assert (TARGET_SOM);
9803 if (TARGET_GAS)
9805 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9807 /* We only want to emit a .nsubspa directive once at the
9808 start of the function. */
9809 cfun->machine->in_nsubspa = 1;
9811 /* Create a new subspace for the text. This provides
9812 better stub placement and one-only functions. */
9813 if (cfun->decl
9814 && DECL_ONE_ONLY (cfun->decl)
9815 && !DECL_WEAK (cfun->decl))
9817 output_section_asm_op ("\t.SPACE $TEXT$\n"
9818 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9819 "ACCESS=44,SORT=24,COMDAT");
9820 return;
9823 else
9825 /* There isn't a current function or the body of the current
9826 function has been completed. So, we are changing to the
9827 text section to output debugging information. Thus, we
9828 need to forget that we are in the text section so that
9829 varasm.c will call us when text_section is selected again. */
9830 gcc_assert (!cfun || !cfun->machine
9831 || cfun->machine->in_nsubspa == 2);
9832 in_section = NULL;
9834 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9835 return;
9837 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9840 /* A get_unnamed_section callback for switching to comdat data
9841 sections. This function is only used with SOM. */
9843 static void
9844 som_output_comdat_data_section_asm_op (const void *data)
9846 in_section = NULL;
9847 output_section_asm_op (data);
9850 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
9852 static void
9853 pa_som_asm_init_sections (void)
9855 text_section
9856 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9858 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9859 is not being generated. */
9860 som_readonly_data_section
9861 = get_unnamed_section (0, output_section_asm_op,
9862 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9864 /* When secondary definitions are not supported, SOM makes readonly
9865 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9866 the comdat flag. */
9867 som_one_only_readonly_data_section
9868 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9869 "\t.SPACE $TEXT$\n"
9870 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9871 "ACCESS=0x2c,SORT=16,COMDAT");
9874 /* When secondary definitions are not supported, SOM makes data one-only
9875 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9876 som_one_only_data_section
9877 = get_unnamed_section (SECTION_WRITE,
9878 som_output_comdat_data_section_asm_op,
9879 "\t.SPACE $PRIVATE$\n"
9880 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9881 "ACCESS=31,SORT=24,COMDAT");
9883 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9884 which reference data within the $TEXT$ space (for example constant
9885 strings in the $LIT$ subspace).
9887 The assemblers (GAS and HP as) both have problems with handling
9888 the difference of two symbols which is the other correct way to
9889 reference constant data during PIC code generation.
9891 So, there's no way to reference constant data which is in the
9892 $TEXT$ space during PIC generation. Instead place all constant
9893 data into the $PRIVATE$ subspace (this reduces sharing, but it
9894 works correctly). */
9895 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9897 /* We must not have a reference to an external symbol defined in a
9898 shared library in a readonly section, else the SOM linker will
9899 complain.
9901 So, we force exception information into the data section. */
9902 exception_section = data_section;
9905 /* On hpux10, the linker will give an error if we have a reference
9906 in the read-only data section to a symbol defined in a shared
9907 library. Therefore, expressions that might require a reloc
9908 cannot be placed in the read-only data section. */
9910 static section *
9911 pa_select_section (tree exp, int reloc,
9912 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9914 if (TREE_CODE (exp) == VAR_DECL
9915 && TREE_READONLY (exp)
9916 && !TREE_THIS_VOLATILE (exp)
9917 && DECL_INITIAL (exp)
9918 && (DECL_INITIAL (exp) == error_mark_node
9919 || TREE_CONSTANT (DECL_INITIAL (exp)))
9920 && !reloc)
9922 if (TARGET_SOM
9923 && DECL_ONE_ONLY (exp)
9924 && !DECL_WEAK (exp))
9925 return som_one_only_readonly_data_section;
9926 else
9927 return readonly_data_section;
9929 else if (CONSTANT_CLASS_P (exp) && !reloc)
9930 return readonly_data_section;
9931 else if (TARGET_SOM
9932 && TREE_CODE (exp) == VAR_DECL
9933 && DECL_ONE_ONLY (exp)
9934 && !DECL_WEAK (exp))
9935 return som_one_only_data_section;
9936 else
9937 return data_section;
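/* Consequently (illustrative): "static const int x = 5;" selects the
   read-only data section (or its one-only variant under SOM), while a
   read-only variable whose initializer needs a relocation has RELOC
   set and falls through to data_section.  */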
9940 static void
9941 pa_globalize_label (FILE *stream, const char *name)
9943 /* We only handle DATA objects here; functions are globalized in
9944 ASM_DECLARE_FUNCTION_NAME. */
9945 if (! FUNCTION_NAME_P (name))
9947 fputs ("\t.EXPORT ", stream);
9948 assemble_name (stream, name);
9949 fputs (",DATA\n", stream);
9953 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9955 static rtx
9956 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9957 int incoming ATTRIBUTE_UNUSED)
9959 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9962 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9964 bool
9965 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9967 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9968 PA64 ABI says that objects larger than 128 bits are returned in memory.
9969 Note, int_size_in_bytes can return -1 if the size of the object is
9970 variable or larger than the maximum value that can be expressed as
9971 a HOST_WIDE_INT. It can also return zero for an empty type. The
9972 simplest way to handle variable and empty types is to pass them in
9973 memory. This avoids problems in defining the boundaries of argument
9974 slots, allocating registers, etc. */
9975 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9976 || int_size_in_bytes (type) <= 0);
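/* For example (illustrative): a 12-byte struct is returned in memory
   on the 32-bit port (12 > 8) but in registers on PA64 (12 <= 16),
   and a variable-sized type always goes in memory because
   int_size_in_bytes returns -1 for it.  */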
9979 /* Structure to hold declaration and name of external symbols that are
9980 emitted by GCC. We generate a vector of these symbols and output them
9981 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9982 This avoids putting out names that are never really used. */
9984 typedef struct GTY(()) extern_symbol
9986 tree decl;
9987 const char *name;
9988 } extern_symbol;
9990 /* Define gc'd vector type for extern_symbol. */
9991 DEF_VEC_O(extern_symbol);
9992 DEF_VEC_ALLOC_O(extern_symbol,gc);
9994 /* Vector of extern_symbol objects. */
9995 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9997 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9998 /* Mark DECL (name NAME) as an external reference (assembler output
9999 file FILE). This saves the names to output at the end of the file
10000 if actually referenced. */
10002 void
10003 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
10005 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
10007 gcc_assert (file == asm_out_file);
10008 p->decl = decl;
10009 p->name = name;
10012 /* Output text required at the end of an assembler file.
10013 This includes deferred plabels and .import directives for
10014 all external symbols that were actually referenced. */
10016 static void
10017 pa_hpux_file_end (void)
10019 unsigned int i;
10020 extern_symbol *p;
10022 if (!NO_DEFERRED_PROFILE_COUNTERS)
10023 output_deferred_profile_counters ();
10025 output_deferred_plabels ();
10027 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
10029 tree decl = p->decl;
10031 if (!TREE_ASM_WRITTEN (decl)
10032 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
10033 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
10036 VEC_free (extern_symbol, gc, extern_symbols);
10038 #endif
10040 /* Return true if a change from mode FROM to mode TO for a register
10041 in register class RCLASS is invalid. */
10043 bool
10044 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10045 enum reg_class rclass)
10047 if (from == to)
10048 return false;
10050 /* Reject changes to/from complex and vector modes. */
10051 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10052 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10053 return true;
10055 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10056 return false;
10058 /* There is no way to load QImode or HImode values directly from
10059 memory. SImode loads to the FP registers are not zero extended.
10060 On the 64-bit target, this conflicts with the definition of
10061 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
10062 with different sizes in the floating-point registers. */
10063 if (MAYBE_FP_REG_CLASS_P (rclass))
10064 return true;
10066 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
10067 in specific sets of registers. Thus, we cannot allow changing
10068 to a larger mode when it's larger than a word. */
10069 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10070 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10071 return true;
10073 return false;
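/* Illustrative consequences: SFmode to DFmode changes are rejected
   for any class that may include FP registers since the sizes differ,
   while SImode to SFmode changes are allowed everywhere because the
   sizes match.  */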
10076 /* Returns TRUE if it is a good idea to tie two pseudo registers
10077 when one has mode MODE1 and one has mode MODE2.
10078 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
10079 for any hard reg, then this must be FALSE for correct output.
10081 We should return FALSE for QImode and HImode because these modes
10082 are not ok in the floating-point registers. However, this prevents
10083 tying these modes to SImode and DImode in the general registers.
10084 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
10085 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
10086 in the floating-point registers. */
10088 bool
10089 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
10091 /* Don't tie modes in different classes. */
10092 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10093 return false;
10095 return true;
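/* Hence (illustrative): SImode and DImode may be tied, but SImode and
   SFmode may not, because MODE_INT and MODE_FLOAT differ even when
   the sizes match.  */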
10099 /* Length in units of the trampoline instruction code. */
10101 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10104 /* Output assembler code for a block containing the constant parts
10105 of a trampoline, leaving space for the variable parts.
10107 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10108 and then branches to the specified routine.
10110 This code template is copied from the text segment to a stack
10111 location, patched by pa_trampoline_init to contain valid values,
10112 and then entered as a subroutine.
10114 It is best to keep this as small as possible to avoid having to
10115 flush multiple lines in the cache. */
10117 static void
10118 pa_asm_trampoline_template (FILE *f)
10120 if (!TARGET_64BIT)
10122 fputs ("\tldw 36(%r22),%r21\n", f);
10123 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
10124 if (ASSEMBLER_DIALECT == 0)
10125 fputs ("\tdepi 0,31,2,%r21\n", f);
10126 else
10127 fputs ("\tdepwi 0,31,2,%r21\n", f);
10128 fputs ("\tldw 4(%r21),%r19\n", f);
10129 fputs ("\tldw 0(%r21),%r21\n", f);
10130 if (TARGET_PA_20)
10132 fputs ("\tbve (%r21)\n", f);
10133 fputs ("\tldw 40(%r22),%r29\n", f);
10134 fputs ("\t.word 0\n", f);
10135 fputs ("\t.word 0\n", f);
10137 else
10139 fputs ("\tldsid (%r21),%r1\n", f);
10140 fputs ("\tmtsp %r1,%sr0\n", f);
10141 fputs ("\tbe 0(%sr0,%r21)\n", f);
10142 fputs ("\tldw 40(%r22),%r29\n", f);
10144 fputs ("\t.word 0\n", f);
10145 fputs ("\t.word 0\n", f);
10146 fputs ("\t.word 0\n", f);
10147 fputs ("\t.word 0\n", f);
10149 else
10151 fputs ("\t.dword 0\n", f);
10152 fputs ("\t.dword 0\n", f);
10153 fputs ("\t.dword 0\n", f);
10154 fputs ("\t.dword 0\n", f);
10155 fputs ("\tmfia %r31\n", f);
10156 fputs ("\tldd 24(%r31),%r1\n", f);
10157 fputs ("\tldd 24(%r1),%r27\n", f);
10158 fputs ("\tldd 16(%r1),%r1\n", f);
10159 fputs ("\tbve (%r1)\n", f);
10160 fputs ("\tldd 32(%r31),%r31\n", f);
10161 fputs ("\t.dword 0 ; fptr\n", f);
10162 fputs ("\t.dword 0 ; static link\n", f);
10166 /* Emit RTL insns to initialize the variable parts of a trampoline.
10167 FNADDR is an RTX for the address of the function's pure code.
10168 CXT is an RTX for the static chain value for the function.
10170 Move the function address to the trampoline template at offset 36.
10171 Move the static chain value to the trampoline template at offset 40.
10172 Move the trampoline address to the trampoline template at offset 44.
10173 Move r19 to the trampoline template at offset 48. The latter two
10174 words create a plabel for the indirect call to the trampoline.
10176 A similar sequence is used for the 64-bit port but the plabel is
10177 at the beginning of the trampoline.
10179 Finally, the cache entries for the trampoline code are flushed.
10180 This is necessary to ensure that the trampoline instruction sequence
10181 is written to memory prior to any attempts at prefetching the code
10182 sequence. */
10184 static void
10185 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10187 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10188 rtx start_addr = gen_reg_rtx (Pmode);
10189 rtx end_addr = gen_reg_rtx (Pmode);
10190 rtx line_length = gen_reg_rtx (Pmode);
10191 rtx r_tramp, tmp;
10193 emit_block_move (m_tramp, assemble_trampoline_template (),
10194 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10195 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10197 if (!TARGET_64BIT)
10199 tmp = adjust_address (m_tramp, Pmode, 36);
10200 emit_move_insn (tmp, fnaddr);
10201 tmp = adjust_address (m_tramp, Pmode, 40);
10202 emit_move_insn (tmp, chain_value);
10204 /* Create a fat pointer for the trampoline. */
10205 tmp = adjust_address (m_tramp, Pmode, 44);
10206 emit_move_insn (tmp, r_tramp);
10207 tmp = adjust_address (m_tramp, Pmode, 48);
10208 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10210 /* fdc and fic only use registers for the address to flush;
10211 they do not accept integer displacements. We align the
10212 start and end addresses to the beginning of their respective
10213 cache lines to minimize the number of lines flushed. */
10214 emit_insn (gen_andsi3 (start_addr, r_tramp,
10215 GEN_INT (-MIN_CACHELINE_SIZE)));
10216 tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE-1));
10217 emit_insn (gen_andsi3 (end_addr, tmp,
10218 GEN_INT (-MIN_CACHELINE_SIZE)));
10219 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10220 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10221 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10222 gen_reg_rtx (Pmode),
10223 gen_reg_rtx (Pmode)));
10225 else
10227 tmp = adjust_address (m_tramp, Pmode, 56);
10228 emit_move_insn (tmp, fnaddr);
10229 tmp = adjust_address (m_tramp, Pmode, 64);
10230 emit_move_insn (tmp, chain_value);
10232 /* Create a fat pointer for the trampoline. */
10233 tmp = adjust_address (m_tramp, Pmode, 16);
10234 emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
10235 tmp = adjust_address (m_tramp, Pmode, 24);
10236 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10238 /* fdc and fic only use registers for the address to flush;
10239 they do not accept integer displacements. We align the
10240 start and end addresses to the beginning of their respective
10241 cache lines to minimize the number of lines flushed. */
10242 tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
10243 emit_insn (gen_anddi3 (start_addr, tmp,
10244 GEN_INT (-MIN_CACHELINE_SIZE)));
10245 tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
10246 emit_insn (gen_anddi3 (end_addr, tmp,
10247 GEN_INT (-MIN_CACHELINE_SIZE)));
10248 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10249 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10250 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10251 gen_reg_rtx (Pmode),
10252 gen_reg_rtx (Pmode)));
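/* Worked example (illustrative, assuming the usual MIN_CACHELINE_SIZE
   of 32): for a trampoline at address 0x7b03a1f4, start_addr becomes
   0x7b03a1e0 and end_addr is similarly rounded down from the last
   code byte, so every cache line occupied by the TRAMPOLINE_CODE_SIZE
   bytes of code is flushed from both the data and instruction
   caches.  */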
10256 /* Perform any machine-specific adjustment in the address of the trampoline.
10257 ADDR contains the address that was passed to pa_trampoline_init.
10258 Adjust the trampoline address to point to the plabel at offset 44. */
10260 static rtx
10261 pa_trampoline_adjust_address (rtx addr)
10263 if (!TARGET_64BIT)
10264 addr = memory_address (Pmode, plus_constant (addr, 46));
10265 return addr;
10268 static rtx
10269 pa_delegitimize_address (rtx orig_x)
10271 rtx x = delegitimize_mem_from_attrs (orig_x);
10273 if (GET_CODE (x) == LO_SUM
10274 && GET_CODE (XEXP (x, 1)) == UNSPEC
10275 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10276 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10277 return x;
10280 static rtx
10281 pa_internal_arg_pointer (void)
10283 /* The argument pointer and the hard frame pointer are the same in
10284 the 32-bit runtime, so we don't need a copy. */
10285 if (TARGET_64BIT)
10286 return copy_to_reg (virtual_incoming_args_rtx);
10287 else
10288 return virtual_incoming_args_rtx;
10291 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10292 Frame pointer elimination is automatically handled. */
10294 static bool
10295 pa_can_eliminate (const int from, const int to)
10297 /* The argument pointer cannot be eliminated in the 64-bit runtime. */
10298 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10299 return false;
10301 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10302 ? ! frame_pointer_needed
10303 : true);
10306 /* Define the offset between two registers, FROM to be eliminated and its
10307 replacement TO, at the start of a routine. */
10308 HOST_WIDE_INT
10309 pa_initial_elimination_offset (int from, int to)
10311 HOST_WIDE_INT offset;
10313 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10314 && to == STACK_POINTER_REGNUM)
10315 offset = -compute_frame_size (get_frame_size (), 0);
10316 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10317 offset = 0;
10318 else
10319 gcc_unreachable ();
10321 return offset;
10324 static void
10325 pa_conditional_register_usage (void)
10327 int i;
10329 if (!TARGET_64BIT && !TARGET_PA_11)
10331 for (i = 56; i <= FP_REG_LAST; i++)
10332 fixed_regs[i] = call_used_regs[i] = 1;
10333 for (i = 33; i < 56; i += 2)
10334 fixed_regs[i] = call_used_regs[i] = 1;
10336 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10338 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10339 fixed_regs[i] = call_used_regs[i] = 1;
10341 if (flag_pic)
10342 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10345 /* Target hook for c_mode_for_suffix. */
10347 static enum machine_mode
10348 pa_c_mode_for_suffix (char suffix)
10350 if (HPUX_LONG_DOUBLE_LIBRARY)
10352 if (suffix == 'q')
10353 return TFmode;
10356 return VOIDmode;
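/* For example (illustrative): with an HP-UX long double library the
   constant "1.0q" is given TFmode, while on targets without that
   library the 'q' suffix is not recognized and VOIDmode is
   returned.  */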
10359 /* Target hook for function_section. */
10361 static section *
10362 pa_function_section (tree decl, enum node_frequency freq,
10363 bool startup, bool exit)
10365 /* Put functions in the text section if the target doesn't have named sections. */
10366 if (!targetm.have_named_sections)
10367 return text_section;
10369 /* Force nested functions into the same section as the containing
10370 function. */
10371 if (decl
10372 && DECL_SECTION_NAME (decl) == NULL_TREE
10373 && DECL_CONTEXT (decl) != NULL_TREE
10374 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10375 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE)
10376 return function_section (DECL_CONTEXT (decl));
10378 /* Otherwise, use the default function section. */
10379 return default_function_section (decl, freq, startup, exit);
10382 #include "gt-pa.h"