/* Output routines for GCC for Renesas / SuperH SH.
Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
2003 Free Software Foundation, Inc.
Contributed by Steve Chamberlain (sac@cygnus.com).
Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "expr.h"
#include "optabs.h"
#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "toplev.h"
#include "recog.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "real.h"
#include "langhooks.h"
#include "basic-block.h"
#include "ra.h"
#include "cfglayout.h"
#include "intl.h"

int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
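
/* For illustration: a DImode or DFmode value held in the register pair
   r4/r5 has its most significant word in register r4 + MSW and its least
   significant word in r4 + LSW, i.e. r5/r4 on a little-endian target and
   r4/r5 on a big-endian one (see the 'R' and 'S' cases in print_operand
   below).  */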

/* These are some macros to abstract register modes. */
#define CONST_OK_FOR_ADD(size) \
(TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))

/* Set to 1 by expand_prologue() when the function is an interrupt handler. */
int current_function_interrupt;

/* ??? The pragma interrupt support will not work for SH3. */
/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
output code for the next function appropriate for an interrupt handler. */
int pragma_interrupt;

/* This is set by the trap_exit attribute for functions. It specifies
a trap number to be used in a trapa instruction at function exit
(instead of an rte instruction). */
int trap_exit;

/* This is used by the sp_switch attribute for functions. It specifies
a variable holding the address of the stack the interrupt function
should switch to/from at entry/exit. */
rtx sp_switch;

/* This is set by #pragma trapa, and is similar to the above, except that
the compiler doesn't emit code to preserve all registers. */
static int pragma_trapa;

/* This is set by #pragma nosave_low_regs. This is useful on the SH3,
which has a separate set of low regs for User and Supervisor modes.
This should only be used for the lowest level of interrupts. Higher levels
of interrupts must save the registers in case they themselves are
interrupted. */
int pragma_nosave_low_regs;

/* This is used for communication between SETUP_INCOMING_VARARGS and
sh_expand_prologue. */
int current_function_anonymous_args;

/* Global variables for machine-dependent things. */

/* Which cpu are we scheduling for. */
enum processor_type sh_cpu;

/* Saved operands from the last compare to use when we generate an scc
or bcc insn. */
rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
reg number. */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
DF_REGS, DF_REGS, DF_REGS, DF_REGS,
DF_REGS, DF_REGS, DF_REGS, DF_REGS,
NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
[MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
[MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
= SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

/* Provide reg_class from a letter such as appears in the machine
description. *: target independently reserved letter.
reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */

enum reg_class reg_class_from_letter[] =
{
/* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
/* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
/* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
/* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
/* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
/* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
/* y */ FPUL_REGS, /* z */ R0_REGS
};

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;

static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static void mark_use (rtx, rtx *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_use_dfa_interface (void);
static int sh_issue_rate (void);
static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
struct save_schedule_s *, int);

static bool sh_promote_prototypes (tree);
static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);

/* Initialize the GCC target structure. */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf. */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
sh_use_dfa_interface
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list

#undef TARGET_PCH_VALID_P
#define TARGET_PCH_VALID_P sh_pch_valid_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Print the operand address in x to the stream. */

void
print_operand_address (FILE *stream, rtx x)
switch (GET_CODE (x))
case REG:
case SUBREG:
fprintf (stream, "@%s", reg_names[true_regnum (x)]);
break;
case PLUS:
rtx base = XEXP (x, 0);
rtx index = XEXP (x, 1);
switch (GET_CODE (index))
case CONST_INT:
fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
reg_names[true_regnum (base)]);
break;
case REG:
case SUBREG:
int base_num = true_regnum (base);
int index_num = true_regnum (index);
fprintf (stream, "@(r0,%s)",
reg_names[MAX (base_num, index_num)]);
break;
default:
debug_rtx (x);
abort ();
break;
case PRE_DEC:
fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
break;
case POST_INC:
fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
break;
default:
x = mark_constant_pool_use (x);
output_addr_const (stream, x);
break;

/* Print operand x (an rtx) in assembler syntax to file stream
according to modifier code.
'.' print a .s if insn needs delay slot
',' print LOCAL_LABEL_PREFIX
'@' print trap, rte or rts depending upon pragma interruptness
'#' output a nop if there is nothing to put in the delay slot
''' print likelihood suffix (/u for unlikely).
'O' print a constant without the #
'R' print the LSW of a dp value - changes if in little endian
'S' print the MSW of a dp value - changes if in little endian
'T' print the next word of a dp value - same as 'R' in big endian mode.
'M' print an `x' if `m' will print `base,index'.
'N' print 'r63' if the operand is (const_int 0).
'm' print a pair `base,offset' or `base,index', for LD and ST.
'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
'o' output an operator. */
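
/* Illustrative example (not from the original source): for a DImode value
   in the register pair r4/r5, "%S0" prints the name of the register
   holding the MSW, "%R0" the one holding the LSW, and "%T0" the register
   following operand 0, so a template like "mov %S1,%S0" / "mov %R1,%R0"
   stays correct for either endianness.  */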

void
print_operand (FILE *stream, rtx x, int code)
switch (code)
case '.':
if (final_sequence
&& ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
&& get_attr_length (XVECEXP (final_sequence, 0, 1)))
fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
break;
case ',':
fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
break;
case '@':
if (trap_exit)
fprintf (stream, "trapa #%d", trap_exit);
else if (sh_cfun_interrupt_handler_p ())
fprintf (stream, "rte");
else
fprintf (stream, "rts");
break;
case '#':
/* Output a nop if there's nothing in the delay slot. */
if (dbr_sequence_length () == 0)
fprintf (stream, "\n\tnop");
break;
case '\'':
rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
fputs ("/u", stream);
break;
case 'O':
x = mark_constant_pool_use (x);
output_addr_const (stream, x);
break;
case 'R':
fputs (reg_names[REGNO (x) + LSW], (stream));
break;
case 'S':
fputs (reg_names[REGNO (x) + MSW], (stream));
break;
case 'T':
/* Next word of a double. */
switch (GET_CODE (x))
case REG:
fputs (reg_names[REGNO (x) + 1], (stream));
break;
case MEM:
if (GET_CODE (XEXP (x, 0)) != PRE_DEC
&& GET_CODE (XEXP (x, 0)) != POST_INC)
x = adjust_address (x, SImode, 4);
print_operand_address (stream, XEXP (x, 0));
break;
default:
break;
break;
case 'o':
switch (GET_CODE (x))
case PLUS: fputs ("add", stream); break;
case MINUS: fputs ("sub", stream); break;
case MULT: fputs ("mul", stream); break;
case DIV: fputs ("div", stream); break;
case EQ: fputs ("eq", stream); break;
case NE: fputs ("ne", stream); break;
case GT: case LT: fputs ("gt", stream); break;
case GE: case LE: fputs ("ge", stream); break;
case GTU: case LTU: fputs ("gtu", stream); break;
case GEU: case LEU: fputs ("geu", stream); break;
default:
break;
break;
case 'M':
if (GET_CODE (x) == MEM
&& GET_CODE (XEXP (x, 0)) == PLUS
&& (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
|| GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
fputc ('x', stream);
break;
case 'm':
if (GET_CODE (x) != MEM)
abort ();
x = XEXP (x, 0);
switch (GET_CODE (x))
case REG:
case SUBREG:
print_operand (stream, x, 0);
fputs (", 0", stream);
break;
case PLUS:
print_operand (stream, XEXP (x, 0), 0);
fputs (", ", stream);
print_operand (stream, XEXP (x, 1), 0);
break;
default:
abort ();
break;
case 'N':
if (x == CONST0_RTX (GET_MODE (x)))
fprintf ((stream), "r63");
break;
goto default_output;
case 'u':
if (GET_CODE (x) == CONST_INT)
fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
break;
/* Fall through. */
default_output:
default:
switch (GET_CODE (x))
/* FIXME: We need this on SHmedia32 because reload generates
some sign-extended HI or QI loads into DImode registers
but, because Pmode is SImode, the address ends up with a
subreg:SI of the DImode register. Maybe reload should be
fixed so as to apply alter_subreg to such loads? */
case SUBREG:
if (SUBREG_BYTE (x) != 0
|| GET_CODE (SUBREG_REG (x)) != REG)
abort ();
x = SUBREG_REG (x);
/* Fall through. */
case REG:
if (FP_REGISTER_P (REGNO (x))
&& GET_MODE (x) == V16SFmode)
fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
else if (FP_REGISTER_P (REGNO (x))
&& GET_MODE (x) == V4SFmode)
fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
else if (GET_CODE (x) == REG
&& GET_MODE (x) == V2SFmode)
fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
else if (FP_REGISTER_P (REGNO (x))
&& GET_MODE_SIZE (GET_MODE (x)) > 4)
fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
else
fputs (reg_names[REGNO (x)], (stream));
break;
case MEM:
output_address (XEXP (x, 0));
break;
case CONST:
if (TARGET_SHMEDIA
&& GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
&& GET_MODE (XEXP (x, 0)) == DImode
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
&& GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
fputc ('(', stream);
if (GET_CODE (val) == ASHIFTRT)
fputc ('(', stream);
if (GET_CODE (XEXP (val, 0)) == CONST)
fputc ('(', stream);
output_addr_const (stream, XEXP (val, 0));
if (GET_CODE (XEXP (val, 0)) == CONST)
fputc (')', stream);
fputs (" >> ", stream);
output_addr_const (stream, XEXP (val, 1));
fputc (')', stream);
else
if (GET_CODE (val) == CONST)
fputc ('(', stream);
output_addr_const (stream, val);
if (GET_CODE (val) == CONST)
fputc (')', stream);
fputs (" & 65535)", stream);
break;
/* Fall through. */
default:
if (TARGET_SH1)
fputc ('#', stream);
output_addr_const (stream, x);
break;
break;

/* Like force_operand, but guarantees that VALUE ends up in TARGET. */
static void
force_into (rtx value, rtx target)
value = force_operand (value, target);
if (! rtx_equal_p (value, target))
emit_insn (gen_move_insn (target, value));

/* Emit code to perform a block move. Choose the best method.

OPERANDS[0] is the destination.
OPERANDS[1] is the source.
OPERANDS[2] is the size.
OPERANDS[3] is the alignment safe to use. */

expand_block_move (rtx *operands)
int align = INTVAL (operands[3]);
int constp = (GET_CODE (operands[2]) == CONST_INT);
int bytes = (constp ? INTVAL (operands[2]) : 0);
/* If it isn't a constant number of bytes, or if it doesn't have 4 byte
alignment, or if it isn't a multiple of 4 bytes, then fail. */
if (! constp || align < 4 || (bytes % 4 != 0))
return 0;
if (TARGET_HARD_SH4)
if (bytes < 12)
return 0;
else if (bytes == 12)
tree entry_name;
rtx sym;
rtx func_addr_rtx;
rtx r4 = gen_rtx (REG, SImode, 4);
rtx r5 = gen_rtx (REG, SImode, 5);
entry_name = get_identifier ("__movstrSI12_i4");
sym = function_symbol (IDENTIFIER_POINTER (entry_name));
func_addr_rtx = copy_to_mode_reg (Pmode, sym);
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
emit_insn (gen_block_move_real_i4 (func_addr_rtx));
return 1;
else if (! TARGET_SMALLCODE)
tree entry_name;
rtx sym;
rtx func_addr_rtx;
int dwords;
rtx r4 = gen_rtx (REG, SImode, 4);
rtx r5 = gen_rtx (REG, SImode, 5);
rtx r6 = gen_rtx (REG, SImode, 6);
entry_name = get_identifier (bytes & 4
? "__movstr_i4_odd"
: "__movstr_i4_even");
sym = function_symbol (IDENTIFIER_POINTER (entry_name));
func_addr_rtx = copy_to_mode_reg (Pmode, sym);
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
dwords = bytes >> 3;
emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
return 1;
else
return 0;
if (bytes < 64)
char entry[30];
tree entry_name;
rtx sym;
rtx func_addr_rtx;
rtx r4 = gen_rtx_REG (SImode, 4);
rtx r5 = gen_rtx_REG (SImode, 5);
sprintf (entry, "__movstrSI%d", bytes);
entry_name = get_identifier (entry);
sym = function_symbol (IDENTIFIER_POINTER (entry_name));
func_addr_rtx = copy_to_mode_reg (Pmode, sym);
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
emit_insn (gen_block_move_real (func_addr_rtx));
return 1;
/* This is the same number of bytes as a memcpy call, but to a different
less common function name, so this will occasionally use more space. */
if (! TARGET_SMALLCODE)
tree entry_name;
rtx sym;
rtx func_addr_rtx;
int final_switch, while_loop;
rtx r4 = gen_rtx_REG (SImode, 4);
rtx r5 = gen_rtx_REG (SImode, 5);
rtx r6 = gen_rtx_REG (SImode, 6);
entry_name = get_identifier ("__movstr");
sym = function_symbol (IDENTIFIER_POINTER (entry_name));
func_addr_rtx = copy_to_mode_reg (Pmode, sym);
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
/* r6 controls the size of the move. 16 is decremented from it
for each 64 bytes moved. Then the negative bit left over is used
as an index into a list of move instructions. e.g., a 72 byte move
would be set up with size(r6) = 14, for one iteration through the
big while loop, and a switch of -2 for the last part. */
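/* A second worked example (for illustration, not from the original
source): a 136 byte move is 34 words, so final_switch is
16 - (34 % 16) = 14 and while_loop is ((34 / 16) - 1) * 16 = 16,
giving r6 = 30; two trips through the 64-byte loop bring r6 to -2,
which again selects the tail entry that copies the final 8 bytes. */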
final_switch = 16 - ((bytes / 4) % 16);
while_loop = ((bytes / 4) / 16 - 1) * 16;
emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
emit_insn (gen_block_lump_real (func_addr_rtx));
return 1;
return 0;

/* Prepare operands for a move define_expand; specifically, one of the
operands must be in a register. */

prepare_move_operands (rtx operands[], enum machine_mode mode)
if ((mode == SImode || mode == DImode)
&& flag_pic
&& ! ((mode == Pmode || mode == ptr_mode)
&& tls_symbolic_operand (operands[1], Pmode) != 0))
rtx temp;
if (SYMBOLIC_CONST_P (operands[1]))
if (GET_CODE (operands[0]) == MEM)
operands[1] = force_reg (Pmode, operands[1]);
else if (TARGET_SHMEDIA
&& GET_CODE (operands[1]) == LABEL_REF
&& target_reg_operand (operands[0], mode))
/* It's ok. */;
else
temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
operands[1] = legitimize_pic_address (operands[1], mode, temp);
else if (GET_CODE (operands[1]) == CONST
&& GET_CODE (XEXP (operands[1], 0)) == PLUS
&& SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
mode, temp);
operands[1] = expand_binop (mode, add_optab, temp,
XEXP (XEXP (operands[1], 0), 1),
no_new_pseudos ? temp
: gen_reg_rtx (Pmode),
0, OPTAB_LIB_WIDEN);
if (! reload_in_progress && ! reload_completed)
/* Copy the source to a register if both operands aren't registers. */
if (! register_operand (operands[0], mode)
&& ! sh_register_operand (operands[1], mode))
operands[1] = copy_to_mode_reg (mode, operands[1]);
if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
/* This is like change_address_1 (operands[0], mode, 0, 1) ,
except that we can't use that function because it is static. */
rtx new = change_address (operands[0], mode, 0);
MEM_COPY_ATTRIBUTES (new, operands[0]);
operands[0] = new;
/* This case can happen while generating code to move the result
of a library call to the target. Reject `st r0,@(rX,rY)' because
reload will fail to find a spill register for rX, since r0 is already
being used for the source. */
else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
&& GET_CODE (operands[0]) == MEM
&& GET_CODE (XEXP (operands[0], 0)) == PLUS
&& GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
operands[1] = copy_to_mode_reg (mode, operands[1]);
if (mode == Pmode || mode == ptr_mode)
rtx op0, op1;
enum tls_model tls_kind;
op0 = operands[0];
op1 = operands[1];
if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
rtx tga_op1, tga_ret, tmp, tmp2;
switch (tls_kind)
case TLS_MODEL_GLOBAL_DYNAMIC:
tga_ret = gen_rtx_REG (Pmode, R0_REG);
emit_insn (gen_tls_global_dynamic (tga_ret, op1));
op1 = tga_ret;
break;
case TLS_MODEL_LOCAL_DYNAMIC:
tga_ret = gen_rtx_REG (Pmode, R0_REG);
emit_insn (gen_tls_local_dynamic (tga_ret, op1));
tmp = gen_reg_rtx (Pmode);
emit_move_insn (tmp, tga_ret);
if (register_operand (op0, Pmode))
tmp2 = op0;
else
tmp2 = gen_reg_rtx (Pmode);
emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
op1 = tmp2;
break;
case TLS_MODEL_INITIAL_EXEC:
if (! flag_pic)
emit_insn (gen_GOTaddr2picreg ());
tga_op1 = gen_reg_rtx (Pmode);
tmp = gen_sym2GOTTPOFF (op1);
emit_insn (gen_tls_initial_exec (tga_op1, tmp));
op1 = tga_op1;
break;
case TLS_MODEL_LOCAL_EXEC:
tmp2 = gen_reg_rtx (Pmode);
emit_insn (gen_load_gbr (tmp2));
tmp = gen_reg_rtx (Pmode);
emit_insn (gen_symTPOFF2reg (tmp, op1));
RTX_UNCHANGING_P (tmp) = 1;
if (register_operand (op0, Pmode))
op1 = op0;
else
op1 = gen_reg_rtx (Pmode);
emit_insn (gen_addsi3 (op1, tmp, tmp2));
break;
default:
abort ();
operands[1] = op1;
return 0;

/* Prepare the operands for an scc instruction; make sure that the
compare has been done. */

prepare_scc_operands (enum rtx_code code)
rtx t_reg = gen_rtx_REG (SImode, T_REG);
enum rtx_code oldcode = code;
enum machine_mode mode;
/* First need a compare insn. */
switch (code)
case NE:
/* It isn't possible to handle this case. */
abort ();
case LT:
code = GT;
break;
case LE:
code = GE;
break;
case LTU:
code = GTU;
break;
case LEU:
code = GEU;
break;
default:
break;
if (code != oldcode)
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
sh_compare_op1 = tmp;
mode = GET_MODE (sh_compare_op0);
if (mode == VOIDmode)
mode = GET_MODE (sh_compare_op1);
sh_compare_op0 = force_reg (mode, sh_compare_op0);
if ((code != EQ && code != NE
&& (sh_compare_op1 != const0_rtx
|| code == GTU || code == GEU || code == LTU || code == LEU))
|| (mode == DImode && sh_compare_op1 != const0_rtx)
|| (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
sh_compare_op1 = force_reg (mode, sh_compare_op1);
if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
(mode == SFmode ? emit_sf_insn : emit_df_insn)
(gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (code, SImode,
sh_compare_op0, sh_compare_op1)),
gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
else
emit_insn (gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (code, SImode, sh_compare_op0,
sh_compare_op1)));
return t_reg;

/* Called from the md file, set up the operands of a compare instruction. */

void
from_compare (rtx *operands, int code)
enum machine_mode mode = GET_MODE (sh_compare_op0);
rtx insn;
if (mode == VOIDmode)
mode = GET_MODE (sh_compare_op1);
if (code != EQ
|| mode == DImode
|| (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
/* Force args into regs, since we can't use constants here. */
sh_compare_op0 = force_reg (mode, sh_compare_op0);
if (sh_compare_op1 != const0_rtx
|| code == GTU || code == GEU
|| (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
sh_compare_op1 = force_reg (mode, sh_compare_op1);
if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
from_compare (operands, GT);
insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
else
insn = gen_rtx_SET (VOIDmode,
gen_rtx_REG (SImode, T_REG),
gen_rtx (code, SImode, sh_compare_op0,
sh_compare_op1));
if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
insn = gen_rtx (PARALLEL, VOIDmode,
gen_rtvec (2, insn,
gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
(mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
else
emit_insn (insn);

/* Functions to output assembly code. */

/* Return a sequence of instructions to perform DI or DF move.

Since the SH cannot move a DI or DF in one instruction, we have
to take care when we see overlapping source and dest registers. */

const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
enum machine_mode mode)
rtx dst = operands[0];
rtx src = operands[1];
if (GET_CODE (dst) == MEM
&& GET_CODE (XEXP (dst, 0)) == PRE_DEC)
return "mov.l %T1,%0\n\tmov.l %1,%0";
if (register_operand (dst, mode)
&& register_operand (src, mode))
if (REGNO (src) == MACH_REG)
return "sts mach,%S0\n\tsts macl,%R0";
/* When mov.d r1,r2 do r2->r3 then r1->r2;
when mov.d r1,r0 do r1->r0 then r2->r1. */
if (REGNO (src) + 1 == REGNO (dst))
return "mov %T1,%T0\n\tmov %1,%0";
else
return "mov %1,%0\n\tmov %T1,%T0";
else if (GET_CODE (src) == CONST_INT)
if (INTVAL (src) < 0)
output_asm_insn ("mov #-1,%S0", operands);
else
output_asm_insn ("mov #0,%S0", operands);
return "mov %1,%R0";
else if (GET_CODE (src) == MEM)
int ptrreg = -1;
int dreg = REGNO (dst);
rtx inside = XEXP (src, 0);
if (GET_CODE (inside) == REG)
ptrreg = REGNO (inside);
else if (GET_CODE (inside) == SUBREG)
ptrreg = subreg_regno (inside);
else if (GET_CODE (inside) == PLUS)
ptrreg = REGNO (XEXP (inside, 0));
/* ??? A r0+REG address shouldn't be possible here, because it isn't
an offsettable address. Unfortunately, offsettable addresses use
QImode to check the offset, and a QImode offsettable address
requires r0 for the other operand, which is not currently
supported, so we can't use the 'o' constraint.
Thus we must check for and handle r0+REG addresses here.
We punt for now, since this is likely very rare. */
if (GET_CODE (XEXP (inside, 1)) == REG)
abort ();
else if (GET_CODE (inside) == LABEL_REF)
return "mov.l %1,%0\n\tmov.l %1+4,%T0";
else if (GET_CODE (inside) == POST_INC)
return "mov.l %1,%0\n\tmov.l %1,%T0";
else
abort ();
/* Work out the safe way to copy. Copy into the second half first. */
if (dreg == ptrreg)
return "mov.l %T1,%T0\n\tmov.l %1,%0";
return "mov.l %1,%0\n\tmov.l %T1,%T0";

/* Print an instruction which would have gone into a delay slot after
another instruction, but couldn't because the other instruction expanded
into a sequence where putting the slot insn at the end wouldn't work. */

static void
print_slot (rtx insn)
final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;

const char *
output_far_jump (rtx insn, rtx op)
struct { rtx lab, reg, op; } this;
rtx braf_base_lab = NULL_RTX;
const char *jump;
int far;
int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
rtx prev;
this.lab = gen_label_rtx ();
if (TARGET_SH2
&& offset >= -32764
&& offset - get_attr_length (insn) <= 32766)
far = 0;
jump = "mov.w %O0,%1; braf %1";
else
far = 1;
if (flag_pic)
if (TARGET_SH2)
jump = "mov.l %O0,%1; braf %1";
else
jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
else
jump = "mov.l %O0,%1; jmp @%1";
/* If we have a scratch register available, use it. */
if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
&& INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
output_asm_insn (jump, &this.lab);
if (dbr_sequence_length ())
print_slot (final_sequence);
else
output_asm_insn ("nop", 0);
else
/* Output the delay slot insn first if any. */
if (dbr_sequence_length ())
print_slot (final_sequence);
this.reg = gen_rtx_REG (SImode, 13);
/* We must keep the stack aligned to 8-byte boundaries on SH5.
Fortunately, MACL is fixed and call-clobbered, and we never
need its value across jumps, so save r13 in it instead of in
the stack. */
if (TARGET_SH5)
output_asm_insn ("lds r13, macl", 0);
else
output_asm_insn ("mov.l r13,@-r15", 0);
output_asm_insn (jump, &this.lab);
if (TARGET_SH5)
output_asm_insn ("sts macl, r13", 0);
else
output_asm_insn ("mov.l @r15+,r13", 0);
if (far && flag_pic && TARGET_SH2)
braf_base_lab = gen_label_rtx ();
(*targetm.asm_out.internal_label) (asm_out_file, "L",
CODE_LABEL_NUMBER (braf_base_lab));
if (far)
output_asm_insn (".align 2", 0);
(*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
this.op = op;
if (far && flag_pic)
if (TARGET_SH2)
this.lab = braf_base_lab;
output_asm_insn (".long %O2-%O0", &this.lab);
else
output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
return "";

/* Local label counter, used for constants in the pool and inside
pattern branches. */

static int lf = 100;

/* Output code for ordinary branches. */

const char *
output_branch (int logic, rtx insn, rtx *operands)
switch (get_attr_length (insn))
case 6:
/* This can happen if filling the delay slot has caused a forward
branch to exceed its range (we could reverse it, but only
when we know we won't overextend other branches; this should
best be handled by relaxation).
It can also happen when other condbranches hoist delay slot insn
from their destination, thus leading to code size increase.
But the branch will still be in the range -4092..+4098 bytes. */
if (! TARGET_RELAX)
int label = lf++;
/* The call to print_slot will clobber the operands. */
rtx op0 = operands[0];
/* If the instruction in the delay slot is annulled (true), then
there is no delay slot where we can put it now. The only safe
place for it is after the label. final will do that by default. */
if (final_sequence
&& ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
ASSEMBLER_DIALECT ? "/" : ".", label);
print_slot (final_sequence);
else
asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
output_asm_insn ("bra\t%l0", &op0);
fprintf (asm_out_file, "\tnop\n");
(*targetm.asm_out.internal_label)(asm_out_file, "LF", label);
return "";
/* When relaxing, handle this like a short branch. The linker
will fix it up if it still doesn't fit after relaxation. */
case 2:
return logic ? "bt%.\t%l0" : "bf%.\t%l0";
/* These are for SH2e, in which we have to account for the
extra nop because of the hardware bug in annulled branches. */
case 8:
if (! TARGET_RELAX)
int label = lf++;
if (final_sequence
&& INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
abort ();
asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
logic ? "f" : "t",
ASSEMBLER_DIALECT ? "/" : ".", label);
fprintf (asm_out_file, "\tnop\n");
output_asm_insn ("bra\t%l0", operands);
fprintf (asm_out_file, "\tnop\n");
(*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
return "";
/* When relaxing, fall through. */
case 4:
char buffer[10];
sprintf (buffer, "b%s%ss\t%%l0",
logic ? "t" : "f",
ASSEMBLER_DIALECT ? "/" : ".");
output_asm_insn (buffer, &operands[0]);
return "nop";
default:
/* There should be no longer branches now - that would
indicate that something has destroyed the branches set
up in machine_dependent_reorg. */
abort ();

const char *
output_branchy_insn (enum rtx_code code, const char *template,
rtx insn, rtx *operands)
rtx next_insn = NEXT_INSN (insn);
if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
rtx src = SET_SRC (PATTERN (next_insn));
if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
/* Following branch not taken */
operands[9] = gen_label_rtx ();
emit_label_after (operands[9], next_insn);
INSN_ADDRESSES_NEW (operands[9],
INSN_ADDRESSES (INSN_UID (next_insn))
+ get_attr_length (next_insn));
return template;
else
int offset = (branch_dest (next_insn)
- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
if (offset >= -252 && offset <= 258)
if (GET_CODE (src) == IF_THEN_ELSE)
/* branch_true */
src = XEXP (src, 1);
operands[9] = src;
return template;
operands[9] = gen_label_rtx ();
emit_label_after (operands[9], insn);
INSN_ADDRESSES_NEW (operands[9],
INSN_ADDRESSES (INSN_UID (insn))
+ get_attr_length (insn));
return template;

const char *
output_ieee_ccmpeq (rtx insn, rtx *operands)
return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);

/* Output the start of the assembler file. */

static void
sh_file_start (void)
default_file_start ();
if (TARGET_ELF)
/* We need to show the text section with the proper
attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
emits it without attributes in TEXT_SECTION, else GAS
will complain. We can teach GAS specifically about the
default attributes for our choice of text section, but
then we would have to change GAS again if/when we change
the text section name. */
fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
else
/* Switch to the data section so that the coffsem symbol
isn't in the text section. */
data_section ();
if (TARGET_LITTLE_ENDIAN)
fputs ("\t.little\n", asm_out_file);
if (!TARGET_ELF)
if (TARGET_SHCOMPACT)
fputs ("\t.mode\tSHcompact\n", asm_out_file);
else if (TARGET_SHMEDIA)
fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
TARGET_SHMEDIA64 ? 64 : 32);

/* Check if PAT includes UNSPEC_CALLER unspec pattern. */

static bool
unspec_caller_rtx_p (rtx pat)
switch (GET_CODE (pat))
case CONST:
return unspec_caller_rtx_p (XEXP (pat, 0));
case PLUS:
case MINUS:
if (unspec_caller_rtx_p (XEXP (pat, 0)))
return true;
return unspec_caller_rtx_p (XEXP (pat, 1));
case UNSPEC:
if (XINT (pat, 1) == UNSPEC_CALLER)
return true;
default:
break;
return false;

/* Indicate that INSN cannot be duplicated. This is true for insns
that generate a unique label. */

static bool
sh_cannot_copy_insn_p (rtx insn)
rtx pat;
if (!reload_completed || !flag_pic)
return false;
if (GET_CODE (insn) != INSN)
return false;
if (asm_noperands (insn) >= 0)
return false;
pat = PATTERN (insn);
if (GET_CODE (pat) != SET)
return false;
pat = SET_SRC (pat);
if (unspec_caller_rtx_p (pat))
return true;
return false;

/* Actual number of instructions used to make a shift by N. */
static const char ashiftrt_insns[] =
{ 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same. */
static const char shift_insns[] =
{ 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
One bit right shifts clobber the T bit, so when possible, put one bit
shifts in the middle of the sequence, so the ends are eligible for
branch delay slots. */
static const short shift_amounts[32][5] = {
{0}, {1}, {2}, {2, 1},
{2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
{8}, {8, 1}, {8, 2}, {8, 1, 2},
{8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
{16}, {16, 1}, {16, 2}, {16, 1, 2},
{16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
{16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
{16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
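
/* For example, a constant shift by 15 uses the three-insn sequence
   {8, -1, 8}: a shift by 8, a single-bit shift in the opposite direction
   (negative entries flip the direction, see gen_ashift), then another
   shift by 8.  The T-clobbering one-bit shift sits in the middle so the
   first and last insns stay eligible for branch delay slots.  */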

/* Likewise, but for shift amounts < 16, up to three highmost bits
might be clobbered. This is typically used when combined with some
kind of sign or zero extension. */

static const char ext_shift_insns[] =
{ 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
{0}, {1}, {2}, {2, 1},
{2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
{8}, {8, 1}, {8, 2}, {8, 1, 2},
{8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
{16}, {16, 1}, {16, 2}, {16, 1, 2},
{16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
{16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
{16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};

/* Assuming we have a value that has been sign-extended by at least one bit,
can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
to shift it by N without data loss, and quicker than by other means? */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
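
/* For reference: (((n) | 8) == 15) holds exactly for n == 7 and n == 15,
   the counts whose ext_shift_amounts sequences ({8, -1} and {16, -1})
   end in a one-bit right shift that can be turned arithmetic.  */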

/* This is used in length attributes in sh.md to help compute the length
of arbitrary constant shift instructions. */

shift_insns_rtx (rtx insn)
rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
int shift_count = INTVAL (XEXP (set_src, 1));
enum rtx_code shift_code = GET_CODE (set_src);
switch (shift_code)
case ASHIFTRT:
return ashiftrt_insns[shift_count];
case LSHIFTRT:
case ASHIFT:
return shift_insns[shift_count];
default:
abort();

/* Return the cost of a shift. */

static inline int
shiftcosts (rtx x)
int value;
if (TARGET_SHMEDIA)
return 1;
if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
if (GET_MODE (x) == DImode
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& INTVAL (XEXP (x, 1)) == 1)
return 2;
/* Everything else is invalid, because there is no pattern for it. */
return 10000;
/* If shift by a non constant, then this will be expensive. */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
return SH_DYNAMIC_SHIFT_COST;
value = INTVAL (XEXP (x, 1));
/* Otherwise, return the true cost in instructions. */
if (GET_CODE (x) == ASHIFTRT)
int cost = ashiftrt_insns[value];
/* If SH3, then we put the constant in a reg and use shad. */
if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
cost = 1 + SH_DYNAMIC_SHIFT_COST;
return cost;
else
return shift_insns[value];

/* Return the cost of an AND operation. */

static inline int
andcosts (rtx x)
int i;
/* Anding with a register is a single cycle and instruction. */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
return 1;
i = INTVAL (XEXP (x, 1));
if (TARGET_SHMEDIA)
if ((GET_CODE (XEXP (x, 1)) == CONST_INT
&& CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
|| EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
return 1;
else
return 2;
/* These constants are single cycle extu.[bw] instructions. */
if (i == 0xff || i == 0xffff)
return 1;
/* Constants that can be used in an and immediate instruction in a single
cycle, but this requires r0, so make it a little more expensive. */
if (CONST_OK_FOR_K08 (i))
return 2;
/* Constants that can be loaded with a mov immediate and an and.
This case is probably unnecessary. */
if (CONST_OK_FOR_I08 (i))
return 2;
/* Any other constants requires a 2 cycle pc-relative load plus an and.
This case is probably unnecessary. */
return 3;

/* Return the cost of an addition or a subtraction. */

static inline int
addsubcosts (rtx x)
/* Adding a register is a single cycle insn. */
if (GET_CODE (XEXP (x, 1)) == REG
|| GET_CODE (XEXP (x, 1)) == SUBREG)
return 1;
/* Likewise for small constants. */
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
return 1;
if (TARGET_SHMEDIA)
switch (GET_CODE (XEXP (x, 1)))
case CONST:
case LABEL_REF:
case SYMBOL_REF:
return TARGET_SHMEDIA64 ? 5 : 3;
case CONST_INT:
if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
return 2;
else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
return 3;
else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
return 4;
/* Fall through. */
default:
return 5;
/* Any other constant requires a 2 cycle pc-relative load plus an
addition. */
return 3;

/* Return the cost of a multiply. */
static inline int
multcosts (rtx x ATTRIBUTE_UNUSED)
if (TARGET_SHMEDIA)
return 3;
if (TARGET_SH2)
/* We have a mul insn, so we can never take more than the mul and the
read of the mac reg, but count more because of the latency and extra
reg usage. */
if (TARGET_SMALLCODE)
return 2;
return 3;
/* If we're aiming at small code, then just count the number of
insns in a multiply call sequence. */
if (TARGET_SMALLCODE)
return 5;
/* Otherwise count all the insns in the routine we'd be calling too. */
return 20;

/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */

static bool
sh_rtx_costs (rtx x, int code, int outer_code, int *total)
switch (code)
case CONST_INT:
if (TARGET_SHMEDIA)
if (INTVAL (x) == 0)
*total = 0;
else if (outer_code == AND && and_operand ((x), DImode))
*total = 0;
else if ((outer_code == IOR || outer_code == XOR
|| outer_code == PLUS)
&& CONST_OK_FOR_I10 (INTVAL (x)))
*total = 0;
else if (CONST_OK_FOR_I16 (INTVAL (x)))
*total = COSTS_N_INSNS (outer_code != SET);
else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
*total = COSTS_N_INSNS (2);
else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
*total = COSTS_N_INSNS (3);
else
*total = COSTS_N_INSNS (4);
return true;
if (CONST_OK_FOR_I08 (INTVAL (x)))
*total = 0;
else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
&& CONST_OK_FOR_K08 (INTVAL (x)))
*total = 1;
else
*total = 8;
return true;
case CONST:
case LABEL_REF:
case SYMBOL_REF:
if (TARGET_SHMEDIA64)
*total = COSTS_N_INSNS (4);
else if (TARGET_SHMEDIA32)
*total = COSTS_N_INSNS (2);
else
*total = 5;
return true;
case CONST_DOUBLE:
if (TARGET_SHMEDIA)
*total = COSTS_N_INSNS (4);
else
*total = 10;
return true;
case PLUS:
*total = COSTS_N_INSNS (addsubcosts (x));
return true;
case AND:
*total = COSTS_N_INSNS (andcosts (x));
return true;
case MULT:
*total = COSTS_N_INSNS (multcosts (x));
return true;
case ASHIFT:
case ASHIFTRT:
case LSHIFTRT:
*total = COSTS_N_INSNS (shiftcosts (x));
return true;
case DIV:
case UDIV:
case MOD:
case UMOD:
*total = COSTS_N_INSNS (20);
return true;
case FLOAT:
case FIX:
*total = 100;
return true;
default:
return false;

/* Compute the cost of an address. For the SH, all valid addresses are
the same cost. Use a slightly higher cost for reg + reg addressing,
since it increases pressure on r0. */

static int
sh_address_cost (rtx X)
return (GET_CODE (X) == PLUS
&& ! CONSTANT_P (XEXP (X, 1))
&& ! TARGET_SHMEDIA ? 1 : 0);

/* Code to expand a shift. */

void
gen_ashift (int type, int n, rtx reg)
/* Negative values here come from the shift_amounts array. */
if (n < 0)
if (type == ASHIFT)
type = LSHIFTRT;
else
type = ASHIFT;
n = -n;
switch (type)
case ASHIFTRT:
emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
break;
case LSHIFTRT:
if (n == 1)
emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
else
emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
break;
case ASHIFT:
emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
break;

/* Same for HImode */

void
gen_ashift_hi (int type, int n, rtx reg)
/* Negative values here come from the shift_amounts array. */
if (n < 0)
if (type == ASHIFT)
type = LSHIFTRT;
else
type = ASHIFT;
n = -n;
switch (type)
case ASHIFTRT:
case LSHIFTRT:
/* We don't have HImode right shift operations because using the
ordinary 32 bit shift instructions for that doesn't generate proper
zero/sign extension.
gen_ashift_hi is only called in contexts where we know that the
sign extension works out correctly. */
int offset = 0;
if (GET_CODE (reg) == SUBREG)
offset = SUBREG_BYTE (reg);
reg = SUBREG_REG (reg);
gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
break;
case ASHIFT:
emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
break;

/* Output RTL to split a constant shift into its component SH constant
shift instructions. */

void
gen_shifty_op (int code, rtx *operands)
int value = INTVAL (operands[2]);
int max, i;
/* Truncate the shift count in case it is out of bounds. */
value = value & 0x1f;
if (value == 31)
if (code == LSHIFTRT)
emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
emit_insn (gen_movt (operands[0]));
return;
else if (code == ASHIFT)
/* There is a two instruction sequence for 31 bit left shifts,
but it requires r0. */
if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
return;
else if (value == 0)
/* This can happen when not optimizing. We must output something here
to prevent the compiler from aborting in final.c after the try_split
call. */
emit_insn (gen_nop ());
return;
max = shift_insns[value];
for (i = 0; i < max; i++)
gen_ashift (code, shift_amounts[value][i], operands[0]);

/* Same as above, but optimized for values where the topmost bits don't
matter. */

void
gen_shifty_hi_op (int code, rtx *operands)
int value = INTVAL (operands[2]);
int max, i;
void (*gen_fun) (int, int, rtx);
/* This operation is used by and_shl for SImode values with a few
high bits known to be cleared. */
value &= 31;
if (value == 0)
emit_insn (gen_nop ());
return;
gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
if (code == ASHIFT)
max = ext_shift_insns[value];
for (i = 0; i < max; i++)
gen_fun (code, ext_shift_amounts[value][i], operands[0]);
else
/* When shifting right, emit the shifts in reverse order, so that
solitary negative values come first. */
for (i = ext_shift_insns[value] - 1; i >= 0; i--)
gen_fun (code, ext_shift_amounts[value][i], operands[0]);

/* Output RTL for an arithmetic right shift. */

/* ??? Rewrite to use super-optimizer sequences. */

expand_ashiftrt (rtx *operands)
rtx sym;
rtx wrk;
char func[18];
tree func_name;
int value;
if (TARGET_SH3)
if (GET_CODE (operands[2]) != CONST_INT)
rtx count = copy_to_mode_reg (SImode, operands[2]);
emit_insn (gen_negsi2 (count, count));
emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
return 1;
else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
> 1 + SH_DYNAMIC_SHIFT_COST)
rtx count
= force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
return 1;
if (GET_CODE (operands[2]) != CONST_INT)
return 0;
value = INTVAL (operands[2]) & 31;
if (value == 31)
emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
return 1;
else if (value >= 16 && value <= 19)
wrk = gen_reg_rtx (SImode);
emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
value -= 16;
while (value--)
gen_ashift (ASHIFTRT, 1, wrk);
emit_move_insn (operands[0], wrk);
return 1;
/* Expand a short sequence inline, longer call a magic routine. */
else if (value <= 5)
wrk = gen_reg_rtx (SImode);
emit_move_insn (wrk, operands[1]);
while (value--)
gen_ashift (ASHIFTRT, 1, wrk);
emit_move_insn (operands[0], wrk);
return 1;
wrk = gen_reg_rtx (Pmode);
/* Load the value into an arg reg and call a helper. */
emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
sprintf (func, "__ashiftrt_r4_%d", value);
func_name = get_identifier (func);
sym = function_symbol (IDENTIFIER_POINTER (func_name));
emit_move_insn (wrk, sym);
emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
return 1;

sh_dynamicalize_shift_p (rtx count)
return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1961 /* Try to find a good way to implement the combiner pattern
1962 [(set (match_operand:SI 0 "register_operand" "r")
1963 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1964 (match_operand:SI 2 "const_int_operand" "n"))
1965 (match_operand:SI 3 "const_int_operand" "n"))) .
1966 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1967 return 0 for simple right / left or left/right shift combination.
1968 return 1 for a combination of shifts with zero_extend.
1969 return 2 for a combination of shifts with an AND that needs r0.
1970 return 3 for a combination of shifts with an AND that needs an extra
1971 scratch register, when the three highmost bits of the AND mask are clear.
1972 return 4 for a combination of shifts with an AND that needs an extra
1973 scratch register, when any of the three highmost bits of the AND mask
1974 is set.
1975 If ATTRP is set, store an initial right shift width in ATTRP[0],
1976 and the instruction length in ATTRP[1] . These values are not valid
1977 when returning 0.
1978 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1979 shift_amounts for the last shift value that is to be used before the
1980 sign extend. */
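/* Worked example (illustrative): for (and:SI (ashift:SI (reg) (const_int 8))
   (const_int 0xffff00)), LEFT is 8 and the mask shifted right by LEFT is
   0xffff, so the cheapest variant found is kind 1: a 16 bit zero extend
   (extu.w) followed by a left shift by 8 (shll8), two instructions total.  */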
1982 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
1984 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1985 int left = INTVAL (left_rtx), right;
1986 int best = 0;
1987 int cost, best_cost = 10000;
1988 int best_right = 0, best_len = 0;
1989 int i;
1990 int can_ext;
1992 if (left < 0 || left > 31)
1993 return 0;
1994 if (GET_CODE (mask_rtx) == CONST_INT)
1995 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1996 else
1997 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1998 /* Can this be expressed as a right shift / left shift pair ? */
1999 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2000 right = exact_log2 (lsb);
2001 mask2 = ~(mask + lsb - 1);
2002 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2003 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2004 if (! mask2)
2005 best_cost = shift_insns[right] + shift_insns[right + left];
2006 /* mask has no trailing zeroes <==> ! right */
2007 else if (! right && mask2 == ~(lsb2 - 1))
2009 int late_right = exact_log2 (lsb2);
2010 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2012 /* Try to use zero extend */
2013 if (mask2 == ~(lsb2 - 1))
2015 int width, first;
2017 for (width = 8; width <= 16; width += 8)
2019 /* Can we zero-extend right away? */
2020 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
2022 cost
2023 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2024 if (cost < best_cost)
2026 best = 1;
2027 best_cost = cost;
2028 best_right = right;
2029 best_len = cost;
2030 if (attrp)
2031 attrp[2] = -1;
2033 continue;
2035 /* ??? Could try to put zero extend into initial right shift,
2036 or even shift a bit left before the right shift. */
2037 /* Determine value of first part of left shift, to get to the
2038 zero extend cut-off point. */
2039 first = width - exact_log2 (lsb2) + right;
2040 if (first >= 0 && right + left - first >= 0)
2042 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2043 + ext_shift_insns[right + left - first];
2044 if (cost < best_cost)
2046 best = 1;
2047 best_cost = cost;
2048 best_right = right;
2049 best_len = cost;
2050 if (attrp)
2051 attrp[2] = first;
2056 /* Try to use r0 AND pattern */
2057 for (i = 0; i <= 2; i++)
2059 if (i > right)
2060 break;
2061 if (! CONST_OK_FOR_K08 (mask >> i))
2062 continue;
2063 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2064 if (cost < best_cost)
2066 best = 2;
2067 best_cost = cost;
2068 best_right = i;
2069 best_len = cost - 1;
2072 /* Try to use a scratch register to hold the AND operand. */
2073 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
2074 for (i = 0; i <= 2; i++)
2076 if (i > right)
2077 break;
2078 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2079 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2080 if (cost < best_cost)
2082 best = 4 - can_ext;
2083 best_cost = cost;
2084 best_right = i;
2085 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2089 if (attrp)
2091 attrp[0] = best_right;
2092 attrp[1] = best_len;
2094 return best;
2097 /* This is used in length attributes of the unnamed instructions
2098 corresponding to shl_and_kind return values of 1 and 2. */
2100 shl_and_length (rtx insn)
2102 rtx set_src, left_rtx, mask_rtx;
2103 int attributes[3];
2105 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2106 left_rtx = XEXP (XEXP (set_src, 0), 1);
2107 mask_rtx = XEXP (set_src, 1);
2108 shl_and_kind (left_rtx, mask_rtx, attributes);
2109 return attributes[1];
2112 /* This is used in length attribute of the and_shl_scratch instruction. */
2115 shl_and_scr_length (rtx insn)
2117 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2118 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2119 rtx op = XEXP (set_src, 0);
2120 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2121 op = XEXP (XEXP (op, 0), 0);
2122 return len + shift_insns[INTVAL (XEXP (op, 1))];
2125 /* Generating rtl? */
2126 extern int rtx_equal_function_value_matters;
2128 /* Generate rtl for instructions for which shl_and_kind advised a particular
2129 method of generating them, i.e. returned nonzero. */
2132 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2134 int attributes[3];
2135 unsigned HOST_WIDE_INT mask;
2136 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2137 int right, total_shift;
2138 void (*shift_gen_fun) (int, rtx*) = gen_shifty_hi_op;
2140 right = attributes[0];
2141 total_shift = INTVAL (left_rtx) + right;
2142 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2143 switch (kind)
2145 default:
2146 return -1;
2147 case 1:
2149 int first = attributes[2];
2150 rtx operands[3];
2152 if (first < 0)
2154 emit_insn ((mask << right) <= 0xff
2155 ? gen_zero_extendqisi2(dest,
2156 gen_lowpart (QImode, source))
2157 : gen_zero_extendhisi2(dest,
2158 gen_lowpart (HImode, source)));
2159 source = dest;
2161 if (source != dest)
2162 emit_insn (gen_movsi (dest, source));
2163 operands[0] = dest;
2164 if (right)
2166 operands[2] = GEN_INT (right);
2167 gen_shifty_hi_op (LSHIFTRT, operands);
2169 if (first > 0)
2171 operands[2] = GEN_INT (first);
2172 gen_shifty_hi_op (ASHIFT, operands);
2173 total_shift -= first;
2174 mask <<= first;
2176 if (first >= 0)
2177 emit_insn (mask <= 0xff
2178 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
2179 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
2180 if (total_shift > 0)
2182 operands[2] = GEN_INT (total_shift);
2183 gen_shifty_hi_op (ASHIFT, operands);
2185 break;
2187 case 4:
2188 shift_gen_fun = gen_shifty_op;
2189 case 3:
2190 /* If the topmost bit that matters is set, set the topmost bits
2191 that don't matter. This way, we might be able to get a shorter
2192 signed constant. */
2193 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
2194 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
2195 case 2:
2196 /* Don't expand fine-grained when combining, because that will
2197 make the pattern fail. */
2198 if (rtx_equal_function_value_matters
2199 || reload_in_progress || reload_completed)
2201 rtx operands[3];
2203 /* Cases 3 and 4 should be handled by this split
2204 only while combining */
2205 if (kind > 2)
2206 abort ();
2207 if (right)
2209 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2210 source = dest;
2212 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2213 if (total_shift)
2215 operands[0] = dest;
2216 operands[1] = dest;
2217 operands[2] = GEN_INT (total_shift);
2218 shift_gen_fun (ASHIFT, operands);
2220 break;
2222 else
2224 int neg = 0;
2225 if (kind != 4 && total_shift < 16)
2227 neg = -ext_shift_amounts[total_shift][1];
2228 if (neg > 0)
2229 neg -= ext_shift_amounts[total_shift][2];
2230 else
2231 neg = 0;
2233 emit_insn (gen_and_shl_scratch (dest, source,
2234 GEN_INT (right),
2235 GEN_INT (mask),
2236 GEN_INT (total_shift + neg),
2237 GEN_INT (neg)));
2238 emit_insn (gen_movsi (dest, dest));
2239 break;
2242 return 0;
2245 /* Try to find a good way to implement the combiner pattern
2246 [(set (match_operand:SI 0 "register_operand" "=r")
2247 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2248 (match_operand:SI 2 "const_int_operand" "n")
2249 (match_operand:SI 3 "const_int_operand" "n")
2250 (const_int 0)))
2251 (clobber (reg:SI T_REG))]
2252 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2253 return 0 for simple left / right shift combination.
2254 return 1 for left shift / 8 bit sign extend / left shift.
2255 return 2 for left shift / 16 bit sign extend / left shift.
2256 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2257 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2258 return 5 for left shift / 16 bit sign extend / right shift
2259 return 6 for < 8 bit sign extend / left shift.
2260 return 7 for < 8 bit sign extend / left shift / single right shift.
2261 If COSTP is nonzero, assign the calculated cost to *COSTP. */
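/* Worked example (illustrative): extracting the low 10 bits of (reg << 2)
   as a signed field, i.e. LEFT == 2 and SIZE == 10, gives INSIZE == 8;
   kind 1 wins, and gen_shl_sext below emits exts.b followed by shll2.  */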
2264 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2266 int left, size, insize, ext;
2267 int cost = 0, best_cost;
2268 int kind;
2270 left = INTVAL (left_rtx);
2271 size = INTVAL (size_rtx);
2272 insize = size - left;
2273 if (insize <= 0)
2274 abort ();
2275 /* Default to left / right shift. */
2276 kind = 0;
2277 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2278 if (size <= 16)
2280 /* 16 bit shift / sign extend / 16 bit shift */
2281 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2282 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2283 below, by alternative 3 or something even better. */
2284 if (cost < best_cost)
2286 kind = 5;
2287 best_cost = cost;
2290 /* Try a plain sign extend between two shifts. */
2291 for (ext = 16; ext >= insize; ext -= 8)
2293 if (ext <= size)
2295 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2296 if (cost < best_cost)
2298 kind = ext / (unsigned) 8;
2299 best_cost = cost;
2302 /* Check if we can do a sloppy shift with a final signed shift
2303 restoring the sign. */
2304 if (EXT_SHIFT_SIGNED (size - ext))
2305 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2306 /* If not, maybe it's still cheaper to do the second shift sloppy,
2307 and do a final sign extend? */
2308 else if (size <= 16)
2309 cost = ext_shift_insns[ext - insize] + 1
2310 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2311 else
2312 continue;
2313 if (cost < best_cost)
2315 kind = ext / (unsigned) 8 + 2;
2316 best_cost = cost;
2319 /* Check if we can sign extend in r0 */
2320 if (insize < 8)
2322 cost = 3 + shift_insns[left];
2323 if (cost < best_cost)
2325 kind = 6;
2326 best_cost = cost;
2328 /* Try the same with a final signed shift. */
2329 if (left < 31)
2331 cost = 3 + ext_shift_insns[left + 1] + 1;
2332 if (cost < best_cost)
2334 kind = 7;
2335 best_cost = cost;
2339 if (TARGET_SH3)
2341 /* Try to use a dynamic shift. */
2342 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2343 if (cost < best_cost)
2345 kind = 0;
2346 best_cost = cost;
2349 if (costp)
2350 *costp = cost;
2351 return kind;
2354 /* Function to be used in the length attribute of the instructions
2355 implementing this pattern. */
2358 shl_sext_length (rtx insn)
2360 rtx set_src, left_rtx, size_rtx;
2361 int cost;
2363 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2364 left_rtx = XEXP (XEXP (set_src, 0), 1);
2365 size_rtx = XEXP (set_src, 1);
2366 shl_sext_kind (left_rtx, size_rtx, &cost);
2367 return cost;
2370 /* Generate rtl for this pattern. */
2373 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2375 int kind;
2376 int left, size, insize, cost;
2377 rtx operands[3];
2379 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2380 left = INTVAL (left_rtx);
2381 size = INTVAL (size_rtx);
2382 insize = size - left;
2383 switch (kind)
2385 case 1:
2386 case 2:
2387 case 3:
2388 case 4:
2390 int ext = kind & 1 ? 8 : 16;
2391 int shift2 = size - ext;
2393 /* Don't expand fine-grained when combining, because that will
2394 make the pattern fail. */
2395 if (! rtx_equal_function_value_matters
2396 && ! reload_in_progress && ! reload_completed)
2398 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2399 emit_insn (gen_movsi (dest, source));
2400 break;
2402 if (dest != source)
2403 emit_insn (gen_movsi (dest, source));
2404 operands[0] = dest;
2405 if (ext - insize)
2407 operands[2] = GEN_INT (ext - insize);
2408 gen_shifty_hi_op (ASHIFT, operands);
2410 emit_insn (kind & 1
2411 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
2412 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
2413 if (kind <= 2)
2415 if (shift2)
2417 operands[2] = GEN_INT (shift2);
2418 gen_shifty_op (ASHIFT, operands);
2421 else
2423 if (shift2 > 0)
2425 if (EXT_SHIFT_SIGNED (shift2))
2427 operands[2] = GEN_INT (shift2 + 1);
2428 gen_shifty_op (ASHIFT, operands);
2429 operands[2] = GEN_INT (1);
2430 gen_shifty_op (ASHIFTRT, operands);
2431 break;
2433 operands[2] = GEN_INT (shift2);
2434 gen_shifty_hi_op (ASHIFT, operands);
2436 else if (shift2)
2438 operands[2] = GEN_INT (-shift2);
2439 gen_shifty_hi_op (LSHIFTRT, operands);
2441 emit_insn (size <= 8
2442 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2443 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2445 break;
2447 case 5:
2449 int i = 16 - size;
2450 if (! rtx_equal_function_value_matters
2451 && ! reload_in_progress && ! reload_completed)
2452 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2453 else
2455 operands[0] = dest;
2456 operands[2] = GEN_INT (16 - insize);
2457 gen_shifty_hi_op (ASHIFT, operands);
2458 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2460 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2461 while (--i >= 0)
2462 gen_ashift (ASHIFTRT, 1, dest);
2463 break;
2465 case 6:
2466 case 7:
2467 /* Don't expand fine-grained when combining, because that will
2468 make the pattern fail. */
2469 if (! rtx_equal_function_value_matters
2470 && ! reload_in_progress && ! reload_completed)
2472 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2473 emit_insn (gen_movsi (dest, source));
2474 break;
2476 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2477 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2478 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2479 operands[0] = dest;
2480 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2481 gen_shifty_op (ASHIFT, operands);
2482 if (kind == 7)
2483 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2484 break;
2485 default:
2486 return -1;
2488 return 0;
2491 /* Prefix a symbol_ref name with "datalabel". */
2494 gen_datalabel_ref (rtx sym)
2496 if (GET_CODE (sym) == LABEL_REF)
2497 return gen_rtx_CONST (GET_MODE (sym),
2498 gen_rtx_UNSPEC (GET_MODE (sym),
2499 gen_rtvec (1, sym),
2500 UNSPEC_DATALABEL));
2502 if (GET_CODE (sym) != SYMBOL_REF)
2503 abort ();
2505 return sym;
2509 /* The SH cannot load a large constant into a register; constants have to
2510 come from a pc relative load. The reference of a pc relative load
2511 instruction must be less than 1k in front of the instruction. This
2512 means that we often have to dump a constant inside a function, and
2513 generate code to branch around it.
2515 It is important to minimize this, since the branches will slow things
2516 down and make things bigger.
2518 Worst case code looks like:
2520 mov.l L1,rn
2521 bra L2
2523 align
2524 L1: .long value
2528 mov.l L3,rn
2529 bra L4
2531 align
2532 L3: .long value
2536 We fix this by performing a scan before scheduling, which notices which
2537 instructions need to have their operands fetched from the constant table
2538 and builds the table.
2540 The algorithm is:
2542 scan, find an instruction which needs a pcrel move. Look forward, find the
2543 last barrier which is within MAX_COUNT bytes of the requirement.
2544 If there isn't one, make one. Process all the instructions between
2545 the insn we found and the barrier.
2547 In the above example, we can tell that L3 is within 1k of L1, so
2548 the first move can be shrunk from the 3 insn+constant sequence into
2549 just 1 insn, and the constant moved to L3 to make:
2551 mov.l L1,rn
2553 mov.l L3,rn
2554 bra L4
2556 align
2557 L3:.long value
2558 L4:.long value
2560 Then the second move becomes the target for the shortening process. */
2562 typedef struct
2564 rtx value; /* Value in table. */
2565 rtx label; /* Label of value. */
2566 rtx wend; /* End of window. */
2567 enum machine_mode mode; /* Mode of value. */
2569 /* True if this constant is accessed as part of a post-increment
2570 sequence. Note that HImode constants are never accessed in this way. */
2571 bool part_of_sequence_p;
2572 } pool_node;
2574 /* The maximum number of constants that can fit into one pool, since
2575 the pc relative range is 0...1020 bytes and constants are at least 4
2576 bytes long. */
2578 #define MAX_POOL_SIZE (1020/4)
2579 static pool_node pool_vector[MAX_POOL_SIZE];
2580 static int pool_size;
2581 static rtx pool_window_label;
2582 static int pool_window_last;
2584 /* ??? If we need a constant in HImode which is the truncated value of a
2585 constant we need in SImode, we could combine the two entries thus saving
2586 two bytes. Is this common enough to be worth the effort of implementing
2587 it? */
2589 /* ??? This stuff should be done at the same time that we shorten branches.
2590 As it is now, we must assume that all branches are the maximum size, and
2591 this causes us to almost always output constant pools sooner than
2592 necessary. */
2594 /* Add a constant to the pool and return its label. */
2596 static rtx
2597 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2599 int i;
2600 rtx lab, new, ref, newref;
2602 /* First see if we've already got it. */
2603 for (i = 0; i < pool_size; i++)
2605 if (x->code == pool_vector[i].value->code
2606 && mode == pool_vector[i].mode)
2608 if (x->code == CODE_LABEL)
2610 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2611 continue;
2613 if (rtx_equal_p (x, pool_vector[i].value))
2615 lab = new = 0;
2616 if (! last_value
2617 || ! i
2618 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2620 new = gen_label_rtx ();
2621 LABEL_REFS (new) = pool_vector[i].label;
2622 pool_vector[i].label = lab = new;
2624 if (lab && pool_window_label)
2626 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2627 ref = pool_vector[pool_window_last].wend;
2628 LABEL_NEXTREF (newref) = ref;
2629 pool_vector[pool_window_last].wend = newref;
2631 if (new)
2632 pool_window_label = new;
2633 pool_window_last = i;
2634 return lab;
2639 /* Need a new one. */
2640 pool_vector[pool_size].value = x;
2641 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2643 lab = 0;
2644 pool_vector[pool_size - 1].part_of_sequence_p = true;
2646 else
2647 lab = gen_label_rtx ();
2648 pool_vector[pool_size].mode = mode;
2649 pool_vector[pool_size].label = lab;
2650 pool_vector[pool_size].wend = NULL_RTX;
2651 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2652 if (lab && pool_window_label)
2654 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2655 ref = pool_vector[pool_window_last].wend;
2656 LABEL_NEXTREF (newref) = ref;
2657 pool_vector[pool_window_last].wend = newref;
2659 if (lab)
2660 pool_window_label = lab;
2661 pool_window_last = pool_size;
2662 pool_size++;
2663 return lab;
2666 /* Output the literal table. */
2668 static void
2669 dump_table (rtx scan)
2671 int i;
2672 int need_align = 1;
2673 rtx lab, ref;
2674 int have_df = 0;
2676 /* Do two passes, first time dump out the HI sized constants. */
2678 for (i = 0; i < pool_size; i++)
2680 pool_node *p = &pool_vector[i];
2682 if (p->mode == HImode)
2684 if (need_align)
2686 scan = emit_insn_after (gen_align_2 (), scan);
2687 need_align = 0;
2689 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2690 scan = emit_label_after (lab, scan);
2691 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2692 scan);
2693 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2695 lab = XEXP (ref, 0);
2696 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2699 else if (p->mode == DFmode)
2700 have_df = 1;
2703 need_align = 1;
2705 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2707 rtx align_insn = NULL_RTX;
2709 scan = emit_label_after (gen_label_rtx (), scan);
2710 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2711 need_align = 0;
2713 for (i = 0; i < pool_size; i++)
2715 pool_node *p = &pool_vector[i];
2717 switch (p->mode)
2719 case HImode:
2720 break;
2721 case SImode:
2722 case SFmode:
2723 if (align_insn && !p->part_of_sequence_p)
2725 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2726 emit_label_before (lab, align_insn);
2727 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2728 align_insn);
2729 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2731 lab = XEXP (ref, 0);
2732 emit_insn_before (gen_consttable_window_end (lab),
2733 align_insn);
2735 delete_insn (align_insn);
2736 align_insn = NULL_RTX;
2737 continue;
2739 else
2741 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2742 scan = emit_label_after (lab, scan);
2743 scan = emit_insn_after (gen_consttable_4 (p->value,
2744 const0_rtx), scan);
2745 need_align = ! need_align;
2747 break;
2748 case DFmode:
2749 if (need_align)
2751 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2752 align_insn = scan;
2753 need_align = 0;
2755 case DImode:
2756 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2757 scan = emit_label_after (lab, scan);
2758 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2759 scan);
2760 break;
2761 default:
2762 abort ();
2763 break;
2766 if (p->mode != HImode)
2768 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2770 lab = XEXP (ref, 0);
2771 scan = emit_insn_after (gen_consttable_window_end (lab),
2772 scan);
2777 pool_size = 0;
2780 for (i = 0; i < pool_size; i++)
2782 pool_node *p = &pool_vector[i];
2784 switch (p->mode)
2786 case HImode:
2787 break;
2788 case SImode:
2789 case SFmode:
2790 if (need_align)
2792 need_align = 0;
2793 scan = emit_label_after (gen_label_rtx (), scan);
2794 scan = emit_insn_after (gen_align_4 (), scan);
2796 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2797 scan = emit_label_after (lab, scan);
2798 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2799 scan);
2800 break;
2801 case DFmode:
2802 case DImode:
2803 if (need_align)
2805 need_align = 0;
2806 scan = emit_label_after (gen_label_rtx (), scan);
2807 scan = emit_insn_after (gen_align_4 (), scan);
2809 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2810 scan = emit_label_after (lab, scan);
2811 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2812 scan);
2813 break;
2814 default:
2815 abort ();
2816 break;
2819 if (p->mode != HImode)
2821 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2823 lab = XEXP (ref, 0);
2824 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2829 scan = emit_insn_after (gen_consttable_end (), scan);
2830 scan = emit_barrier_after (scan);
2831 pool_size = 0;
2832 pool_window_label = NULL_RTX;
2833 pool_window_last = 0;
2836 /* Return nonzero if constant would be an ok source for a
2837 mov.w instead of a mov.l. */
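/* A pc-relative mov.w sign-extends the 16 bit value it loads, so any
   constant in the range -32768..32767 can be materialized this way.  */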
2839 static int
2840 hi_const (rtx src)
2842 return (GET_CODE (src) == CONST_INT
2843 && INTVAL (src) >= -32768
2844 && INTVAL (src) <= 32767);
2847 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2849 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2850 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
2851 need to fix it if the input value is CONST_OK_FOR_I08. */
2853 static int
2854 broken_move (rtx insn)
2856 if (GET_CODE (insn) == INSN)
2858 rtx pat = PATTERN (insn);
2859 if (GET_CODE (pat) == PARALLEL)
2860 pat = XVECEXP (pat, 0, 0);
2861 if (GET_CODE (pat) == SET
2862 /* We can load any 8 bit value if we don't care what the high
2863 order bits end up as. */
2864 && GET_MODE (SET_DEST (pat)) != QImode
2865 && (CONSTANT_P (SET_SRC (pat))
2866 /* Match mova_const. */
2867 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2868 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2869 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2870 && ! (TARGET_SH2E
2871 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2872 && (fp_zero_operand (SET_SRC (pat))
2873 || fp_one_operand (SET_SRC (pat)))
2874 /* ??? If this is a -m4 or -m4-single compilation, in general
2875 we don't know the current setting of fpscr, so disable fldi.
2876 There is an exception if this was a register-register move
2877 before reload - and hence it was ascertained that we have
2878 single precision setting - and in a post-reload optimization
2879 we changed this to do a constant load. In that case
2880 we don't have an r0 clobber, hence we must use fldi. */
2881 && (! TARGET_SH4 || TARGET_FMOVD
2882 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2883 == SCRATCH))
2884 && GET_CODE (SET_DEST (pat)) == REG
2885 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2886 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2887 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
2888 return 1;
2891 return 0;
2894 static int
2895 mova_p (rtx insn)
2897 return (GET_CODE (insn) == INSN
2898 && GET_CODE (PATTERN (insn)) == SET
2899 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2900 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2901 /* Don't match mova_const. */
2902 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2905 /* Find the last barrier from insn FROM which is close enough to hold the
2906 constant pool. If we can't find one, then create one near the end of
2907 the range. */
2909 static rtx
2910 find_barrier (int num_mova, rtx mova, rtx from)
2912 int count_si = 0;
2913 int count_hi = 0;
2914 int found_hi = 0;
2915 int found_si = 0;
2916 int found_di = 0;
2917 int hi_align = 2;
2918 int si_align = 2;
2919 int leading_mova = num_mova;
2920 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
2921 int si_limit;
2922 int hi_limit;
2924 /* For HImode: range is 510, add 4 because pc counts from address of
2925 second instruction after this one, subtract 2 for the jump instruction
2926 that we may need to emit before the table, subtract 2 for the instruction
2927 that fills the jump delay slot (in very rare cases, reorg will take an
2928 instruction from after the constant pool or will leave the delay slot
2929 empty). This gives 510.
2930 For SImode: range is 1020, add 4 because pc counts from address of
2931 second instruction after this one, subtract 2 in case pc is 2 byte
2932 aligned, subtract 2 for the jump instruction that we may need to emit
2933 before the table, subtract 2 for the instruction that fills the jump
2934 delay slot. This gives 1018. */
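/* These limits follow from the pc-relative load encodings: mov.w
   @(disp,pc),Rn reaches roughly pc+4 .. pc+4+510 (8 bit displacement
   scaled by 2), and mov.l @(disp,pc),Rn reaches roughly up to 1020
   bytes ahead (8 bit displacement scaled by 4, with pc rounded down
   to a longword boundary).  */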
2936 /* The branch will always be shortened now that the reference address for
2937 forward branches is the successor address, thus we need no longer make
2938 adjustments to the [sh]i_limit for -O0. */
2940 si_limit = 1018;
2941 hi_limit = 510;
2943 while (from && count_si < si_limit && count_hi < hi_limit)
2945 int inc = get_attr_length (from);
2946 int new_align = 1;
2948 if (GET_CODE (from) == CODE_LABEL)
2950 if (optimize)
2951 new_align = 1 << label_to_alignment (from);
2952 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2953 new_align = 1 << barrier_align (from);
2954 else
2955 new_align = 1;
2956 inc = 0;
2959 if (GET_CODE (from) == BARRIER)
2962 found_barrier = from;
2964 /* If we are at the end of the function, or in front of an alignment
2965 instruction, we need not insert an extra alignment. We prefer
2966 this kind of barrier. */
2967 if (barrier_align (from) > 2)
2968 good_barrier = from;
2971 if (broken_move (from))
2973 rtx pat, src, dst;
2974 enum machine_mode mode;
2976 pat = PATTERN (from);
2977 if (GET_CODE (pat) == PARALLEL)
2978 pat = XVECEXP (pat, 0, 0);
2979 src = SET_SRC (pat);
2980 dst = SET_DEST (pat);
2981 mode = GET_MODE (dst);
2983 /* We must explicitly check the mode, because sometimes the
2984 front end will generate code to load unsigned constants into
2985 HImode targets without properly sign extending them. */
2986 if (mode == HImode
2987 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2989 found_hi += 2;
2990 /* We put the short constants before the long constants, so
2991 we must count the length of short constants in the range
2992 for the long constants. */
2993 /* ??? This isn't optimal, but is easy to do. */
2994 si_limit -= 2;
2996 else
2998 /* We dump DF/DI constants before SF/SI ones, because
2999 the limit is the same, but the alignment requirements
3000 are higher. We may waste up to 4 additional bytes
3001 for alignment, and the DF/DI constant may have
3002 another SF/SI constant placed before it. */
3003 if (TARGET_SHCOMPACT
3004 && ! found_di
3005 && (mode == DFmode || mode == DImode))
3007 found_di = 1;
3008 si_limit -= 8;
3010 while (si_align > 2 && found_si + si_align - 2 > count_si)
3011 si_align >>= 1;
3012 if (found_si > count_si)
3013 count_si = found_si;
3014 found_si += GET_MODE_SIZE (mode);
3015 if (num_mova)
3016 si_limit -= GET_MODE_SIZE (mode);
3019 /* See the code in sh_reorg, which has a similar if
3020 statement that generates a new mova insn in many cases. */
3021 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3022 inc += 2;
3025 if (mova_p (from))
3027 if (! num_mova++)
3029 leading_mova = 0;
3030 mova = from;
3031 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3033 if (found_si > count_si)
3034 count_si = found_si;
3036 else if (GET_CODE (from) == JUMP_INSN
3037 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3038 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3040 if (num_mova)
3041 num_mova--;
3042 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3044 /* We have just passed the barrier in front of the
3045 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3046 the ADDR_DIFF_VEC is accessed as data, just like our pool
3047 constants, this is a good opportunity to accommodate what
3048 we have gathered so far.
3049 If we waited any longer, we could end up at a barrier in
3050 front of code, which gives worse cache usage for separated
3051 instruction / data caches. */
3052 good_barrier = found_barrier;
3053 break;
3055 else
3057 rtx body = PATTERN (from);
3058 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3061 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3062 else if (GET_CODE (from) == JUMP_INSN
3063 && ! TARGET_SH2
3064 && ! TARGET_SMALLCODE)
3065 new_align = 4;
3067 if (found_si)
3069 count_si += inc;
3070 if (new_align > si_align)
3072 si_limit -= (count_si - 1) & (new_align - si_align);
3073 si_align = new_align;
3075 count_si = (count_si + new_align - 1) & -new_align;
3077 if (found_hi)
3079 count_hi += inc;
3080 if (new_align > hi_align)
3082 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3083 hi_align = new_align;
3085 count_hi = (count_hi + new_align - 1) & -new_align;
3087 from = NEXT_INSN (from);
3090 if (num_mova)
3092 if (leading_mova)
3094 /* Try as we might, the leading mova is out of range. Change
3095 it into a load (which will become a pcload) and retry. */
3096 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3097 INSN_CODE (mova) = -1;
3098 return find_barrier (0, 0, mova);
3100 else
3102 /* Insert the constant pool table before the mova instruction,
3103 to prevent the mova label reference from going out of range. */
3104 from = mova;
3105 good_barrier = found_barrier = barrier_before_mova;
3109 if (found_barrier)
3111 if (good_barrier && next_real_insn (found_barrier))
3112 found_barrier = good_barrier;
3114 else
3116 /* We didn't find a barrier in time to dump our stuff,
3117 so we'll make one. */
3118 rtx label = gen_label_rtx ();
3120 /* If we exceeded the range, then we must back up over the last
3121 instruction we looked at. Otherwise, we just need to undo the
3122 NEXT_INSN at the end of the loop. */
3123 if (count_hi > hi_limit || count_si > si_limit)
3124 from = PREV_INSN (PREV_INSN (from));
3125 else
3126 from = PREV_INSN (from);
3128 /* Walk back to be just before any jump or label.
3129 Putting it before a label reduces the number of times the branch
3130 around the constant pool table will be hit. Putting it before
3131 a jump makes it more likely that the bra delay slot will be
3132 filled. */
3133 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3134 || GET_CODE (from) == CODE_LABEL)
3135 from = PREV_INSN (from);
3137 from = emit_jump_insn_after (gen_jump (label), from);
3138 JUMP_LABEL (from) = label;
3139 LABEL_NUSES (label) = 1;
3140 found_barrier = emit_barrier_after (from);
3141 emit_label_after (label, found_barrier);
3144 return found_barrier;
3147 /* If the instruction INSN is implemented by a special function, and we can
3148 positively find the register that is used to call the sfunc, and this
3149 register is not used anywhere else in this instruction (except as the
3150 destination of a set), return this register; else, return 0. */
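/* Sfuncs are small assembler support routines (such as the
   __ashiftrt_r4_<n> helpers used by expand_ashiftrt above) that are
   called through a register and use a private calling convention,
   e.g. taking their operand in r4 rather than following the normal ABI.  */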
3152 sfunc_uses_reg (rtx insn)
3154 int i;
3155 rtx pattern, part, reg_part, reg;
3157 if (GET_CODE (insn) != INSN)
3158 return 0;
3159 pattern = PATTERN (insn);
3160 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3161 return 0;
3163 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3165 part = XVECEXP (pattern, 0, i);
3166 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3167 reg_part = part;
3169 if (! reg_part)
3170 return 0;
3171 reg = XEXP (reg_part, 0);
3172 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3174 part = XVECEXP (pattern, 0, i);
3175 if (part == reg_part || GET_CODE (part) == CLOBBER)
3176 continue;
3177 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3178 && GET_CODE (SET_DEST (part)) == REG)
3179 ? SET_SRC (part) : part)))
3180 return 0;
3182 return reg;
3185 /* See if the only way in which INSN uses REG is by calling it, or by
3186 setting it while calling it. Set *SET to a SET rtx if the register
3187 is set by INSN. */
3189 static int
3190 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3192 rtx pattern, reg2;
3194 *set = NULL_RTX;
3196 reg2 = sfunc_uses_reg (insn);
3197 if (reg2 && REGNO (reg2) == REGNO (reg))
3199 pattern = single_set (insn);
3200 if (pattern
3201 && GET_CODE (SET_DEST (pattern)) == REG
3202 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3203 *set = pattern;
3204 return 0;
3206 if (GET_CODE (insn) != CALL_INSN)
3208 /* We don't use rtx_equal_p because we don't care if the mode is
3209 different. */
3210 pattern = single_set (insn);
3211 if (pattern
3212 && GET_CODE (SET_DEST (pattern)) == REG
3213 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3215 rtx par, part;
3216 int i;
3218 *set = pattern;
3219 par = PATTERN (insn);
3220 if (GET_CODE (par) == PARALLEL)
3221 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3223 part = XVECEXP (par, 0, i);
3224 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3225 return 1;
3227 return reg_mentioned_p (reg, SET_SRC (pattern));
3230 return 1;
3233 pattern = PATTERN (insn);
3235 if (GET_CODE (pattern) == PARALLEL)
3237 int i;
3239 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3240 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3241 return 1;
3242 pattern = XVECEXP (pattern, 0, 0);
3245 if (GET_CODE (pattern) == SET)
3247 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3249 /* We don't use rtx_equal_p, because we don't care if the
3250 mode is different. */
3251 if (GET_CODE (SET_DEST (pattern)) != REG
3252 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3253 return 1;
3255 *set = pattern;
3258 pattern = SET_SRC (pattern);
3261 if (GET_CODE (pattern) != CALL
3262 || GET_CODE (XEXP (pattern, 0)) != MEM
3263 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3264 return 1;
3266 return 0;
3269 /* Given X, a pattern of an insn or a part of it, return a mask of used
3270 general registers. Bits 0..15 mean that the respective registers
3271 are used as inputs in the instruction. Bits 16..31 mean that the
3272 registers 0..15, respectively, are used as outputs, or are clobbered.
3273 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
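/* Example (illustrative): for (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   this returns a mask with bits 2 and 3 set for the input registers and
   bit 17 (register 1 as an output) set for the destination.  */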
3275 regs_used (rtx x, int is_dest)
3277 enum rtx_code code;
3278 const char *fmt;
3279 int i, used = 0;
3281 if (! x)
3282 return used;
3283 code = GET_CODE (x);
3284 switch (code)
3286 case REG:
3287 if (REGNO (x) < 16)
3288 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3289 << (REGNO (x) + is_dest));
3290 return 0;
3291 case SUBREG:
3293 rtx y = SUBREG_REG (x);
3295 if (GET_CODE (y) != REG)
3296 break;
3297 if (REGNO (y) < 16)
3298 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3299 << (REGNO (y) +
3300 subreg_regno_offset (REGNO (y),
3301 GET_MODE (y),
3302 SUBREG_BYTE (x),
3303 GET_MODE (x)) + is_dest));
3304 return 0;
3306 case SET:
3307 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3308 case RETURN:
3309 /* If there was a return value, it must have been indicated with USE. */
3310 return 0x00ffff00;
3311 case CLOBBER:
3312 is_dest = 1;
3313 break;
3314 case MEM:
3315 is_dest = 0;
3316 break;
3317 case CALL:
3318 used |= 0x00ff00f0;
3319 break;
3320 default:
3321 break;
3324 fmt = GET_RTX_FORMAT (code);
3326 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3328 if (fmt[i] == 'E')
3330 register int j;
3331 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3332 used |= regs_used (XVECEXP (x, i, j), is_dest);
3334 else if (fmt[i] == 'e')
3335 used |= regs_used (XEXP (x, i), is_dest);
3337 return used;
3340 /* Create an instruction that prevents redirection of a conditional branch
3341 to the destination of the JUMP with address ADDR.
3342 If the branch needs to be implemented as an indirect jump, try to find
3343 a scratch register for it.
3344 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3345 If any preceding insn that doesn't fit into a delay slot is good enough,
3346 pass 1. Pass 2 if a definite blocking insn is needed.
3347 -1 is used internally to avoid deep recursion.
3348 If a blocking instruction is made or recognized, return it. */
3350 static rtx
3351 gen_block_redirect (rtx jump, int addr, int need_block)
3353 int dead = 0;
3354 rtx prev = prev_nonnote_insn (jump);
3355 rtx dest;
3357 /* First, check if we already have an instruction that satisfies our need. */
3358 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3360 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3361 return prev;
3362 if (GET_CODE (PATTERN (prev)) == USE
3363 || GET_CODE (PATTERN (prev)) == CLOBBER
3364 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3365 prev = jump;
3366 else if ((need_block &= ~1) < 0)
3367 return prev;
3368 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3369 need_block = 0;
3371 /* We can't use JUMP_LABEL here because it might be undefined
3372 when not optimizing. */
3373 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3374 /* If the branch is out of range, try to find a scratch register for it. */
3375 if (optimize
3376 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3377 > 4092 + 4098))
3379 rtx scan;
3380 /* Don't look for the stack pointer as a scratch register,
3381 it would cause trouble if an interrupt occurred. */
3382 unsigned try = 0x7fff, used;
3383 int jump_left = flag_expensive_optimizations + 1;
3385 /* It is likely that the most recent eligible instruction is wanted for
3386 the delay slot. Therefore, find out which registers it uses, and
3387 try to avoid using them. */
3389 for (scan = jump; (scan = PREV_INSN (scan)); )
3391 enum rtx_code code;
3393 if (INSN_DELETED_P (scan))
3394 continue;
3395 code = GET_CODE (scan);
3396 if (code == CODE_LABEL || code == JUMP_INSN)
3397 break;
3398 if (code == INSN
3399 && GET_CODE (PATTERN (scan)) != USE
3400 && GET_CODE (PATTERN (scan)) != CLOBBER
3401 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3403 try &= ~regs_used (PATTERN (scan), 0);
3404 break;
3407 for (used = dead = 0, scan = JUMP_LABEL (jump);
3408 (scan = NEXT_INSN (scan)); )
3410 enum rtx_code code;
3412 if (INSN_DELETED_P (scan))
3413 continue;
3414 code = GET_CODE (scan);
3415 if (GET_RTX_CLASS (code) == 'i')
3417 used |= regs_used (PATTERN (scan), 0);
3418 if (code == CALL_INSN)
3419 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3420 dead |= (used >> 16) & ~used;
3421 if (dead & try)
3423 dead &= try;
3424 break;
3426 if (code == JUMP_INSN)
3428 if (jump_left-- && simplejump_p (scan))
3429 scan = JUMP_LABEL (scan);
3430 else
3431 break;
3435 /* Mask out the stack pointer again, in case it was
3436 the only 'free' register we have found. */
3437 dead &= 0x7fff;
3439 /* If the immediate destination is still in range, check for possible
3440 threading with a jump beyond the delay slot insn.
3441 Don't check if we are called recursively; the jump has been or will be
3442 checked in a different invocation in that case. */
3444 else if (optimize && need_block >= 0)
3446 rtx next = next_active_insn (next_active_insn (dest));
3447 if (next && GET_CODE (next) == JUMP_INSN
3448 && GET_CODE (PATTERN (next)) == SET
3449 && recog_memoized (next) == CODE_FOR_jump_compact)
3451 dest = JUMP_LABEL (next);
3452 if (dest
3453 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3454 > 4092 + 4098))
3455 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3459 if (dead)
3461 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3463 /* It would be nice if we could convert the jump into an indirect
3464 jump / far branch right now, and thus expose all constituent
3465 instructions to further optimization. However, reorg uses
3466 simplejump_p to determine if there is an unconditional jump where
3467 it should try to schedule instructions from the target of the
3468 branch; simplejump_p fails for indirect jumps even if they have
3469 a JUMP_LABEL. */
3470 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3471 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3472 , jump);
3473 /* ??? We would like this to have the scope of the jump, but that
3474 scope will change when a delay slot insn of an inner scope is added.
3475 Hence, after delay slot scheduling, we'll have to expect
3476 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3477 the jump. */
3479 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3480 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3481 return insn;
3483 else if (need_block)
3484 /* We can't use JUMP_LABEL here because it might be undefined
3485 when not optimizing. */
3486 return emit_insn_before (gen_block_branch_redirect
3487 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3488 , jump);
3489 return prev;
3492 #define CONDJUMP_MIN -252
3493 #define CONDJUMP_MAX 262
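/* SH conditional branches (bt/bf) only reach about +-256 bytes, while
   bra reaches about -4092..+4098 (the figures used in gen_block_redirect
   above); CONDJUMP_MIN/MAX are presumably the conditional-branch limits
   adjusted for the pc bias. Conditional branches whose target lies
   outside this window are handled via the far_branch machinery below.  */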
3494 struct far_branch
3496 /* A label (to be placed) in front of the jump
3497 that jumps to our ultimate destination. */
3498 rtx near_label;
3499 /* Where we are going to insert it if we cannot move the jump any farther,
3500 or the jump itself if we have picked up an existing jump. */
3501 rtx insert_place;
3502 /* The ultimate destination. */
3503 rtx far_label;
3504 struct far_branch *prev;
3505 /* If the branch has already been created, its address;
3506 else the address of its first prospective user. */
3507 int address;
3510 static void gen_far_branch (struct far_branch *);
3511 enum mdep_reorg_phase_e mdep_reorg_phase;
3512 static void
3513 gen_far_branch (struct far_branch *bp)
3515 rtx insn = bp->insert_place;
3516 rtx jump;
3517 rtx label = gen_label_rtx ();
3519 emit_label_after (label, insn);
3520 if (bp->far_label)
3522 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3523 LABEL_NUSES (bp->far_label)++;
3525 else
3526 jump = emit_jump_insn_after (gen_return (), insn);
3527 /* Emit a barrier so that reorg knows that any following instructions
3528 are not reachable via a fall-through path.
3529 But don't do this when not optimizing, since we wouldn't suppress the
3530 alignment for the barrier then, and could end up with out-of-range
3531 pc-relative loads. */
3532 if (optimize)
3533 emit_barrier_after (jump);
3534 emit_label_after (bp->near_label, insn);
3535 JUMP_LABEL (jump) = bp->far_label;
3536 if (! invert_jump (insn, label, 1))
3537 abort ();
3538 (emit_insn_after
3539 (gen_stuff_delay_slot
3540 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3541 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3542 insn));
3543 /* Prevent reorg from undoing our splits. */
3544 gen_block_redirect (jump, bp->address += 2, 2);
3547 /* Fix up ADDR_DIFF_VECs. */
3548 void
3549 fixup_addr_diff_vecs (rtx first)
3551 rtx insn;
3553 for (insn = first; insn; insn = NEXT_INSN (insn))
3555 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3557 if (GET_CODE (insn) != JUMP_INSN
3558 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3559 continue;
3560 pat = PATTERN (insn);
3561 vec_lab = XEXP (XEXP (pat, 0), 0);
3563 /* Search the matching casesi_jump_2. */
3564 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3566 if (GET_CODE (prev) != JUMP_INSN)
3567 continue;
3568 prevpat = PATTERN (prev);
3569 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3570 continue;
3571 x = XVECEXP (prevpat, 0, 1);
3572 if (GET_CODE (x) != USE)
3573 continue;
3574 x = XEXP (x, 0);
3575 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3576 break;
3579 /* Emit the reference label of the braf where it belongs, right after
3580 the casesi_jump_2 (i.e. braf). */
3581 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3582 emit_label_after (braf_label, prev);
3584 /* Fix up the ADDR_DIFF_VEC to be relative
3585 to the reference address of the braf. */
3586 XEXP (XEXP (pat, 0), 0) = braf_label;
3590 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3591 a barrier. Return the base 2 logarithm of the desired alignment. */
3593 barrier_align (rtx barrier_or_label)
3595 rtx next = next_real_insn (barrier_or_label), pat, prev;
3596 int slot, credit, jump_to_next = 0;
3598 if (! next)
3599 return 0;
3601 pat = PATTERN (next);
3603 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3604 return 2;
3606 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3607 /* This is a barrier in front of a constant table. */
3608 return 0;
3610 prev = prev_real_insn (barrier_or_label);
3611 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3613 pat = PATTERN (prev);
3614 /* If this is a very small table, we want to keep the alignment after
3615 the table to the minimum for proper code alignment. */
3616 return ((TARGET_SMALLCODE
3617 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3618 <= (unsigned)1 << (CACHE_LOG - 2)))
3619 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3622 if (TARGET_SMALLCODE)
3623 return 0;
3625 if (! TARGET_SH2 || ! optimize)
3626 return align_jumps_log;
3628 /* When fixing up pcloads, a constant table might be inserted just before
3629 the basic block that ends with the barrier. Thus, we can't trust the
3630 instruction lengths before that. */
3631 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3633 /* Check if there is an immediately preceding branch to the insn beyond
3634 the barrier. We must weigh the cost of discarding useful information
3635 from the current cache line when executing this branch and there is
3636 an alignment, against that of fetching unneeded insn in front of the
3637 branch target when there is no alignment. */
3639 /* There are two delay_slot cases to consider. One is the simple case
3640 where the preceding branch is to the insn beyond the barrier (simple
3641 delay slot filling), and the other is where the preceding branch has
3642 a delay slot that is a duplicate of the insn after the barrier
3643 (fill_eager_delay_slots) and the branch is to the insn after the insn
3644 after the barrier. */
3646 /* PREV is presumed to be the JUMP_INSN for the barrier under
3647 investigation. Skip to the insn before it. */
3648 prev = prev_real_insn (prev);
3650 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3651 credit >= 0 && prev && GET_CODE (prev) == INSN;
3652 prev = prev_real_insn (prev))
3654 jump_to_next = 0;
3655 if (GET_CODE (PATTERN (prev)) == USE
3656 || GET_CODE (PATTERN (prev)) == CLOBBER)
3657 continue;
3658 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3660 prev = XVECEXP (PATTERN (prev), 0, 1);
3661 if (INSN_UID (prev) == INSN_UID (next))
3663 /* Delay slot was filled with insn at jump target. */
3664 jump_to_next = 1;
3665 continue;
3669 if (slot &&
3670 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3671 slot = 0;
3672 credit -= get_attr_length (prev);
3674 if (prev
3675 && GET_CODE (prev) == JUMP_INSN
3676 && JUMP_LABEL (prev))
3678 rtx x;
3679 if (jump_to_next
3680 || next_real_insn (JUMP_LABEL (prev)) == next
3681 /* If relax_delay_slots() decides NEXT was redundant
3682 with some previous instruction, it will have
3683 redirected PREV's jump to the following insn. */
3684 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3685 /* There is no upper bound on redundant instructions
3686 that might have been skipped, but we must not put an
3687 alignment where none had been before. */
3688 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3689 (INSN_P (x)
3690 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3691 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3692 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3694 rtx pat = PATTERN (prev);
3695 if (GET_CODE (pat) == PARALLEL)
3696 pat = XVECEXP (pat, 0, 0);
3697 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3698 return 0;
3703 return align_jumps_log;
3706 /* If we are inside a phony loop, almost any kind of label can turn up as the
3707 first one in the loop. Aligning a braf label causes incorrect switch
3708 destination addresses; we can detect braf labels because they are
3709 followed by a BARRIER.
3710 Applying loop alignment to small constant or switch tables is a waste
3711 of space, so we suppress this too. */
3713 sh_loop_align (rtx label)
3715 rtx next = label;
3718 next = next_nonnote_insn (next);
3719 while (next && GET_CODE (next) == CODE_LABEL);
3721 if (! next
3722 || ! INSN_P (next)
3723 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3724 || recog_memoized (next) == CODE_FOR_consttable_2)
3725 return 0;
3727 return align_loops_log;
3730 /* Do a final pass over the function, just before delayed branch
3731 scheduling. */
3733 static void
3734 sh_reorg (void)
3736 rtx first, insn, mova = NULL_RTX;
3737 int num_mova;
3738 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3739 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3741 first = get_insns ();
3743 /* We must split call insns before introducing `mova's. If we're
3744 optimizing, they'll have already been split. Otherwise, make
3745 sure we don't split them too late. */
3746 if (! optimize)
3747 split_all_insns_noflow ();
3749 if (TARGET_SHMEDIA)
3750 return;
3752 /* If relaxing, generate pseudo-ops to associate function calls with
3753 the symbols they call. It does no harm to not generate these
3754 pseudo-ops. However, when we can generate them, it enables the
3755 linker to potentially relax the jsr to a bsr, and eliminate the
3756 register load and, possibly, the constant pool entry. */
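/* Roughly, a call sequence such as
       mov.l  .L1,r1     ! .L1: .long  _foo
       jsr    @r1
   can then be relaxed by the linker into
       bsr    _foo
   (plus the delay-slot insn) when _foo turns out to be within reach,
   leaving both the register load and the pool entry dead.  */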
3758 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3759 if (TARGET_RELAX)
3761 /* Remove all REG_LABEL notes. We want to use them for our own
3762 purposes. This works because none of the remaining passes
3763 need to look at them.
3765 ??? But it may break in the future. We should use a machine
3766 dependent REG_NOTE, or some other approach entirely. */
3767 for (insn = first; insn; insn = NEXT_INSN (insn))
3769 if (INSN_P (insn))
3771 rtx note;
3773 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3774 remove_note (insn, note);
3778 for (insn = first; insn; insn = NEXT_INSN (insn))
3780 rtx pattern, reg, link, set, scan, dies, label;
3781 int rescan = 0, foundinsn = 0;
3783 if (GET_CODE (insn) == CALL_INSN)
3785 pattern = PATTERN (insn);
3787 if (GET_CODE (pattern) == PARALLEL)
3788 pattern = XVECEXP (pattern, 0, 0);
3789 if (GET_CODE (pattern) == SET)
3790 pattern = SET_SRC (pattern);
3792 if (GET_CODE (pattern) != CALL
3793 || GET_CODE (XEXP (pattern, 0)) != MEM)
3794 continue;
3796 reg = XEXP (XEXP (pattern, 0), 0);
3798 else
3800 reg = sfunc_uses_reg (insn);
3801 if (! reg)
3802 continue;
3805 if (GET_CODE (reg) != REG)
3806 continue;
3808 /* This is a function call via REG. If the only uses of REG
3809 between the time that it is set and the time that it dies
3810 are in function calls, then we can associate all the
3811 function calls with the setting of REG. */
3813 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3815 if (REG_NOTE_KIND (link) != 0)
3816 continue;
3817 set = single_set (XEXP (link, 0));
3818 if (set && rtx_equal_p (reg, SET_DEST (set)))
3820 link = XEXP (link, 0);
3821 break;
3825 if (! link)
3827 /* ??? Sometimes global register allocation will have
3828 deleted the insn pointed to by LOG_LINKS. Try
3829 scanning backward to find where the register is set. */
3830 for (scan = PREV_INSN (insn);
3831 scan && GET_CODE (scan) != CODE_LABEL;
3832 scan = PREV_INSN (scan))
3834 if (! INSN_P (scan))
3835 continue;
3837 if (! reg_mentioned_p (reg, scan))
3838 continue;
3840 if (noncall_uses_reg (reg, scan, &set))
3841 break;
3843 if (set)
3845 link = scan;
3846 break;
3851 if (! link)
3852 continue;
3854 /* The register is set at LINK. */
3856 /* We can only optimize the function call if the register is
3857 being set to a symbol. In theory, we could sometimes
3858 optimize calls to a constant location, but the assembler
3859 and linker do not support that at present. */
3860 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3861 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3862 continue;
3864 /* Scan forward from LINK to the place where REG dies, and
3865 make sure that the only insns which use REG are
3866 themselves function calls. */
3868 /* ??? This doesn't work for call targets that were allocated
3869 by reload, since there may not be a REG_DEAD note for the
3870 register. */
3872 dies = NULL_RTX;
3873 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3875 rtx scanset;
3877 /* Don't try to trace forward past a CODE_LABEL if we haven't
3878 seen INSN yet. Ordinarily, we will only find the setting insn
3879 in LOG_LINKS if it is in the same basic block. However,
3880 cross-jumping can insert code labels in between the load and
3881 the call, and can result in situations where a single call
3882 insn may have two targets depending on where we came from. */
3884 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3885 break;
3887 if (! INSN_P (scan))
3888 continue;
3890 /* Don't try to trace forward past a JUMP. To optimize
3891 safely, we would have to check that all the
3892 instructions at the jump destination did not use REG. */
3894 if (GET_CODE (scan) == JUMP_INSN)
3895 break;
3897 if (! reg_mentioned_p (reg, scan))
3898 continue;
3900 if (noncall_uses_reg (reg, scan, &scanset))
3901 break;
3903 if (scan == insn)
3904 foundinsn = 1;
3906 if (scan != insn
3907 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3909 /* There is a function call to this register other
3910 than the one we are checking. If we optimize
3911 this call, we need to rescan again below. */
3912 rescan = 1;
3915 /* ??? We shouldn't have to worry about SCANSET here.
3916 We should just be able to check for a REG_DEAD note
3917 on a function call. However, the REG_DEAD notes are
3918 apparently not dependable around libcalls; c-torture
3919 execute/920501-2 is a test case. If SCANSET is set,
3920 then this insn sets the register, so it must have
3921 died earlier. Unfortunately, this will only handle
3922 the cases in which the register is, in fact, set in a
3923 later insn. */
3925 /* ??? We shouldn't have to use FOUNDINSN here.
3926 However, the LOG_LINKS fields are apparently not
3927 entirely reliable around libcalls;
3928 newlib/libm/math/e_pow.c is a test case. Sometimes
3929 an insn will appear in LOG_LINKS even though it is
3930 not the most recent insn which sets the register. */
3932 if (foundinsn
3933 && (scanset
3934 || find_reg_note (scan, REG_DEAD, reg)))
3936 dies = scan;
3937 break;
3941 if (! dies)
3943 /* Either there was a branch, or some insn used REG
3944 other than as a function call address. */
3945 continue;
3948 /* Create a code label, and put it in a REG_LABEL note on
3949 the insn which sets the register, and on each call insn
3950 which uses the register. In final_prescan_insn we look
3951 for the REG_LABEL notes, and output the appropriate label
3952 or pseudo-op. */
3954 label = gen_label_rtx ();
3955 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3956 REG_NOTES (link));
3957 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3958 REG_NOTES (insn));
3959 if (rescan)
3961 scan = link;
3964 rtx reg2;
3966 scan = NEXT_INSN (scan);
3967 if (scan != insn
3968 && ((GET_CODE (scan) == CALL_INSN
3969 && reg_mentioned_p (reg, scan))
3970 || ((reg2 = sfunc_uses_reg (scan))
3971 && REGNO (reg2) == REGNO (reg))))
3972 REG_NOTES (scan)
3973 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3975 while (scan != dies);
3980 if (TARGET_SH2)
3981 fixup_addr_diff_vecs (first);
3983 if (optimize)
3985 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3986 shorten_branches (first);
3988 /* Scan the function looking for move instructions which have to be
3989 changed to pc-relative loads and insert the literal tables. */
3991 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3992 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3994 if (mova_p (insn))
3996 if (! num_mova++)
3997 mova = insn;
3999 else if (GET_CODE (insn) == JUMP_INSN
4000 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4001 && num_mova)
4003 rtx scan;
4004 int total;
4006 num_mova--;
4008 /* Some code might have been inserted between the mova and
4009 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4010 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4011 total += get_attr_length (scan);
4013 /* The range of mova is 1020; add 4 because the pc counts from the
4014 address of the second instruction after this one, and subtract 2
4015 in case the pc is 2-byte aligned. Any alignment needed for the
4016 ADDR_DIFF_VEC cancels out with the alignment effects of the mova itself. */
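/* Worked example (illustrative): with the limits described above, the
   usable distance is 1020 + 4 - 2 = 1022 bytes, which is exactly the
   constant tested below.  */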
4017 if (total > 1022)
4019 /* Change the mova into a load, and restart scanning
4020 there. broken_move will then return true for mova. */
4021 SET_SRC (PATTERN (mova))
4022 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4023 INSN_CODE (mova) = -1;
4024 insn = mova;
4027 if (broken_move (insn))
4029 rtx scan;
4030 /* Scan ahead looking for a barrier to stick the constant table
4031 behind. */
4032 rtx barrier = find_barrier (num_mova, mova, insn);
4033 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4035 if (num_mova && ! mova_p (mova))
4037 /* find_barrier had to change the first mova into a
4038 pcload; thus, we have to start with this new pcload. */
4039 insn = mova;
4040 num_mova = 0;
4042 /* Now find all the moves between the points and modify them. */
4043 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4045 if (GET_CODE (scan) == CODE_LABEL)
4046 last_float = 0;
4047 if (broken_move (scan))
4049 rtx *patp = &PATTERN (scan), pat = *patp;
4050 rtx src, dst;
4051 rtx lab;
4052 rtx newsrc;
4053 enum machine_mode mode;
4055 if (GET_CODE (pat) == PARALLEL)
4056 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4057 src = SET_SRC (pat);
4058 dst = SET_DEST (pat);
4059 mode = GET_MODE (dst);
4061 if (mode == SImode && hi_const (src)
4062 && REGNO (dst) != FPUL_REG)
4064 int offset = 0;
4066 mode = HImode;
4067 while (GET_CODE (dst) == SUBREG)
4069 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4070 GET_MODE (SUBREG_REG (dst)),
4071 SUBREG_BYTE (dst),
4072 GET_MODE (dst));
4073 dst = SUBREG_REG (dst);
4075 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4078 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4080 /* This must be an insn that clobbers r0. */
4081 rtx clobber = XVECEXP (PATTERN (scan), 0,
4082 XVECLEN (PATTERN (scan), 0) - 1);
4084 if (GET_CODE (clobber) != CLOBBER
4085 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4086 abort ();
4088 if (last_float
4089 && reg_set_between_p (r0_rtx, last_float_move, scan))
4090 last_float = 0;
4091 if (last_float
4092 && TARGET_SHCOMPACT
4093 && GET_MODE_SIZE (mode) != 4
4094 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4095 last_float = 0;
4096 lab = add_constant (src, mode, last_float);
4097 if (lab)
4098 emit_insn_before (gen_mova (lab), scan);
4099 else
4101 /* There will be a REG_UNUSED note for r0 on
4102 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4103 otherwise reorg:mark_target_live_regs will not
4104 consider r0 to be used, and we could end up with a delay
4105 slot insn in front of SCAN that clobbers r0. */
4106 rtx note
4107 = find_regno_note (last_float_move, REG_UNUSED, 0);
4109 /* If we are not optimizing, then there may not be
4110 a note. */
4111 if (note)
4112 PUT_MODE (note, REG_INC);
4114 *last_float_addr = r0_inc_rtx;
4116 last_float_move = scan;
4117 last_float = src;
4118 newsrc = gen_rtx (MEM, mode,
4119 (((TARGET_SH4 && ! TARGET_FMOVD)
4120 || REGNO (dst) == FPUL_REG)
4121 ? r0_inc_rtx
4122 : r0_rtx));
4123 last_float_addr = &XEXP (newsrc, 0);
4125 /* Remove the clobber of r0. */
4126 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
4127 RTX_UNCHANGING_P (newsrc) = 1;
4129 /* This is a mova needing a label. Create it. */
4130 else if (GET_CODE (src) == UNSPEC
4131 && XINT (src, 1) == UNSPEC_MOVA
4132 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4134 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4135 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4136 newsrc = gen_rtx_UNSPEC (SImode,
4137 gen_rtvec (1, newsrc),
4138 UNSPEC_MOVA);
4140 else
4142 lab = add_constant (src, mode, 0);
4143 newsrc = gen_rtx_MEM (mode,
4144 gen_rtx_LABEL_REF (VOIDmode, lab));
4145 RTX_UNCHANGING_P (newsrc) = 1;
4147 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4148 INSN_CODE (scan) = -1;
4151 dump_table (barrier);
4152 insn = barrier;
4156 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4157 INSN_ADDRESSES_FREE ();
4158 split_branches (first);
4160 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4161 also has an effect on the register that holds the address of the sfunc.
4162 Insert an extra dummy insn in front of each sfunc that pretends to
4163 use this register. */
4164 if (flag_delayed_branch)
4166 for (insn = first; insn; insn = NEXT_INSN (insn))
4168 rtx reg = sfunc_uses_reg (insn);
4170 if (! reg)
4171 continue;
4172 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4175 #if 0
4176 /* fpscr is not actually a user variable, but we pretend it is for the
4177 sake of the previous optimization passes, since we want it handled like
4178 one. However, we don't have any debugging information for it, so turn
4179 it into a non-user variable now. */
4180 if (TARGET_SH4)
4181 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4182 #endif
4183 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4187 get_dest_uid (rtx label, int max_uid)
4189 rtx dest = next_real_insn (label);
4190 int dest_uid;
4191 if (! dest)
4192 /* This can happen for an undefined label. */
4193 return 0;
4194 dest_uid = INSN_UID (dest);
4195 /* If this is a newly created branch redirection blocking instruction,
4196 we cannot index the branch_uid or insn_addresses arrays with its
4197 uid. But then, we won't need to, because the actual destination is
4198 the following branch. */
4199 while (dest_uid >= max_uid)
4201 dest = NEXT_INSN (dest);
4202 dest_uid = INSN_UID (dest);
4204 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4205 return 0;
4206 return dest_uid;
4209 /* Split condbranches that are out of range. Also add clobbers for
4210 scratch registers that are needed in far jumps.
4211 We do this before delay slot scheduling, so that it can take our
4212 newly created instructions into account. It also allows us to
4213 find branches with common targets more easily. */
4215 static void
4216 split_branches (rtx first)
4218 rtx insn;
4219 struct far_branch **uid_branch, *far_branch_list = 0;
4220 int max_uid = get_max_uid ();
4222 /* Find out which branches are out of range. */
4223 shorten_branches (first);
4225 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4226 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4228 for (insn = first; insn; insn = NEXT_INSN (insn))
4229 if (! INSN_P (insn))
4230 continue;
4231 else if (INSN_DELETED_P (insn))
4233 /* Shorten_branches would split this instruction again,
4234 so transform it into a note. */
4235 PUT_CODE (insn, NOTE);
4236 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4237 NOTE_SOURCE_FILE (insn) = 0;
4239 else if (GET_CODE (insn) == JUMP_INSN
4240 /* Don't mess with ADDR_DIFF_VEC */
4241 && (GET_CODE (PATTERN (insn)) == SET
4242 || GET_CODE (PATTERN (insn)) == RETURN))
4244 enum attr_type type = get_attr_type (insn);
4245 if (type == TYPE_CBRANCH)
4247 rtx next, beyond;
4249 if (get_attr_length (insn) > 4)
4251 rtx src = SET_SRC (PATTERN (insn));
4252 rtx olabel = XEXP (XEXP (src, 1), 0);
4253 int addr = INSN_ADDRESSES (INSN_UID (insn));
4254 rtx label = 0;
4255 int dest_uid = get_dest_uid (olabel, max_uid);
4256 struct far_branch *bp = uid_branch[dest_uid];
4258 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4259 the label if the LABEL_NUSES count drops to zero. There is
4260 always a jump_optimize pass that sets these values, but it
4261 proceeds to delete unreferenced code, and then, if not
4262 optimizing, to un-delete the deleted instructions, thus
4263 leaving labels with use counts that are too low. */
4264 if (! optimize)
4266 JUMP_LABEL (insn) = olabel;
4267 LABEL_NUSES (olabel)++;
4269 if (! bp)
4271 bp = (struct far_branch *) alloca (sizeof *bp);
4272 uid_branch[dest_uid] = bp;
4273 bp->prev = far_branch_list;
4274 far_branch_list = bp;
4275 bp->far_label
4276 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4277 LABEL_NUSES (bp->far_label)++;
4279 else
4281 label = bp->near_label;
4282 if (! label && bp->address - addr >= CONDJUMP_MIN)
4284 rtx block = bp->insert_place;
4286 if (GET_CODE (PATTERN (block)) == RETURN)
4287 block = PREV_INSN (block);
4288 else
4289 block = gen_block_redirect (block,
4290 bp->address, 2);
4291 label = emit_label_after (gen_label_rtx (),
4292 PREV_INSN (block));
4293 bp->near_label = label;
4295 else if (label && ! NEXT_INSN (label))
4297 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4298 bp->insert_place = insn;
4299 else
4300 gen_far_branch (bp);
4303 if (! label
4304 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4306 bp->near_label = label = gen_label_rtx ();
4307 bp->insert_place = insn;
4308 bp->address = addr;
4310 if (! redirect_jump (insn, label, 1))
4311 abort ();
4313 else
4315 /* get_attr_length (insn) == 2 */
4316 /* Check if we have a pattern where reorg wants to redirect
4317 the branch to a label from an unconditional branch that
4318 is too far away. */
4319 /* We can't use JUMP_LABEL here because it might be undefined
4320 when not optimizing. */
4321 /* A syntax error might cause beyond to be NULL_RTX. */
4322 beyond
4323 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4324 0));
4326 if (beyond
4327 && (GET_CODE (beyond) == JUMP_INSN
4328 || ((beyond = next_active_insn (beyond))
4329 && GET_CODE (beyond) == JUMP_INSN))
4330 && GET_CODE (PATTERN (beyond)) == SET
4331 && recog_memoized (beyond) == CODE_FOR_jump_compact
4332 && ((INSN_ADDRESSES
4333 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4334 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4335 > 252 + 258 + 2))
4336 gen_block_redirect (beyond,
4337 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4340 next = next_active_insn (insn);
4342 if ((GET_CODE (next) == JUMP_INSN
4343 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4344 && GET_CODE (PATTERN (next)) == SET
4345 && recog_memoized (next) == CODE_FOR_jump_compact
4346 && ((INSN_ADDRESSES
4347 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4348 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4349 > 252 + 258 + 2))
4350 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4352 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4354 int addr = INSN_ADDRESSES (INSN_UID (insn));
4355 rtx far_label = 0;
4356 int dest_uid = 0;
4357 struct far_branch *bp;
4359 if (type == TYPE_JUMP)
4361 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4362 dest_uid = get_dest_uid (far_label, max_uid);
4363 if (! dest_uid)
4365 /* Parse errors can lead to labels outside
4366 the insn stream. */
4367 if (! NEXT_INSN (far_label))
4368 continue;
4370 if (! optimize)
4372 JUMP_LABEL (insn) = far_label;
4373 LABEL_NUSES (far_label)++;
4375 redirect_jump (insn, NULL_RTX, 1);
4376 far_label = 0;
4379 bp = uid_branch[dest_uid];
4380 if (! bp)
4382 bp = (struct far_branch *) alloca (sizeof *bp);
4383 uid_branch[dest_uid] = bp;
4384 bp->prev = far_branch_list;
4385 far_branch_list = bp;
4386 bp->near_label = 0;
4387 bp->far_label = far_label;
4388 if (far_label)
4389 LABEL_NUSES (far_label)++;
4391 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4392 if (addr - bp->address <= CONDJUMP_MAX)
4393 emit_label_after (bp->near_label, PREV_INSN (insn));
4394 else
4396 gen_far_branch (bp);
4397 bp->near_label = 0;
4399 else
4400 bp->near_label = 0;
4401 bp->address = addr;
4402 bp->insert_place = insn;
4403 if (! far_label)
4404 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4405 else
4406 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4409 /* Generate all pending far branches,
4410 and free our references to the far labels. */
4411 while (far_branch_list)
4413 if (far_branch_list->near_label
4414 && ! NEXT_INSN (far_branch_list->near_label))
4415 gen_far_branch (far_branch_list);
4416 if (optimize
4417 && far_branch_list->far_label
4418 && ! --LABEL_NUSES (far_branch_list->far_label))
4419 delete_insn (far_branch_list->far_label);
4420 far_branch_list = far_branch_list->prev;
4423 /* Instruction length information is no longer valid due to the new
4424 instructions that have been generated. */
4425 init_insn_lengths ();
4428 /* Dump out instruction addresses, which is useful for debugging the
4429 constant pool table code.
4431 If relaxing, output the label and pseudo-ops used to link together
4432 the calls and the instructions which set the registers. */
4434 /* ??? The addresses printed by this routine for insns are nonsense for
4435 insns which are inside of a sequence where none of the inner insns have
4436 variable length. This is because the second pass of shorten_branches
4437 does not bother to update them. */
4439 void
4440 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4441 int noperands ATTRIBUTE_UNUSED)
4443 if (TARGET_DUMPISIZE)
4444 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4446 if (TARGET_RELAX)
4448 rtx note;
4450 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4451 if (note)
4453 rtx pattern;
4455 pattern = PATTERN (insn);
4456 if (GET_CODE (pattern) == PARALLEL)
4457 pattern = XVECEXP (pattern, 0, 0);
4458 if (GET_CODE (pattern) == CALL
4459 || (GET_CODE (pattern) == SET
4460 && (GET_CODE (SET_SRC (pattern)) == CALL
4461 || get_attr_type (insn) == TYPE_SFUNC)))
4462 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4463 CODE_LABEL_NUMBER (XEXP (note, 0)));
4464 else if (GET_CODE (pattern) == SET)
4465 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4466 CODE_LABEL_NUMBER (XEXP (note, 0)));
4467 else
4468 abort ();
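/* Illustrative sketch of the -mrelax output produced here; the label
   number, its exact spelling and the register are made up, and the
   mnemonics depend on the subtarget:

       L42:
               mov.l   .Lfn,r1         ! insn that sets the call register
               ...
               .uses   L42
               jsr     @r1             ! call through that register

   The label/.uses pair tells the linker which load feeds which call, so
   that it can relax the pair (e.g. into a pc-relative bsr) when the
   target turns out to be close enough.  */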
4473 /* Dump out any constants accumulated in the final pass. These will
4474 only be labels. */
4476 const char *
4477 output_jump_label_table (void)
4479 int i;
4481 if (pool_size)
4483 fprintf (asm_out_file, "\t.align 2\n");
4484 for (i = 0; i < pool_size; i++)
4486 pool_node *p = &pool_vector[i];
4488 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4489 CODE_LABEL_NUMBER (p->label));
4490 output_asm_insn (".long %O0", &p->value);
4492 pool_size = 0;
4495 return "";
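/* For illustration only (label numbers are made up), the table emitted
   above looks like:

           .align  2
       L47:
           .long   L12

   i.e. an aligned block with one 4-byte entry per pooled label.  */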
4498 /* A full frame looks like:
4500 arg-5
4501 arg-4
4502 [ if current_function_anonymous_args
4503 arg-3
4504 arg-2
4505 arg-1
4506 arg-0 ]
4507 saved-fp
4508 saved-r10
4509 saved-r11
4510 saved-r12
4511 saved-pr
4512 local-n
4514 local-1
4515 local-0 <- fp points here. */
4517 /* Number of bytes pushed for anonymous args, used to pass information
4518 between expand_prologue and expand_epilogue. */
4520 static int extra_push;
4522 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4523 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4524 for an epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET
4525 of all the registers that are about to be restored, and hence dead. */
4527 static void
4528 output_stack_adjust (int size, rtx reg, int epilogue_p,
4529 HARD_REG_SET *live_regs_mask)
4531 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4532 if (size)
4534 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4536 if (size % align)
4537 abort ();
4539 if (CONST_OK_FOR_ADD (size))
4540 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4541 /* Try to do it with two partial adjustments; however, we must make
4542 sure that the stack is properly aligned at all times, in case
4543 an interrupt occurs between the two partial adjustments. */
4544 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4545 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4547 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4548 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4550 else
4552 rtx const_reg;
4553 rtx insn;
4554 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4555 int i;
4557 /* If TEMP is invalid, we could temporarily save a general
4558 register to MACL. However, there is currently no need
4559 to handle this case, so just abort when we see it. */
4560 if (current_function_interrupt
4561 || ! call_used_regs[temp] || fixed_regs[temp])
4562 temp = -1;
4563 if (temp < 0 && ! current_function_interrupt)
4565 HARD_REG_SET temps;
4566 COPY_HARD_REG_SET (temps, call_used_reg_set);
4567 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4568 if (epilogue_p)
4570 for (i = 0; i < HARD_REGNO_NREGS (FIRST_RET_REG, DImode); i++)
4571 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4572 if (current_function_calls_eh_return)
4574 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4575 for (i = 0; i <= 3; i++)
4576 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4579 else
4581 for (i = FIRST_PARM_REG;
4582 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4583 CLEAR_HARD_REG_BIT (temps, i);
4584 if (current_function_needs_context)
4585 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4587 temp = scavenge_reg (&temps);
4589 if (temp < 0 && live_regs_mask)
4590 temp = scavenge_reg (live_regs_mask);
4591 if (temp < 0)
4592 abort ();
4593 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4595 /* If SIZE is negative, subtract the positive value.
4596 This sometimes allows a constant pool entry to be shared
4597 between prologue and epilogue code. */
4598 if (size < 0)
4600 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4601 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4603 else
4605 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4606 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4608 if (! epilogue_p)
4609 REG_NOTES (insn)
4610 = (gen_rtx_EXPR_LIST
4611 (REG_FRAME_RELATED_EXPR,
4612 gen_rtx_SET (VOIDmode, reg,
4613 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4614 REG_NOTES (insn)));
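/* A minimal standalone sketch of the two-step split used above, assuming
   the non-SHmedia case where the add immediate is a signed 8-bit value;
   for illustration only, not used by the compiler.  E.g. size = -184,
   align = 4 yields -92 and -92: both fit the immediate range, and the
   stack stays 4-byte aligned after each partial adjustment.  */
#if 0
static void
split_stack_adjust_sketch (int size, int align, int *first, int *second)
{
  *first = size / 2 & -align;            /* first partial adjustment  */
  *second = size - (size / 2 & -align);  /* remainder, still aligned  */
}
#endif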
4619 static rtx
4620 frame_insn (rtx x)
4622 x = emit_insn (x);
4623 RTX_FRAME_RELATED_P (x) = 1;
4624 return x;
4627 /* Output RTL to push register RN onto the stack. */
4629 static rtx
4630 push (int rn)
4632 rtx x;
4633 if (rn == FPUL_REG)
4634 x = gen_push_fpul ();
4635 else if (rn == FPSCR_REG)
4636 x = gen_push_fpscr ();
4637 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4638 && FP_OR_XD_REGISTER_P (rn))
4640 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4641 return NULL_RTX;
4642 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4644 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4645 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4646 else
4647 x = gen_push (gen_rtx_REG (SImode, rn));
4649 x = frame_insn (x);
4650 REG_NOTES (x)
4651 = gen_rtx_EXPR_LIST (REG_INC,
4652 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4653 return x;
4656 /* Output RTL to pop register RN from the stack. */
4658 static void
4659 pop (int rn)
4661 rtx x;
4662 if (rn == FPUL_REG)
4663 x = gen_pop_fpul ();
4664 else if (rn == FPSCR_REG)
4665 x = gen_pop_fpscr ();
4666 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4667 && FP_OR_XD_REGISTER_P (rn))
4669 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4670 return;
4671 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4673 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4674 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4675 else
4676 x = gen_pop (gen_rtx_REG (SImode, rn));
4678 x = emit_insn (x);
4679 REG_NOTES (x)
4680 = gen_rtx_EXPR_LIST (REG_INC,
4681 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4684 /* Generate code to push the regs specified in the mask. */
4686 static void
4687 push_regs (HARD_REG_SET *mask, int interrupt_handler)
4689 int i;
4690 int skip_fpscr = 0;
4692 /* Push PR last; this gives better latencies after the prologue, and
4693 provides candidates for the return delay slot when there are no
4694 general registers pushed. */
4695 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4697 /* If this is an interrupt handler, and the SZ bit varies,
4698 and we have to push any floating point register, we need
4699 to switch to the correct precision first. */
4700 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4701 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
4703 HARD_REG_SET unsaved;
4705 push (FPSCR_REG);
4706 COMPL_HARD_REG_SET(unsaved, *mask);
4707 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4708 skip_fpscr = 1;
4710 if (i != PR_REG
4711 && (i != FPSCR_REG || ! skip_fpscr)
4712 && TEST_HARD_REG_BIT (*mask, i))
4713 push (i);
4715 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4716 push (PR_REG);
4719 /* Calculate how much extra space is needed to save all callee-saved
4720 target registers.
4721 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4723 static int
4724 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
4726 int reg;
4727 int stack_space = 0;
4728 int interrupt_handler = sh_cfun_interrupt_handler_p ();
4730 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
4731 if ((! call_used_regs[reg] || interrupt_handler)
4732 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
4733 /* Leave space to save this target register on the stack,
4734 in case target register allocation wants to use it. */
4735 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4736 return stack_space;
4739 /* Decide whether we should reserve space for callee-save target registers,
4740 in case target register allocation wants to use them. REGS_SAVED is
4741 the space, in bytes, that is already required for register saves.
4742 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4744 static int
4745 shmedia_reserve_space_for_target_registers_p (int regs_saved,
4746 HARD_REG_SET *live_regs_mask)
4748 if (optimize_size)
4749 return 0;
4750 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
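/* Worked example (the sizes are assumptions for illustration): if three
   call-saved target registers would need 3 * 8 = 24 bytes and the saves
   already required take 32 bytes, space is reserved (24 <= 32); if the
   existing saves only take 16 bytes, it is not (24 > 16).  */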
4753 /* Decide how much space to reserve for callee-save target registers
4754 in case target register allocation wants to use them.
4755 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4757 static int
4758 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
4760 if (shmedia_space_reserved_for_target_registers)
4761 return shmedia_target_regs_stack_space (live_regs_mask);
4762 else
4763 return 0;
4766 /* Work out the registers which need to be saved, both as a mask and a
4767 count of saved words. Return the count.
4769 If doing a pragma interrupt function, then push all regs used by the
4770 function, and if we call another function (we can tell by looking at PR),
4771 make sure that all the regs it clobbers are safe too. */
4773 static int
4774 calc_live_regs (HARD_REG_SET *live_regs_mask)
4776 int reg;
4777 int count;
4778 int interrupt_handler;
4779 int pr_live, has_call;
4781 interrupt_handler = sh_cfun_interrupt_handler_p ();
4783 CLEAR_HARD_REG_SET (*live_regs_mask);
4784 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
4785 && regs_ever_live[FPSCR_REG])
4786 target_flags &= ~FPU_SINGLE_BIT;
4787 /* If switching to double mode avoids a lot of saves, do that. */
4788 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4789 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4790 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4791 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4792 && ++count > 2)
4794 target_flags &= ~FPU_SINGLE_BIT;
4795 break;
4797 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4798 knows how to use it. That means the pseudo originally allocated for
4799 the initial value can become the PR_MEDIA_REG hard register, as seen for
4800 execute/20010122-1.c:test9. */
4801 if (TARGET_SHMEDIA)
4802 /* ??? This function is called from initial_elimination_offset, hence we
4803 can't use the result of sh_media_register_for_return here. */
4804 pr_live = sh_pr_n_sets ();
4805 else
4807 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4808 pr_live = (pr_initial
4809 ? (GET_CODE (pr_initial) != REG
4810 || REGNO (pr_initial) != (PR_REG))
4811 : regs_ever_live[PR_REG]);
4812 /* For SHcompact, if not optimizing, we end up with a memory reference
4813 using the return address pointer for __builtin_return_address even
4814 though there is no actual need to put the PR register on the stack. */
4815 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
4817 /* Force PR to be live if the prologue has to call the SHmedia
4818 argument decoder or register saver. */
4819 if (TARGET_SHCOMPACT
4820 && ((current_function_args_info.call_cookie
4821 & ~ CALL_COOKIE_RET_TRAMP (1))
4822 || current_function_has_nonlocal_label))
4823 pr_live = 1;
4824 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
4825 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4827 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4828 ? pr_live
4829 : (interrupt_handler && ! pragma_trapa)
4830 ? (/* Need to save all the regs ever live. */
4831 (regs_ever_live[reg]
4832 || (call_used_regs[reg]
4833 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4834 && has_call)
4835 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
4836 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
4837 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4838 && reg != RETURN_ADDRESS_POINTER_REGNUM
4839 && reg != T_REG && reg != GBR_REG
4840 /* Push fpscr only on targets which have an FPU. */
4841 && (reg != FPSCR_REG || TARGET_FPU_ANY))
4842 : (/* Only push those regs which are used and need to be saved. */
4843 (TARGET_SHCOMPACT
4844 && flag_pic
4845 && current_function_args_info.call_cookie
4846 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
4847 || (regs_ever_live[reg] && ! call_used_regs[reg])
4848 || (current_function_calls_eh_return
4849 && (reg == (int) EH_RETURN_DATA_REGNO (0)
4850 || reg == (int) EH_RETURN_DATA_REGNO (1)
4851 || reg == (int) EH_RETURN_DATA_REGNO (2)
4852 || reg == (int) EH_RETURN_DATA_REGNO (3)))
4853 || ((reg == MACL_REG || reg == MACH_REG)
4854 && regs_ever_live[reg]
4855 && sh_cfun_attr_renesas_p ())
4858 SET_HARD_REG_BIT (*live_regs_mask, reg);
4859 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4861 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4862 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4864 if (FP_REGISTER_P (reg))
4866 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4868 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
4869 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4872 else if (XD_REGISTER_P (reg))
4874 /* Must switch to double mode to access these registers. */
4875 target_flags &= ~FPU_SINGLE_BIT;
4880 /* If we have a target register optimization pass after prologue / epilogue
4881 threading, we need to assume all target registers will be live even if
4882 they aren't now. */
4883 if (flag_branch_target_load_optimize2
4884 && TARGET_SAVE_ALL_TARGET_REGS
4885 && shmedia_space_reserved_for_target_registers)
4886 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
4887 if ((! call_used_regs[reg] || interrupt_handler)
4888 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
4890 SET_HARD_REG_BIT (*live_regs_mask, reg);
4891 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4893 /* If this is an interrupt handler, we don't have any call-clobbered
4894 registers we can conveniently use for target register save/restore.
4895 Make sure we save at least one general purpose register when we need
4896 to save target registers. */
4897 if (interrupt_handler
4898 && hard_regs_intersect_p (live_regs_mask,
4899 &reg_class_contents[TARGET_REGS])
4900 && ! hard_regs_intersect_p (live_regs_mask,
4901 &reg_class_contents[GENERAL_REGS]))
4903 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
4904 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
4907 return count;
4910 /* Code to generate prologue and epilogue sequences */
4912 /* PUSHED is the number of bytes that are being pushed on the
4913 stack for register saves. Return the frame size, padded
4914 appropriately so that the stack stays properly aligned. */
4915 static HOST_WIDE_INT
4916 rounded_frame_size (int pushed)
4918 HOST_WIDE_INT size = get_frame_size ();
4919 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4921 return ((size + pushed + align - 1) & -align) - pushed;
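/* Self-contained sketch of the rounding rule above; illustration only,
   with made-up numbers.  Assuming an 8-byte STACK_BOUNDARY, a 10-byte
   frame with 12 bytes already pushed is padded to 12 bytes so that the
   total of 24 stays aligned: ((10 + 12 + 7) & -8) - 12 == 12.  */
#if 0
static long
rounded_frame_size_sketch (long size, long pushed, long align)
{
  return ((size + pushed + align - 1) & -align) - pushed;
}
#endif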
4924 /* Choose a call-clobbered target-branch register that remains
4925 unchanged along the whole function. We set it up as the return
4926 value in the prologue. */
4928 sh_media_register_for_return (void)
4930 int regno;
4931 int tr0_used;
4933 if (! current_function_is_leaf)
4934 return -1;
4935 if (lookup_attribute ("interrupt_handler",
4936 DECL_ATTRIBUTES (current_function_decl)))
4937 return -1;
4939 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4941 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4942 if (call_used_regs[regno] && ! regs_ever_live[regno])
4943 return regno;
4945 return -1;
4948 /* The maximum number of registers we need to save is:
4949 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
4950 - 32 floating point registers (for each pair, we save none,
4951 one single precision value, or a double precision value).
4952 - 8 target registers
4953 - add 1 entry for a delimiter. */
4954 #define MAX_SAVED_REGS (62+32+8)
4956 typedef struct save_entry_s
4958 unsigned char reg;
4959 unsigned char mode;
4960 short offset;
4961 } save_entry;
4963 #define MAX_TEMPS 4
4965 /* There will be a delimiter entry with VOIDmode both at the start and the
4966 end of a filled in schedule. The end delimiter has the offset of the
4967 save with the smallest (i.e. most negative) offset. */
4968 typedef struct save_schedule_s
4970 save_entry entries[MAX_SAVED_REGS + 2];
4971 int temps[MAX_TEMPS+1];
4972 } save_schedule;
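/* Illustrative example of a filled-in schedule (register numbers and
   offsets are made up): saving PR_MEDIA_REG and r28 with offset_base 0
   might give

       { reg = -1,           mode = VOIDmode, offset =   0 }   start delimiter
       { reg = PR_MEDIA_REG, mode = DImode,   offset =  -8 }
       { reg = 28,           mode = DImode,   offset = -16 }
       { reg = -1,           mode = VOIDmode, offset = -16 }   end delimiter

   while schedule->temps[] lists call-clobbered general registers that the
   prologue/epilogue may use as scratch, terminated by -1.  */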
4974 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
4975 use reverse order. Returns the last entry written to (not counting
4976 the delimiter). OFFSET_BASE is a number to be added to all offset
4977 entries. */
4979 static save_entry *
4980 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
4981 int offset_base)
4983 int align, i;
4984 save_entry *entry = schedule->entries;
4985 int tmpx = 0;
4986 int offset;
4988 if (! current_function_interrupt)
4989 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
4990 if (call_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
4991 && ! FUNCTION_ARG_REGNO_P (i)
4992 && i != FIRST_RET_REG
4993 && ! (current_function_needs_context && i == STATIC_CHAIN_REGNUM)
4994 && ! (current_function_calls_eh_return
4995 && (i == EH_RETURN_STACKADJ_REGNO
4996 || ((unsigned)i <= EH_RETURN_DATA_REGNO (0)
4997 && (unsigned)i >= EH_RETURN_DATA_REGNO (3)))))
4998 schedule->temps[tmpx++] = i;
4999 entry->reg = -1;
5000 entry->mode = VOIDmode;
5001 entry->offset = offset_base;
5002 entry++;
5003 /* We loop twice: first, we save 8-byte aligned registers at the
5004 higher addresses, which are known to be aligned. Then, we
5005 proceed to saving 32-bit registers that don't need 8-byte
5006 alignment.
5007 If this is an interrupt function, all registers that need saving
5008 need to be saved in full. Moreover, we need to postpone saving
5009 target registers until we have saved some general purpose registers
5010 we can then use as scratch registers. */
5011 offset = offset_base;
5012 for (align = 1; align >= 0; align--)
5014 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5015 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5017 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5018 int reg = i;
5020 if (current_function_interrupt)
5022 if (TARGET_REGISTER_P (i))
5023 continue;
5024 if (GENERAL_REGISTER_P (i))
5025 mode = DImode;
5027 if (mode == SFmode && (i % 2) == 1
5028 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5029 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5031 mode = DFmode;
5032 i--;
5033 reg--;
5036 /* If we're doing the aligned pass and this is not aligned,
5037 or we're doing the unaligned pass and this is aligned,
5038 skip it. */
5039 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5040 != align)
5041 continue;
5043 if (current_function_interrupt
5044 && GENERAL_REGISTER_P (i)
5045 && tmpx < MAX_TEMPS)
5046 schedule->temps[tmpx++] = i;
5048 offset -= GET_MODE_SIZE (mode);
5049 entry->reg = i;
5050 entry->mode = mode;
5051 entry->offset = offset;
5052 entry++;
5054 if (align && current_function_interrupt)
5055 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5056 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5058 offset -= GET_MODE_SIZE (DImode);
5059 entry->reg = i;
5060 entry->mode = DImode;
5061 entry->offset = offset;
5062 entry++;
5065 entry->reg = -1;
5066 entry->mode = VOIDmode;
5067 entry->offset = offset;
5068 schedule->temps[tmpx] = -1;
5069 return entry - 1;
5072 void
5073 sh_expand_prologue (void)
5075 HARD_REG_SET live_regs_mask;
5076 int d, i;
5077 int d_rounding = 0;
5078 int save_flags = target_flags;
5080 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5082 /* We have pretend args if we had an object sent partially in registers
5083 and partially on the stack, e.g. a large structure. */
5084 output_stack_adjust (-current_function_pretend_args_size
5085 - current_function_args_info.stack_regs * 8,
5086 stack_pointer_rtx, 0, NULL);
5088 extra_push = 0;
5090 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5091 /* We're going to use the PIC register to load the address of the
5092 incoming-argument decoder and/or of the return trampoline from
5093 the GOT, so make sure the PIC register is preserved and
5094 initialized. */
5095 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5097 if (TARGET_SHCOMPACT
5098 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5100 int reg;
5102 /* First, make all registers with incoming arguments that will
5103 be pushed onto the stack live, so that register renaming
5104 doesn't overwrite them. */
5105 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5106 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5107 >= NPARM_REGS (SImode) - reg)
5108 for (; reg < NPARM_REGS (SImode); reg++)
5109 emit_insn (gen_shcompact_preserve_incoming_args
5110 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5111 else if (CALL_COOKIE_INT_REG_GET
5112 (current_function_args_info.call_cookie, reg) == 1)
5113 emit_insn (gen_shcompact_preserve_incoming_args
5114 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5116 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5117 stack_pointer_rtx);
5118 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5119 GEN_INT (current_function_args_info.call_cookie));
5120 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5121 gen_rtx_REG (SImode, R0_REG));
5123 else if (TARGET_SHMEDIA)
5125 int tr = sh_media_register_for_return ();
5127 if (tr >= 0)
5129 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5130 gen_rtx_REG (DImode, PR_MEDIA_REG));
5132 /* ??? We should suppress saving pr when we don't need it, but this
5133 is tricky because of builtin_return_address. */
5135 /* If this function only exits with sibcalls, this copy
5136 will be flagged as dead. */
5137 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5138 const0_rtx,
5139 REG_NOTES (insn));
5143 /* Emit the code for SETUP_VARARGS. */
5144 if (current_function_stdarg)
5146 /* This is not used by the SH2E calling convention. */
5147 if (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5
5148 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
5150 /* Push arg regs as if they'd been provided by the caller on the stack. */
5151 for (i = 0; i < NPARM_REGS(SImode); i++)
5153 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5154 rtx insn;
5156 if (i >= (NPARM_REGS(SImode)
5157 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5159 break;
5160 insn = push (rn);
5161 RTX_FRAME_RELATED_P (insn) = 0;
5162 extra_push += 4;
5167 /* If we're supposed to switch stacks at function entry, do so now. */
5168 if (sp_switch)
5169 emit_insn (gen_sp_switch_1 ());
5171 d = calc_live_regs (&live_regs_mask);
5172 /* ??? Maybe we could save some switching if we can move a mode switch
5173 that already happens to be at the function start into the prologue. */
5174 if (target_flags != save_flags && ! current_function_interrupt)
5175 emit_insn (gen_toggle_sz ());
5177 if (TARGET_SH5)
5179 int offset_base, offset;
5180 rtx r0 = NULL_RTX;
5181 int offset_in_r0 = -1;
5182 int sp_in_r0 = 0;
5183 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5184 int total_size, save_size;
5185 save_schedule schedule;
5186 save_entry *entry;
5187 int *tmp_pnt;
5189 if (call_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5190 && ! current_function_interrupt)
5191 r0 = gen_rtx_REG (Pmode, R0_REG);
5193 /* D is the actual number of bytes that we need for saving registers;
5194 however, in initial_elimination_offset we have committed to using
5195 an additional TREGS_SPACE amount of bytes. In order to keep both
5196 the addresses of arguments supplied by the caller and local variables
5197 valid, we must keep this gap. Place it between the incoming
5198 arguments and the actually saved registers in a bid to optimize
5199 locality of reference. */
5200 total_size = d + tregs_space;
5201 total_size += rounded_frame_size (total_size);
5202 save_size = total_size - rounded_frame_size (d);
5203 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5204 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5205 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5207 /* If adjusting the stack in a single step costs nothing extra, do so.
5208 I.e. either if a single addi is enough, or we need a movi anyway,
5209 and we don't exceed the maximum offset range (the test for the
5210 latter is conservative for simplicity). */
5211 if (TARGET_SHMEDIA
5212 && (CONST_OK_FOR_I10 (-total_size)
5213 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5214 && total_size <= 2044)))
5215 d_rounding = total_size - save_size;
5217 offset_base = d + d_rounding;
5219 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5220 0, NULL);
5222 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5223 tmp_pnt = schedule.temps;
5224 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5226 enum machine_mode mode = entry->mode;
5227 int reg = entry->reg;
5228 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5230 offset = entry->offset;
5232 reg_rtx = gen_rtx_REG (mode, reg);
5234 mem_rtx = gen_rtx_MEM (mode,
5235 gen_rtx_PLUS (Pmode,
5236 stack_pointer_rtx,
5237 GEN_INT (offset)));
5239 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5241 if (! r0)
5242 abort ();
5243 mem_rtx = NULL_RTX;
5245 try_pre_dec:
5247 if (HAVE_PRE_DECREMENT
5248 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5249 || mem_rtx == NULL_RTX
5250 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5252 pre_dec = gen_rtx_MEM (mode,
5253 gen_rtx_PRE_DEC (Pmode, r0));
5255 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5256 pre_dec_ok);
5258 pre_dec = NULL_RTX;
5260 break;
5262 pre_dec_ok:
5263 mem_rtx = NULL_RTX;
5264 offset += GET_MODE_SIZE (mode);
5266 while (0);
5268 if (mem_rtx != NULL_RTX)
5269 goto addr_ok;
5271 if (offset_in_r0 == -1)
5273 emit_move_insn (r0, GEN_INT (offset));
5274 offset_in_r0 = offset;
5276 else if (offset != offset_in_r0)
5278 emit_move_insn (r0,
5279 gen_rtx_PLUS
5280 (Pmode, r0,
5281 GEN_INT (offset - offset_in_r0)));
5282 offset_in_r0 += offset - offset_in_r0;
5285 if (pre_dec != NULL_RTX)
5287 if (! sp_in_r0)
5289 emit_move_insn (r0,
5290 gen_rtx_PLUS
5291 (Pmode, r0, stack_pointer_rtx));
5292 sp_in_r0 = 1;
5295 offset -= GET_MODE_SIZE (mode);
5296 offset_in_r0 -= GET_MODE_SIZE (mode);
5298 mem_rtx = pre_dec;
5300 else if (sp_in_r0)
5301 mem_rtx = gen_rtx_MEM (mode, r0);
5302 else
5303 mem_rtx = gen_rtx_MEM (mode,
5304 gen_rtx_PLUS (Pmode,
5305 stack_pointer_rtx,
5306 r0));
5308 /* We must not use an r0-based address for target-branch
5309 registers or for special registers without pre-dec
5310 memory addresses, since we store their values in r0
5311 first. */
5312 if (TARGET_REGISTER_P (reg)
5313 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5314 && mem_rtx != pre_dec))
5315 abort ();
5317 addr_ok:
5318 if (TARGET_REGISTER_P (reg)
5319 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5320 && mem_rtx != pre_dec))
5322 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5324 emit_move_insn (tmp_reg, reg_rtx);
5326 if (REGNO (tmp_reg) == R0_REG)
5328 offset_in_r0 = -1;
5329 sp_in_r0 = 0;
5330 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5331 abort ();
5334 if (*++tmp_pnt <= 0)
5335 tmp_pnt = schedule.temps;
5337 reg_rtx = tmp_reg;
5340 rtx insn;
5342 /* Mark as interesting for the DWARF CFI generator. */
5343 insn = emit_move_insn (mem_rtx, reg_rtx);
5344 RTX_FRAME_RELATED_P (insn) = 1;
5346 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5348 rtx reg_rtx = gen_rtx_REG (mode, reg);
5349 rtx set, note_rtx;
5350 rtx mem_rtx = gen_rtx_MEM (mode,
5351 gen_rtx_PLUS (Pmode,
5352 stack_pointer_rtx,
5353 GEN_INT (offset)));
5355 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5356 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5357 REG_NOTES (insn));
5358 REG_NOTES (insn) = note_rtx;
5363 if (entry->offset != d_rounding)
5364 abort ();
5366 else
5367 push_regs (&live_regs_mask, current_function_interrupt);
5369 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5371 rtx insn = get_last_insn ();
5372 rtx last = emit_insn (gen_GOTaddr2picreg ());
5374 /* Mark these insns as possibly dead. Sometimes, flow2 may
5375 delete all uses of the PIC register. In this case, let it
5376 delete the initialization too. */
5379 insn = NEXT_INSN (insn);
5381 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5382 const0_rtx,
5383 REG_NOTES (insn));
5385 while (insn != last);
5388 if (SHMEDIA_REGS_STACK_ADJUST ())
5390 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5391 function_symbol (TARGET_FPU_ANY
5392 ? "__GCC_push_shmedia_regs"
5393 : "__GCC_push_shmedia_regs_nofpu"));
5394 /* This must NOT go through the PLT, otherwise mach and macl
5395 may be clobbered. */
5396 emit_insn (gen_shmedia_save_restore_regs_compact
5397 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5400 if (target_flags != save_flags && ! current_function_interrupt)
5402 rtx insn = emit_insn (gen_toggle_sz ());
5404 /* If we're lucky, a mode switch in the function body will
5405 overwrite fpscr, making this insn dead. Tell flow this
5406 insn is OK to delete. */
5407 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5408 const0_rtx,
5409 REG_NOTES (insn));
5412 target_flags = save_flags;
5414 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5415 stack_pointer_rtx, 0, NULL);
5417 if (frame_pointer_needed)
5418 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5420 if (TARGET_SHCOMPACT
5421 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5423 /* This must NOT go through the PLT, otherwise mach and macl
5424 may be clobbered. */
5425 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5426 function_symbol ("__GCC_shcompact_incoming_args"));
5427 emit_insn (gen_shcompact_incoming_args ());
5431 void
5432 sh_expand_epilogue (void)
5434 HARD_REG_SET live_regs_mask;
5435 int d, i;
5436 int d_rounding = 0;
5438 int save_flags = target_flags;
5439 int frame_size, save_size;
5440 int fpscr_deferred = 0;
5442 d = calc_live_regs (&live_regs_mask);
5444 save_size = d;
5445 frame_size = rounded_frame_size (d);
5447 if (TARGET_SH5)
5449 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5450 int total_size;
5451 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5452 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5453 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5455 total_size = d + tregs_space;
5456 total_size += rounded_frame_size (total_size);
5457 save_size = total_size - frame_size;
5459 /* If adjusting the stack in a single step costs nothing extra, do so.
5460 I.e. either if a single addi is enough, or we need a movi anyway,
5461 and we don't exceed the maximum offset range (the test for the
5462 latter is conservative for simplicity). */
5463 if (TARGET_SHMEDIA
5464 && ! frame_pointer_needed
5465 && (CONST_OK_FOR_I10 (total_size)
5466 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5467 && total_size <= 2044)))
5468 d_rounding = frame_size;
5470 frame_size -= d_rounding;
5473 if (frame_pointer_needed)
5475 output_stack_adjust (frame_size, frame_pointer_rtx, 1, &live_regs_mask);
5477 /* We must avoid moving the stack pointer adjustment past code
5478 which reads from the local frame, else an interrupt could
5479 occur after the SP adjustment and clobber data in the local
5480 frame. */
5481 emit_insn (gen_blockage ());
5482 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5484 else if (frame_size)
5486 /* We must avoid moving the stack pointer adjustment past code
5487 which reads from the local frame, else an interrupt could
5488 occur after the SP adjustment and clobber data in the local
5489 frame. */
5490 emit_insn (gen_blockage ());
5491 output_stack_adjust (frame_size, stack_pointer_rtx, 1, &live_regs_mask);
5494 if (SHMEDIA_REGS_STACK_ADJUST ())
5496 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5497 function_symbol (TARGET_FPU_ANY
5498 ? "__GCC_pop_shmedia_regs"
5499 : "__GCC_pop_shmedia_regs_nofpu"));
5500 /* This must NOT go through the PLT, otherwise mach and macl
5501 may be clobbered. */
5502 emit_insn (gen_shmedia_save_restore_regs_compact
5503 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5506 /* Pop all the registers. */
5508 if (target_flags != save_flags && ! current_function_interrupt)
5509 emit_insn (gen_toggle_sz ());
5510 if (TARGET_SH5)
5512 int offset_base, offset;
5513 int offset_in_r0 = -1;
5514 int sp_in_r0 = 0;
5515 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5516 save_schedule schedule;
5517 save_entry *entry;
5518 int *tmp_pnt;
5520 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5521 offset_base = -entry[1].offset + d_rounding;
5522 tmp_pnt = schedule.temps;
5523 for (; entry->mode != VOIDmode; entry--)
5525 enum machine_mode mode = entry->mode;
5526 int reg = entry->reg;
5527 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5529 offset = offset_base + entry->offset;
5530 reg_rtx = gen_rtx_REG (mode, reg);
5532 mem_rtx = gen_rtx_MEM (mode,
5533 gen_rtx_PLUS (Pmode,
5534 stack_pointer_rtx,
5535 GEN_INT (offset)));
5537 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5539 mem_rtx = NULL_RTX;
5541 try_post_inc:
5543 if (HAVE_POST_INCREMENT
5544 && (offset == offset_in_r0
5545 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5546 && mem_rtx == NULL_RTX)
5547 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5549 post_inc = gen_rtx_MEM (mode,
5550 gen_rtx_POST_INC (Pmode, r0));
5552 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5553 post_inc_ok);
5555 post_inc = NULL_RTX;
5557 break;
5559 post_inc_ok:
5560 mem_rtx = NULL_RTX;
5562 while (0);
5564 if (mem_rtx != NULL_RTX)
5565 goto addr_ok;
5567 if (offset_in_r0 == -1)
5569 emit_move_insn (r0, GEN_INT (offset));
5570 offset_in_r0 = offset;
5572 else if (offset != offset_in_r0)
5574 emit_move_insn (r0,
5575 gen_rtx_PLUS
5576 (Pmode, r0,
5577 GEN_INT (offset - offset_in_r0)));
5578 offset_in_r0 += offset - offset_in_r0;
5581 if (post_inc != NULL_RTX)
5583 if (! sp_in_r0)
5585 emit_move_insn (r0,
5586 gen_rtx_PLUS
5587 (Pmode, r0, stack_pointer_rtx));
5588 sp_in_r0 = 1;
5591 mem_rtx = post_inc;
5593 offset_in_r0 += GET_MODE_SIZE (mode);
5595 else if (sp_in_r0)
5596 mem_rtx = gen_rtx_MEM (mode, r0);
5597 else
5598 mem_rtx = gen_rtx_MEM (mode,
5599 gen_rtx_PLUS (Pmode,
5600 stack_pointer_rtx,
5601 r0));
5603 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5604 && mem_rtx != post_inc)
5605 abort ();
5607 addr_ok:
5608 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5609 && mem_rtx != post_inc)
5611 insn = emit_move_insn (r0, mem_rtx);
5612 mem_rtx = r0;
5614 else if (TARGET_REGISTER_P (reg))
5616 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5618 /* Give the scheduler a bit of freedom by using up to
5619 MAX_TEMPS registers in a round-robin fashion. */
5620 insn = emit_move_insn (tmp_reg, mem_rtx);
5621 mem_rtx = tmp_reg;
5622 if (*++tmp_pnt < 0)
5623 tmp_pnt = schedule.temps;
5626 insn = emit_move_insn (reg_rtx, mem_rtx);
5627 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5628 /* This is dead, unless we return with a sibcall. */
5629 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5630 const0_rtx,
5631 REG_NOTES (insn));
5634 if (entry->offset + offset_base != d + d_rounding)
5635 abort ();
5637 else /* ! TARGET_SH5 */
5639 save_size = 0;
5640 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5641 pop (PR_REG);
5642 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5644 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5646 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5647 && hard_regs_intersect_p (&live_regs_mask,
5648 &reg_class_contents[DF_REGS]))
5649 fpscr_deferred = 1;
5650 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5651 pop (j);
5652 if (j == FIRST_FP_REG && fpscr_deferred)
5653 pop (FPSCR_REG);
5657 if (target_flags != save_flags && ! current_function_interrupt)
5658 emit_insn (gen_toggle_sz ());
5659 target_flags = save_flags;
5661 output_stack_adjust (extra_push + current_function_pretend_args_size
5662 + save_size + d_rounding
5663 + current_function_args_info.stack_regs * 8,
5664 stack_pointer_rtx, 1, NULL);
5666 if (current_function_calls_eh_return)
5667 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5668 EH_RETURN_STACKADJ_RTX));
5670 /* Switch back to the normal stack if necessary. */
5671 if (sp_switch)
5672 emit_insn (gen_sp_switch_2 ());
5674 /* Tell flow the insn that pops PR isn't dead. */
5675 /* PR_REG will never be live in SHmedia mode, and we don't need to
5676 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5677 by the return pattern. */
5678 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5679 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5682 static int sh_need_epilogue_known = 0;
5685 sh_need_epilogue (void)
5687 if (! sh_need_epilogue_known)
5689 rtx epilogue;
5691 start_sequence ();
5692 sh_expand_epilogue ();
5693 epilogue = get_insns ();
5694 end_sequence ();
5695 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5697 return sh_need_epilogue_known > 0;
5700 /* Emit code to change the current function's return address to RA.
5701 TEMP is available as a scratch register, if needed. */
5703 void
5704 sh_set_return_address (rtx ra, rtx tmp)
5706 HARD_REG_SET live_regs_mask;
5707 int d;
5708 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5709 int pr_offset;
5711 d = calc_live_regs (&live_regs_mask);
5713 /* If pr_reg isn't live, we can set it (or the register given in
5714 sh_media_register_for_return) directly. */
5715 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5717 rtx rr;
5719 if (TARGET_SHMEDIA)
5721 int rr_regno = sh_media_register_for_return ();
5723 if (rr_regno < 0)
5724 rr_regno = pr_reg;
5726 rr = gen_rtx_REG (DImode, rr_regno);
5728 else
5729 rr = gen_rtx_REG (SImode, pr_reg);
5731 emit_insn (GEN_MOV (rr, ra));
5732 /* Tell flow the register for return isn't dead. */
5733 emit_insn (gen_rtx_USE (VOIDmode, rr));
5734 return;
5737 if (TARGET_SH5)
5739 int offset;
5740 save_schedule schedule;
5741 save_entry *entry;
5743 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
5744 offset = entry[1].offset;
5745 for (; entry->mode != VOIDmode; entry--)
5746 if (entry->reg == pr_reg)
5747 goto found;
5749 /* We couldn't find the PR register. */
5750 abort ();
5752 found:
5753 offset = entry->offset - offset;
5754 pr_offset = (rounded_frame_size (d) + offset
5755 + SHMEDIA_REGS_STACK_ADJUST ());
5757 else
5758 pr_offset = rounded_frame_size (d);
5760 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
5761 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
5763 tmp = gen_rtx_MEM (Pmode, tmp);
5764 emit_insn (GEN_MOV (tmp, ra));
5767 /* Clear variables at function end. */
5769 static void
5770 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5771 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5773 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5774 sh_need_epilogue_known = 0;
5775 sp_switch = NULL_RTX;
5778 static rtx
5779 sh_builtin_saveregs (void)
5781 /* First unnamed integer register. */
5782 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5783 /* Number of integer registers we need to save. */
5784 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5785 /* First unnamed SFmode float reg. */
5786 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5787 /* Number of SFmode float regs to save. */
5788 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5789 rtx regbuf, fpregs;
5790 int bufsize, regno;
5791 HOST_WIDE_INT alias_set;
5793 if (TARGET_SH5)
5795 if (n_intregs)
5797 int pushregs = n_intregs;
5799 while (pushregs < NPARM_REGS (SImode) - 1
5800 && (CALL_COOKIE_INT_REG_GET
5801 (current_function_args_info.call_cookie,
5802 NPARM_REGS (SImode) - pushregs)
5803 == 1))
5805 current_function_args_info.call_cookie
5806 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5807 - pushregs, 1);
5808 pushregs++;
5811 if (pushregs == NPARM_REGS (SImode))
5812 current_function_args_info.call_cookie
5813 |= (CALL_COOKIE_INT_REG (0, 1)
5814 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5815 else
5816 current_function_args_info.call_cookie
5817 |= CALL_COOKIE_STACKSEQ (pushregs);
5819 current_function_pretend_args_size += 8 * n_intregs;
5821 if (TARGET_SHCOMPACT)
5822 return const0_rtx;
5825 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
5827 error ("__builtin_saveregs not supported by this subtarget");
5828 return const0_rtx;
5831 if (TARGET_SHMEDIA)
5832 n_floatregs = 0;
5834 /* Allocate block of memory for the regs. */
5835 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5836 Or can assign_stack_local accept a 0 SIZE argument? */
5837 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5839 if (TARGET_SHMEDIA)
5840 regbuf = gen_rtx_MEM (BLKmode,
5841 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5842 else if (n_floatregs & 1)
5844 rtx addr;
5846 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5847 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5848 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5849 regbuf = change_address (regbuf, BLKmode, addr);
5851 else
5852 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5853 alias_set = get_varargs_alias_set ();
5854 set_mem_alias_set (regbuf, alias_set);
5856 /* Save int args.
5857 This is optimized to only save the regs that are necessary. Explicitly
5858 named args need not be saved. */
5859 if (n_intregs > 0)
5860 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5861 adjust_address (regbuf, BLKmode,
5862 n_floatregs * UNITS_PER_WORD),
5863 n_intregs);
5865 if (TARGET_SHMEDIA)
5866 /* Return the address of the regbuf. */
5867 return XEXP (regbuf, 0);
5869 /* Save float args.
5870 This is optimized to only save the regs that are necessary. Explicitly
5871 named args need not be saved.
5872 We explicitly build a pointer to the buffer because it halves the insn
5873 count when not optimizing (otherwise the pointer is built for each reg
5874 saved).
5875 We emit the moves in reverse order so that we can use predecrement. */
5877 fpregs = gen_reg_rtx (Pmode);
5878 emit_move_insn (fpregs, XEXP (regbuf, 0));
5879 emit_insn (gen_addsi3 (fpregs, fpregs,
5880 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5881 if (TARGET_SH4)
5883 rtx mem;
5884 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5886 emit_insn (gen_addsi3 (fpregs, fpregs,
5887 GEN_INT (-2 * UNITS_PER_WORD)));
5888 mem = gen_rtx_MEM (DFmode, fpregs);
5889 set_mem_alias_set (mem, alias_set);
5890 emit_move_insn (mem,
5891 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5893 regno = first_floatreg;
5894 if (regno & 1)
5896 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5897 mem = gen_rtx_MEM (SFmode, fpregs);
5898 set_mem_alias_set (mem, alias_set);
5899 emit_move_insn (mem,
5900 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5901 - (TARGET_LITTLE_ENDIAN != 0)));
5904 else
5905 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5907 rtx mem;
5909 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5910 mem = gen_rtx_MEM (SFmode, fpregs);
5911 set_mem_alias_set (mem, alias_set);
5912 emit_move_insn (mem,
5913 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5916 /* Return the address of the regbuf. */
5917 return XEXP (regbuf, 0);
5920 /* Define the `__builtin_va_list' type for the ABI. */
5922 static tree
5923 sh_build_builtin_va_list (void)
5925 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5926 tree record;
5928 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
5929 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
5930 return ptr_type_node;
5932 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5934 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5935 ptr_type_node);
5936 f_next_o_limit = build_decl (FIELD_DECL,
5937 get_identifier ("__va_next_o_limit"),
5938 ptr_type_node);
5939 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5940 ptr_type_node);
5941 f_next_fp_limit = build_decl (FIELD_DECL,
5942 get_identifier ("__va_next_fp_limit"),
5943 ptr_type_node);
5944 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5945 ptr_type_node);
5947 DECL_FIELD_CONTEXT (f_next_o) = record;
5948 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5949 DECL_FIELD_CONTEXT (f_next_fp) = record;
5950 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5951 DECL_FIELD_CONTEXT (f_next_stack) = record;
5953 TYPE_FIELDS (record) = f_next_o;
5954 TREE_CHAIN (f_next_o) = f_next_o_limit;
5955 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5956 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5957 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5959 layout_type (record);
5961 return record;
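/* Informal sketch (not part of the original source): the record laid out
   above behaves like the following C structure, each field holding a
   pointer; the notes after the fields are descriptions, not code.

     typedef struct {
       void *__va_next_o;          next unnamed arg in the integer-reg save area
       void *__va_next_o_limit;    end of the integer-reg save area
       void *__va_next_fp;         next unnamed arg in the float-reg save area
       void *__va_next_fp_limit;   end of the float-reg save area
       void *__va_next_stack;      next stack-passed argument
     } __builtin_va_list;  */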
5964 /* Implement `va_start' for varargs and stdarg. */
5966 void
5967 sh_va_start (tree valist, rtx nextarg)
5969 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5970 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5971 tree t, u;
5972 int nfp, nint;
5974 if (TARGET_SH5)
5976 expand_builtin_saveregs ();
5977 std_expand_builtin_va_start (valist, nextarg);
5978 return;
5981 if ((! TARGET_SH2E && ! TARGET_SH4)
5982 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
5984 std_expand_builtin_va_start (valist, nextarg);
5985 return;
5988 f_next_o = TYPE_FIELDS (va_list_type_node);
5989 f_next_o_limit = TREE_CHAIN (f_next_o);
5990 f_next_fp = TREE_CHAIN (f_next_o_limit);
5991 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5992 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5994 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5995 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5996 valist, f_next_o_limit);
5997 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5998 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5999 valist, f_next_fp_limit);
6000 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6001 valist, f_next_stack);
6003 /* Call __builtin_saveregs. */
6004 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6005 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6006 TREE_SIDE_EFFECTS (t) = 1;
6007 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6009 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6010 if (nfp < 8)
6011 nfp = 8 - nfp;
6012 else
6013 nfp = 0;
6014 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6015 build_int_2 (UNITS_PER_WORD * nfp, 0)));
6016 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6017 TREE_SIDE_EFFECTS (t) = 1;
6018 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6020 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6021 TREE_SIDE_EFFECTS (t) = 1;
6022 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6024 nint = current_function_args_info.arg_count[SH_ARG_INT];
6025 if (nint < 4)
6026 nint = 4 - nint;
6027 else
6028 nint = 0;
6029 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6030 build_int_2 (UNITS_PER_WORD * nint, 0)));
6031 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6032 TREE_SIDE_EFFECTS (t) = 1;
6033 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6035 u = make_tree (ptr_type_node, nextarg);
6036 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6037 TREE_SIDE_EFFECTS (t) = 1;
6038 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
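/* Illustrative use (a sketch, not from this file; needs <stdarg.h>): for a
   variadic callee such as

     int sum (int n, ...)
     {
       va_list ap;
       int i, total = 0;
       va_start (ap, n);
       for (i = 0; i < n; i++)
         total += va_arg (ap, int);
       va_end (ap);
       return total;
     }

   the expansion above calls __builtin_saveregs and then points
   __va_next_fp / __va_next_o (and their limits) at the register save area
   and __va_next_stack at the first stack-passed argument, so later va_arg
   accesses can walk those areas in order.  */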
6041 /* Implement `va_arg'. */
6044 sh_va_arg (tree valist, tree type)
6046 HOST_WIDE_INT size, rsize;
6047 tree tmp, pptr_type_node;
6048 rtx addr_rtx, r;
6049 rtx result_ptr, result = NULL_RTX;
6050 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
6051 rtx lab_over;
6053 size = int_size_in_bytes (type);
6054 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6055 pptr_type_node = build_pointer_type (ptr_type_node);
6057 if (pass_by_ref)
6058 type = build_pointer_type (type);
6060 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6061 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6063 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6064 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6065 int pass_as_float;
6066 rtx lab_false;
6068 f_next_o = TYPE_FIELDS (va_list_type_node);
6069 f_next_o_limit = TREE_CHAIN (f_next_o);
6070 f_next_fp = TREE_CHAIN (f_next_o_limit);
6071 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6072 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6074 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
6075 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6076 valist, f_next_o_limit);
6077 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6078 valist, f_next_fp);
6079 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6080 valist, f_next_fp_limit);
6081 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6082 valist, f_next_stack);
6084 /* Structures with a single member with a distinct mode are passed
6085 like their member. This is relevant if the latter has a REAL_TYPE
6086 or COMPLEX_TYPE type. */
6087 if (TREE_CODE (type) == RECORD_TYPE
6088 && TYPE_FIELDS (type)
6089 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6090 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6091 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6092 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6093 type = TREE_TYPE (TYPE_FIELDS (type));
6094 if (TARGET_SH4)
6096 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6097 || (TREE_CODE (type) == COMPLEX_TYPE
6098 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6099 && size <= 16));
6101 else
6103 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6106 addr_rtx = gen_reg_rtx (Pmode);
6107 lab_false = gen_label_rtx ();
6108 lab_over = gen_label_rtx ();
6110 tmp = make_tree (pptr_type_node, addr_rtx);
6111 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
6113 if (pass_as_float)
6115 int first_floatreg
6116 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6117 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6119 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
6120 EXPAND_NORMAL),
6121 expand_expr (next_fp_limit, NULL_RTX,
6122 Pmode, EXPAND_NORMAL),
6123 GE, const1_rtx, Pmode, 1, lab_false);
6125 if (TYPE_ALIGN (type) > BITS_PER_WORD
6126 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6127 && (n_floatregs & 1)))
6129 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
6130 build_int_2 (UNITS_PER_WORD, 0));
6131 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6132 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6133 TREE_SIDE_EFFECTS (tmp) = 1;
6134 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6137 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6138 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6139 if (r != addr_rtx)
6140 emit_move_insn (addr_rtx, r);
6142 #ifdef FUNCTION_ARG_SCmode_WART
6143 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6145 rtx addr, real, imag, result_value, slot;
6146 tree subtype = TREE_TYPE (type);
6148 addr = std_expand_builtin_va_arg (valist, subtype);
6149 #ifdef POINTERS_EXTEND_UNSIGNED
6150 if (GET_MODE (addr) != Pmode)
6151 addr = convert_memory_address (Pmode, addr);
6152 #endif
6153 imag = gen_rtx_MEM (TYPE_MODE (type), addr);
6154 set_mem_alias_set (imag, get_varargs_alias_set ());
6156 addr = std_expand_builtin_va_arg (valist, subtype);
6157 #ifdef POINTERS_EXTEND_UNSIGNED
6158 if (GET_MODE (addr) != Pmode)
6159 addr = convert_memory_address (Pmode, addr);
6160 #endif
6161 real = gen_rtx_MEM (TYPE_MODE (type), addr);
6162 set_mem_alias_set (real, get_varargs_alias_set ());
6164 result_value = gen_rtx_CONCAT (SCmode, real, imag);
6165 /* ??? this interface is stupid - why require a pointer? */
6166 result = gen_reg_rtx (Pmode);
6167 slot = assign_stack_temp (SCmode, 8, 0);
6168 emit_move_insn (slot, result_value);
6169 emit_move_insn (result, XEXP (slot, 0));
6171 #endif /* FUNCTION_ARG_SCmode_WART */
6173 emit_jump_insn (gen_jump (lab_over));
6174 emit_barrier ();
6175 emit_label (lab_false);
6177 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6178 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6179 if (r != addr_rtx)
6180 emit_move_insn (addr_rtx, r);
6182 else
6184 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
6185 build_int_2 (rsize, 0));
6187 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
6188 EXPAND_NORMAL),
6189 expand_expr (next_o_limit, NULL_RTX,
6190 Pmode, EXPAND_NORMAL),
6191 GT, const1_rtx, Pmode, 1, lab_false);
6193 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6194 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6195 if (r != addr_rtx)
6196 emit_move_insn (addr_rtx, r);
6198 emit_jump_insn (gen_jump (lab_over));
6199 emit_barrier ();
6200 emit_label (lab_false);
6202 if (size > 4 && ! TARGET_SH4)
6204 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6205 TREE_SIDE_EFFECTS (tmp) = 1;
6206 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6209 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6210 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6211 if (r != addr_rtx)
6212 emit_move_insn (addr_rtx, r);
6215 if (! result)
6216 emit_label (lab_over);
6219 /* ??? In va-sh.h, there had been code to make values larger than
6220 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6222 result_ptr = std_expand_builtin_va_arg (valist, type);
6223 if (result)
6225 emit_move_insn (result, result_ptr);
6226 emit_label (lab_over);
6228 else
6229 result = result_ptr;
6231 if (pass_by_ref)
6233 #ifdef POINTERS_EXTEND_UNSIGNED
6234 if (GET_MODE (addr) != Pmode)
6235 addr = convert_memory_address (Pmode, result);
6236 #endif
6237 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
6238 set_mem_alias_set (result, get_varargs_alias_set ());
6240 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
6241 argument to the varargs alias set. */
6242 return result;
6245 static bool
6246 sh_promote_prototypes (tree type)
6248 if (TARGET_HITACHI)
6249 return 0;
6250 if (! type)
6251 return 1;
6252 return ! sh_attr_renesas_p (type);
6255 /* Define where to put the arguments to a function.
6256 Value is zero to push the argument on the stack,
6257 or a hard register in which to store the argument.
6259 MODE is the argument's machine mode.
6260 TYPE is the data type of the argument (as a tree).
6261 This is null for libcalls where that information may
6262 not be available.
6263 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6264 the preceding args and about the function being called.
6265 NAMED is nonzero if this argument is a named parameter
6266 (otherwise it is an extra parameter matching an ellipsis).
6268 On SH the first args are normally in registers
6269 and the rest are pushed. Any arg that starts within the first
6270 NPARM_REGS words is at least partially passed in a register unless
6271 its data type forbids. */
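/* For reference (an informal note, not from the original source): under the
   usual SH-1..SH-4 conventions the first NPARM_REGS (SImode) integer
   argument words go in r4..r7, and with SH2E/SH4 hardware floating point the
   first NPARM_REGS (SFmode) single-precision values go in fr4..fr11; later
   arguments are pushed on the stack as described above.  */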
6275 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6276 tree type, int named)
6278 if (! TARGET_SH5 && mode == VOIDmode)
6279 return GEN_INT (ca->renesas_abi ? 1 : 0);
6281 if (! TARGET_SH5
6282 && PASS_IN_REG_P (*ca, mode, type)
6283 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6285 int regno;
6287 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6288 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6290 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6291 gen_rtx_REG (SFmode,
6292 BASE_ARG_REG (mode)
6293 + (ROUND_REG (*ca, mode) ^ 1)),
6294 const0_rtx);
6295 rtx r2 = gen_rtx_EXPR_LIST(VOIDmode,
6296 gen_rtx_REG (SFmode,
6297 BASE_ARG_REG (mode)
6298 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6299 GEN_INT (4));
6300 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6303 /* If the alignment of a DF value causes an SF register to be
6304 skipped, we will use that skipped register for the next SF
6305 value. */
6306 if ((TARGET_HITACHI || ca->renesas_abi)
6307 && ca->free_single_fp_reg
6308 && mode == SFmode)
6309 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6311 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6312 ^ (mode == SFmode && TARGET_SH4
6313 && TARGET_LITTLE_ENDIAN != 0
6314 && ! TARGET_HITACHI && ! ca->renesas_abi);
6315 return gen_rtx_REG (mode, regno);
6319 if (TARGET_SH5)
6321 if (mode == VOIDmode && TARGET_SHCOMPACT)
6322 return GEN_INT (ca->call_cookie);
6324 /* The following test assumes unnamed arguments are promoted to
6325 DFmode. */
6326 if (mode == SFmode && ca->free_single_fp_reg)
6327 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6329 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6330 && (named || ! ca->prototype_p)
6331 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6333 if (! ca->prototype_p && TARGET_SHMEDIA)
6334 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6336 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6337 FIRST_FP_PARM_REG
6338 + ca->arg_count[(int) SH_ARG_FLOAT]);
6341 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6342 && (! TARGET_SHCOMPACT
6343 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6344 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6345 type, named))))
6347 return gen_rtx_REG (mode, (FIRST_PARM_REG
6348 + ca->arg_count[(int) SH_ARG_INT]));
6351 return 0;
6354 return 0;
6357 /* Update the data in CUM to advance over an argument
6358 of mode MODE and data type TYPE.
6359 (TYPE is null for libcalls where that information may not be
6360 available.) */
6362 void
6363 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6364 tree type, int named)
6366 if (ca->force_mem)
6367 ca->force_mem = 0;
6368 else if (TARGET_SH5)
6370 tree type2 = (ca->byref && type
6371 ? TREE_TYPE (type)
6372 : type);
6373 enum machine_mode mode2 = (ca->byref && type
6374 ? TYPE_MODE (type2)
6375 : mode);
6376 int dwords = ((ca->byref
6377 ? ca->byref
6378 : mode2 == BLKmode
6379 ? int_size_in_bytes (type2)
6380 : GET_MODE_SIZE (mode2)) + 7) / 8;
6381 int numregs = MIN (dwords, NPARM_REGS (SImode)
6382 - ca->arg_count[(int) SH_ARG_INT]);
6384 if (numregs)
6386 ca->arg_count[(int) SH_ARG_INT] += numregs;
6387 if (TARGET_SHCOMPACT
6388 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6390 ca->call_cookie
6391 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6392 - numregs, 1);
6393 /* N.B. We want this also for outgoing. */
6394 ca->stack_regs += numregs;
6396 else if (ca->byref)
6398 if (! ca->outgoing)
6399 ca->stack_regs += numregs;
6400 ca->byref_regs += numregs;
6401 ca->byref = 0;
6403 ca->call_cookie
6404 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6405 - numregs, 2);
6406 while (--numregs);
6407 ca->call_cookie
6408 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6409 - 1, 1);
6411 else if (dwords > numregs)
6413 int pushregs = numregs;
6415 if (TARGET_SHCOMPACT)
6416 ca->stack_regs += numregs;
6417 while (pushregs < NPARM_REGS (SImode) - 1
6418 && (CALL_COOKIE_INT_REG_GET
6419 (ca->call_cookie,
6420 NPARM_REGS (SImode) - pushregs)
6421 == 1))
6423 ca->call_cookie
6424 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6425 - pushregs, 1);
6426 pushregs++;
6428 if (numregs == NPARM_REGS (SImode))
6429 ca->call_cookie
6430 |= CALL_COOKIE_INT_REG (0, 1)
6431 | CALL_COOKIE_STACKSEQ (numregs - 1);
6432 else
6433 ca->call_cookie
6434 |= CALL_COOKIE_STACKSEQ (numregs);
6437 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6438 && (named || ! ca->prototype_p))
6440 if (mode2 == SFmode && ca->free_single_fp_reg)
6441 ca->free_single_fp_reg = 0;
6442 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6443 < NPARM_REGS (SFmode))
6445 int numfpregs
6446 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6447 NPARM_REGS (SFmode)
6448 - ca->arg_count[(int) SH_ARG_FLOAT]);
6450 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6452 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6454 if (ca->outgoing && numregs > 0)
6457 ca->call_cookie
6458 |= (CALL_COOKIE_INT_REG
6459 (ca->arg_count[(int) SH_ARG_INT]
6460 - numregs + ((numfpregs - 2) / 2),
6461 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6462 - numfpregs) / 2));
6464 while (numfpregs -= 2);
6466 else if (mode2 == SFmode && (named)
6467 && (ca->arg_count[(int) SH_ARG_FLOAT]
6468 < NPARM_REGS (SFmode)))
6469 ca->free_single_fp_reg
6470 = FIRST_FP_PARM_REG - numfpregs
6471 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6474 return;
6477 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6479 /* Note that we've used the skipped register. */
6480 if (mode == SFmode && ca->free_single_fp_reg)
6482 ca->free_single_fp_reg = 0;
6483 return;
6485 /* When we have a DF after an SF, there's an SF register that gets
6486 skipped in order to align the DF value. We note this skipped
6487 register, because the next SF value will use it, and not the
6488 SF that follows the DF. */
6489 if (mode == DFmode
6490 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6492 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6493 + BASE_ARG_REG (mode));
6497 if (! (TARGET_SH4 || ca->renesas_abi)
6498 || PASS_IN_REG_P (*ca, mode, type))
6499 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6500 = (ROUND_REG (*ca, mode)
6501 + (mode == BLKmode
6502 ? ROUND_ADVANCE (int_size_in_bytes (type))
6503 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6506 /* If the structure value address is not passed in a register, define
6507 `STRUCT_VALUE' as an expression returning an RTX for the place
6508 where the address is passed. If it returns 0, the address is
6509 passed as an "invisible" first argument. */
6510 /* The Renesas calling convention doesn't quite fit into this scheme since
6511 the address is passed like an invisible argument, but one that is always
6512 passed in memory. */
6513 static rtx
6514 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6516 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6517 return 0;
6518 return gen_rtx_REG (Pmode, 2);
6521 static bool
6522 sh_return_in_memory (tree type, tree fndecl)
6524 if (TARGET_SH5)
6526 if (TYPE_MODE (type) == BLKmode)
6527 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6528 else
6529 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6531 else
6533 return (TYPE_MODE (type) == BLKmode
6534 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6535 && TREE_CODE (type) == RECORD_TYPE));
6539 /* We actually emit the code in sh_expand_prologue. We used to use
6540 a static variable to flag that we need to emit this code, but that
6541 doesn't work when inlining, when functions are deferred and then emitted
6542 later. Fortunately, we already have two flags that are part of struct
6543 function that tell if a function uses varargs or stdarg. */
6544 static void
6545 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
6546 enum machine_mode mode ATTRIBUTE_UNUSED,
6547 tree type ATTRIBUTE_UNUSED,
6548 int *pretend_arg_size ATTRIBUTE_UNUSED,
6549 int second_time ATTRIBUTE_UNUSED)
6551 if (! current_function_stdarg)
6552 abort ();
6555 static bool
6556 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6558 return TARGET_SH5;
6561 static bool
6562 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6564 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6568 /* Define the offset between two registers, one to be eliminated, and
6569 the other its replacement, at the start of a routine. */
6572 initial_elimination_offset (int from, int to)
6574 int regs_saved;
6575 int regs_saved_rounding = 0;
6576 int total_saved_regs_space;
6577 int total_auto_space;
6578 int save_flags = target_flags;
6579 int copy_flags;
6580 HARD_REG_SET live_regs_mask;
6582 shmedia_space_reserved_for_target_registers = false;
6583 regs_saved = calc_live_regs (&live_regs_mask);
6584 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6586 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6588 shmedia_space_reserved_for_target_registers = true;
6589 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
6592 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6593 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6594 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6596 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6597 copy_flags = target_flags;
6598 target_flags = save_flags;
6600 total_saved_regs_space = regs_saved + regs_saved_rounding;
6602 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6603 return total_saved_regs_space + total_auto_space
6604 + current_function_args_info.byref_regs * 8;
6606 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6607 return total_saved_regs_space + total_auto_space
6608 + current_function_args_info.byref_regs * 8;
6610 /* Initial gap between fp and sp is 0. */
6611 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6612 return 0;
6614 if (from == RETURN_ADDRESS_POINTER_REGNUM
6615 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
6617 if (TARGET_SH5)
6619 int n = total_saved_regs_space;
6620 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6621 save_schedule schedule;
6622 save_entry *entry;
6624 n += total_auto_space;
6626 /* If it wasn't saved, there's not much we can do. */
6627 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6628 return n;
6630 target_flags = copy_flags;
6632 sh5_schedule_saves (&live_regs_mask, &schedule, n);
6633 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6634 if (entry->reg == pr_reg)
6636 target_flags = save_flags;
6637 return entry->offset;
6639 abort ();
6641 else
6642 return total_auto_space;
6645 abort ();
6648 /* Handle machine-specific pragmas to be semi-compatible with the Renesas
6649 compiler. */
6651 void
6652 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6654 pragma_interrupt = 1;
6657 void
6658 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6660 pragma_interrupt = pragma_trapa = 1;
6663 void
6664 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6666 pragma_nosave_low_regs = 1;
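/* Illustrative usage of these pragmas in user source (a sketch, not from
   this file):

     #pragma interrupt
     void isr (void);        applies to the next function that is compiled

     #pragma trapa
     #pragma nosave_low_regs

   Each pragma sets the corresponding flag above; the flags are consumed and
   then cleared again in sh_output_function_epilogue.  */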
6669 /* Generate the 'interrupt_handler' attribute for decls. */
6671 static void
6672 sh_insert_attributes (tree node, tree *attributes)
6674 if (! pragma_interrupt
6675 || TREE_CODE (node) != FUNCTION_DECL)
6676 return;
6678 /* We are only interested in declarations. */
6679 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6680 return;
6682 /* Add a 'handle_interrupt' attribute. */
6683 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6685 return;
6688 /* Supported attributes:
6690 interrupt_handler -- specifies this function is an interrupt handler.
6692 sp_switch -- specifies an alternate stack for an interrupt handler
6693 to run on.
6695 trap_exit -- use a trapa to exit an interrupt function instead of
6696 an rte instruction.
6698 renesas -- use Renesas calling/layout conventions (functions and
6699 structures).
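   For example (an illustrative sketch; `alt_stack' stands for a hypothetical
   global variable holding the alternate stack address):

     void handler (void) __attribute__ ((interrupt_handler));
     void handler2 (void) __attribute__ ((interrupt_handler,
                                           sp_switch ("alt_stack"),
                                           trap_exit (4)));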
6703 const struct attribute_spec sh_attribute_table[] =
6705 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6706 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6707 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6708 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6709 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
6710 { NULL, 0, 0, false, false, false, NULL }
6713 /* Handle an "interrupt_handler" attribute; arguments as in
6714 struct attribute_spec.handler. */
6715 static tree
6716 sh_handle_interrupt_handler_attribute (tree *node, tree name,
6717 tree args ATTRIBUTE_UNUSED,
6718 int flags ATTRIBUTE_UNUSED,
6719 bool *no_add_attrs)
6721 if (TREE_CODE (*node) != FUNCTION_DECL)
6723 warning ("`%s' attribute only applies to functions",
6724 IDENTIFIER_POINTER (name));
6725 *no_add_attrs = true;
6727 else if (TARGET_SHCOMPACT)
6729 error ("attribute interrupt_handler is not compatible with -m5-compact");
6730 *no_add_attrs = true;
6733 return NULL_TREE;
6736 /* Handle an "sp_switch" attribute; arguments as in
6737 struct attribute_spec.handler. */
6738 static tree
6739 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
6740 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6742 if (TREE_CODE (*node) != FUNCTION_DECL)
6744 warning ("`%s' attribute only applies to functions",
6745 IDENTIFIER_POINTER (name));
6746 *no_add_attrs = true;
6748 else if (!pragma_interrupt)
6750 /* The sp_switch attribute only has meaning for interrupt functions. */
6751 warning ("`%s' attribute only applies to interrupt functions",
6752 IDENTIFIER_POINTER (name));
6753 *no_add_attrs = true;
6755 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
6757 /* The argument must be a constant string. */
6758 warning ("`%s' attribute argument not a string constant",
6759 IDENTIFIER_POINTER (name));
6760 *no_add_attrs = true;
6762 else
6764 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
6765 TREE_STRING_POINTER (TREE_VALUE (args)));
6768 return NULL_TREE;
6771 /* Handle a "trap_exit" attribute; arguments as in
6772 struct attribute_spec.handler. */
6773 static tree
6774 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
6775 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6777 if (TREE_CODE (*node) != FUNCTION_DECL)
6779 warning ("`%s' attribute only applies to functions",
6780 IDENTIFIER_POINTER (name));
6781 *no_add_attrs = true;
6783 else if (!pragma_interrupt)
6785 /* The trap_exit attribute only has meaning for interrupt functions. */
6786 warning ("`%s' attribute only applies to interrupt functions",
6787 IDENTIFIER_POINTER (name));
6788 *no_add_attrs = true;
6790 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
6792 /* The argument must be a constant integer. */
6793 warning ("`%s' attribute argument not an integer constant",
6794 IDENTIFIER_POINTER (name));
6795 *no_add_attrs = true;
6797 else
6799 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
6802 return NULL_TREE;
6805 static tree
6806 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
6807 tree name ATTRIBUTE_UNUSED,
6808 tree args ATTRIBUTE_UNUSED,
6809 int flags ATTRIBUTE_UNUSED,
6810 bool *no_add_attrs ATTRIBUTE_UNUSED)
6812 return NULL_TREE;
6815 /* True if __attribute__((renesas)) or -mrenesas. */
6817 sh_attr_renesas_p (tree td)
6819 if (TARGET_HITACHI)
6820 return 1;
6821 if (td == 0)
6822 return 0;
6823 if (DECL_P (td))
6824 td = TREE_TYPE (td);
6825 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
6826 != NULL_TREE);
6829 /* True if __attribute__((renesas)) or -mrenesas, for the current
6830 function. */
6832 sh_cfun_attr_renesas_p (void)
6834 return sh_attr_renesas_p (current_function_decl);
6838 sh_cfun_interrupt_handler_p (void)
6840 return (lookup_attribute ("interrupt_handler",
6841 DECL_ATTRIBUTES (current_function_decl))
6842 != NULL_TREE);
6845 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
6846 static const struct
6848 const char *const name;
6849 const int value;
6850 const char *const description;
6852 sh_target_switches[] = TARGET_SWITCHES;
6853 #define target_switches sh_target_switches
6855 /* Like default_pch_valid_p, but take flag_mask into account. */
6856 const char *
6857 sh_pch_valid_p (const void *data_p, size_t len)
6859 const char *data = (const char *)data_p;
6860 const char *flag_that_differs = NULL;
6861 size_t i;
6862 int old_flags;
6863 int flag_mask
6864 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
6865 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
6867 /* -fpic and -fpie also usually make a PCH invalid. */
6868 if (data[0] != flag_pic)
6869 return _("created and used with different settings of -fpic");
6870 if (data[1] != flag_pie)
6871 return _("created and used with different settings of -fpie");
6872 data += 2;
6874 /* Check target_flags. */
6875 memcpy (&old_flags, data, sizeof (target_flags));
6876 if (((old_flags ^ target_flags) & flag_mask) != 0)
6878 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
6880 int bits;
6882 bits = target_switches[i].value;
6883 if (bits < 0)
6884 bits = -bits;
6885 bits &= flag_mask;
6886 if ((target_flags & bits) != (old_flags & bits))
6888 flag_that_differs = target_switches[i].name;
6889 goto make_message;
6892 abort ();
6894 data += sizeof (target_flags);
6895 len -= sizeof (target_flags);
6897 /* Check string options. */
6898 #ifdef TARGET_OPTIONS
6899 for (i = 0; i < ARRAY_SIZE (target_options); i++)
6901 const char *str = *target_options[i].variable;
6902 size_t l;
6903 if (! str)
6904 str = "";
6905 l = strlen (str) + 1;
6906 if (len < l || memcmp (data, str, l) != 0)
6908 flag_that_differs = target_options[i].prefix;
6909 goto make_message;
6911 data += l;
6912 len -= l;
6914 #endif
6916 return NULL;
6918 make_message:
6920 char *r;
6921 asprintf (&r, _("created and used with differing settings of `-m%s'"),
6922 flag_that_differs);
6923 if (r == NULL)
6924 return _("out of memory");
6925 return r;
6929 /* Predicates used by the templates. */
6931 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
6932 Used only in general_movsrc_operand. */
6935 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
6937 switch (REGNO (op))
6939 case PR_REG:
6940 case MACL_REG:
6941 case MACH_REG:
6942 return 1;
6944 return 0;
6947 /* Returns 1 if OP can be the source of a simple move operation.
6948 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
6949 invalid as are subregs of system registers. */
6952 general_movsrc_operand (rtx op, enum machine_mode mode)
6954 if (GET_CODE (op) == MEM)
6956 rtx inside = XEXP (op, 0);
6957 if (GET_CODE (inside) == CONST)
6958 inside = XEXP (inside, 0);
6960 if (GET_CODE (inside) == LABEL_REF)
6961 return 1;
6963 if (GET_CODE (inside) == PLUS
6964 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
6965 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
6966 return 1;
6968 /* Only post inc allowed. */
6969 if (GET_CODE (inside) == PRE_DEC)
6970 return 0;
6973 if ((mode == QImode || mode == HImode)
6974 && (GET_CODE (op) == SUBREG
6975 && GET_CODE (XEXP (op, 0)) == REG
6976 && system_reg_operand (XEXP (op, 0), mode)))
6977 return 0;
6979 return general_operand (op, mode);
6982 /* Returns 1 if OP can be a destination of a move.
6983 Same as general_operand, but no preinc allowed. */
6986 general_movdst_operand (rtx op, enum machine_mode mode)
6988 /* Only pre dec allowed. */
6989 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
6990 return 0;
6992 return general_operand (op, mode);
6995 /* Returns 1 if OP is a normal arithmetic register. */
6998 arith_reg_operand (rtx op, enum machine_mode mode)
7000 if (register_operand (op, mode))
7002 int regno;
7004 if (GET_CODE (op) == REG)
7005 regno = REGNO (op);
7006 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7007 regno = REGNO (SUBREG_REG (op));
7008 else
7009 return 1;
7011 return (regno != T_REG && regno != PR_REG
7012 && ! TARGET_REGISTER_P (regno)
7013 && (regno != FPUL_REG || TARGET_SH4)
7014 && regno != MACH_REG && regno != MACL_REG);
7016 return 0;
7019 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7020 because this would lead to missing sign extensions when truncating from
7021 DImode to SImode. */
7023 arith_reg_dest (rtx op, enum machine_mode mode)
7025 if (mode == DImode && GET_CODE (op) == SUBREG
7026 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7027 return 0;
7028 return arith_reg_operand (op, mode);
7032 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7034 enum machine_mode op_mode = GET_MODE (op);
7036 if (GET_MODE_CLASS (op_mode) != MODE_INT
7037 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7038 return 0;
7039 if (! reload_completed)
7040 return 0;
7041 return true_regnum (op) <= LAST_GENERAL_REG;
7045 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7047 if (register_operand (op, mode))
7049 int regno;
7051 if (GET_CODE (op) == REG)
7052 regno = REGNO (op);
7053 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7054 regno = REGNO (SUBREG_REG (op));
7055 else
7056 return 1;
7058 return (regno >= FIRST_PSEUDO_REGISTER
7059 || FP_REGISTER_P (regno));
7061 return 0;
7064 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7067 arith_operand (rtx op, enum machine_mode mode)
7069 if (arith_reg_operand (op, mode))
7070 return 1;
7072 if (TARGET_SHMEDIA)
7074 /* FIXME: We should be checking whether the CONST_INT fits in a
7075 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7076 attempting to transform a sequence of two 64-bit sets of the
7077 same register from literal constants into a set and an add,
7078 when the difference is too wide for an add. */
7079 if (GET_CODE (op) == CONST_INT
7080 || EXTRA_CONSTRAINT_C16 (op))
7081 return 1;
7082 else
7083 return 0;
7085 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7086 return 1;
7088 return 0;
7091 /* Returns 1 if OP is a valid source operand for a compare insn. */
7094 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7096 if (arith_reg_operand (op, mode))
7097 return 1;
7099 if (EXTRA_CONSTRAINT_Z (op))
7100 return 1;
7102 return 0;
7105 /* Return 1 if OP is a valid source operand for an SHmedia operation
7106 that takes either a register or a 6-bit immediate. */
7109 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7111 return (arith_reg_operand (op, mode)
7112 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7115 /* Returns 1 if OP is a valid source operand for a logical operation. */
7118 logical_operand (rtx op, enum machine_mode mode)
7120 if (arith_reg_operand (op, mode))
7121 return 1;
7123 if (TARGET_SHMEDIA)
7125 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7126 return 1;
7127 else
7128 return 0;
7130 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7131 return 1;
7133 return 0;
7137 and_operand (rtx op, enum machine_mode mode)
7139 if (logical_operand (op, mode))
7140 return 1;
7142 /* Check mshflo.l / mshflhi.l opportunities. */
7143 if (TARGET_SHMEDIA
7144 && mode == DImode
7145 && GET_CODE (op) == CONST_INT
7146 && CONST_OK_FOR_J16 (INTVAL (op)))
7147 return 1;
7149 return 0;
7152 /* Nonzero if OP is a floating point value with value 0.0. */
7155 fp_zero_operand (rtx op)
7157 REAL_VALUE_TYPE r;
7159 if (GET_MODE (op) != SFmode)
7160 return 0;
7162 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7163 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7166 /* Nonzero if OP is a floating point value with value 1.0. */
7169 fp_one_operand (rtx op)
7171 REAL_VALUE_TYPE r;
7173 if (GET_MODE (op) != SFmode)
7174 return 0;
7176 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7177 return REAL_VALUES_EQUAL (r, dconst1);
7180 /* For -m4 and -m4-single-only, mode switching is used. If we are
7181 compiling without -mfmovd, movsf_ie isn't taken into account for
7182 mode switching. We could check in machine_dependent_reorg for
7183 cases where we know we are in single precision mode, but there is
7184 no interface to find that out during reload, so we must avoid
7185 choosing an fldi alternative during reload and thus failing to
7186 allocate a scratch register for the constant loading. */
7188 fldi_ok (void)
7190 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7194 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7196 enum rtx_code code = GET_CODE (op);
7197 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7201 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7203 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
7204 && GET_MODE (op) == PSImode);
7208 fpul_operand (rtx op, enum machine_mode mode)
7210 if (TARGET_SHMEDIA)
7211 return fp_arith_reg_operand (op, mode);
7213 return (GET_CODE (op) == REG
7214 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7215 && GET_MODE (op) == mode);
7219 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7221 return (GET_CODE (op) == SYMBOL_REF);
7224 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7226 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7228 if (GET_CODE (op) != SYMBOL_REF)
7229 return 0;
7230 return SYMBOL_REF_TLS_MODEL (op);
7234 commutative_float_operator (rtx op, enum machine_mode mode)
7236 if (GET_MODE (op) != mode)
7237 return 0;
7238 switch (GET_CODE (op))
7240 case PLUS:
7241 case MULT:
7242 return 1;
7243 default:
7244 break;
7246 return 0;
7250 noncommutative_float_operator (rtx op, enum machine_mode mode)
7252 if (GET_MODE (op) != mode)
7253 return 0;
7254 switch (GET_CODE (op))
7256 case MINUS:
7257 case DIV:
7258 return 1;
7259 default:
7260 break;
7262 return 0;
7266 unary_float_operator (rtx op, enum machine_mode mode)
7268 if (GET_MODE (op) != mode)
7269 return 0;
7270 switch (GET_CODE (op))
7272 case ABS:
7273 case NEG:
7274 case SQRT:
7275 return 1;
7276 default:
7277 break;
7279 return 0;
7283 binary_float_operator (rtx op, enum machine_mode mode)
7285 if (GET_MODE (op) != mode)
7286 return 0;
7287 switch (GET_CODE (op))
7289 case PLUS:
7290 case MINUS:
7291 case MULT:
7292 case DIV:
7293 return 1;
7294 default:
7295 break;
7297 return 0;
7301 binary_logical_operator (rtx op, enum machine_mode mode)
7303 if (GET_MODE (op) != mode)
7304 return 0;
7305 switch (GET_CODE (op))
7307 case IOR:
7308 case AND:
7309 case XOR:
7310 return 1;
7311 default:
7312 break;
7314 return 0;
7318 equality_comparison_operator (rtx op, enum machine_mode mode)
7320 return ((mode == VOIDmode || GET_MODE (op) == mode)
7321 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7324 int greater_comparison_operator (rtx op, enum machine_mode mode)
7326 if (mode != VOIDmode && GET_MODE (op) == mode)
7327 return 0;
7328 switch (GET_CODE (op))
7330 case GT:
7331 case GE:
7332 case GTU:
7333 case GEU:
7334 return 1;
7335 default:
7336 return 0;
7340 int less_comparison_operator (rtx op, enum machine_mode mode)
7342 if (mode != VOIDmode && GET_MODE (op) == mode)
7343 return 0;
7344 switch (GET_CODE (op))
7346 case LT:
7347 case LE:
7348 case LTU:
7349 case LEU:
7350 return 1;
7351 default:
7352 return 0;
7356 /* Accept pseudos and branch target registers. */
7358 target_reg_operand (rtx op, enum machine_mode mode)
7360 if (mode != DImode
7361 || GET_MODE (op) != DImode)
7362 return 0;
7364 if (GET_CODE (op) == SUBREG)
7365 op = XEXP (op, 0);
7367 if (GET_CODE (op) != REG)
7368 return 0;
7370 /* We must protect ourselves from matching pseudos that are virtual
7371 registers, because they will eventually be replaced with hardware
7372 registers that aren't branch-target registers. */
7373 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7374 || TARGET_REGISTER_P (REGNO (op)))
7375 return 1;
7377 return 0;
7380 /* Same as target_reg_operand, except that label_refs and symbol_refs
7381 are accepted before reload. */
7383 target_operand (rtx op, enum machine_mode mode)
7385 if (mode != DImode)
7386 return 0;
7388 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7389 && EXTRA_CONSTRAINT_Csy (op))
7390 return ! reload_completed;
7392 return target_reg_operand (op, mode);
7396 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7398 HOST_WIDE_INT i;
7400 if (GET_CODE (op) != CONST_INT)
7401 return 0;
7402 i = INTVAL (op);
7403 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
7407 extend_reg_operand (rtx op, enum machine_mode mode)
7409 return (GET_CODE (op) == TRUNCATE
7410 ? arith_operand
7411 : arith_reg_operand) (op, mode);
7415 trunc_hi_operand (rtx op, enum machine_mode mode)
7417 enum machine_mode op_mode = GET_MODE (op);
7419 if (op_mode != SImode && op_mode != DImode
7420 && op_mode != V4HImode && op_mode != V2SImode)
7421 return 0;
7422 return extend_reg_operand (op, mode);
7426 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7428 return (GET_CODE (op) == TRUNCATE
7429 ? arith_operand
7430 : arith_reg_or_0_operand) (op, mode);
7434 general_extend_operand (rtx op, enum machine_mode mode)
7436 return (GET_CODE (op) == TRUNCATE
7437 ? arith_operand
7438 : nonimmediate_operand) (op, mode);
7442 inqhi_operand (rtx op, enum machine_mode mode)
7444 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7445 return 0;
7446 op = XEXP (op, 0);
7447 /* Can't use true_regnum here because copy_cost wants to know about
7448 SECONDARY_INPUT_RELOAD_CLASS. */
7449 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7453 sh_rep_vec (rtx v, enum machine_mode mode)
7455 int i;
7456 rtx x, y;
7458 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7459 || (GET_MODE (v) != mode && mode != VOIDmode))
7460 return 0;
7461 i = XVECLEN (v, 0) - 2;
7462 x = XVECEXP (v, 0, i + 1);
7463 if (GET_MODE_UNIT_SIZE (mode) == 1)
7465 y = XVECEXP (v, 0, i);
7466 for (i -= 2 ; i >= 0; i -= 2)
7467 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7468 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7469 return 0;
7471 else
7472 for (; i >= 0; i--)
7473 if (XVECEXP (v, 0, i) != x)
7474 return 0;
7475 return 1;
7478 /* Determine if V is a constant vector matching MODE with only one element
7479 that is not a sign extension. Two byte-sized elements count as one. */
7481 sh_1el_vec (rtx v, enum machine_mode mode)
7483 int unit_size;
7484 int i, last, least, sign_ix;
7485 rtx sign;
7487 if (GET_CODE (v) != CONST_VECTOR
7488 || (GET_MODE (v) != mode && mode != VOIDmode))
7489 return 0;
7490 /* Determine numbers of last and of least significant elements. */
7491 last = XVECLEN (v, 0) - 1;
7492 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7493 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7494 return 0;
7495 sign_ix = least;
7496 if (GET_MODE_UNIT_SIZE (mode) == 1)
7497 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7498 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7499 return 0;
7500 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7501 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7502 ? constm1_rtx : const0_rtx);
7503 i = XVECLEN (v, 0) - 1;
7505 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7506 return 0;
7507 while (--i);
7508 return 1;
7512 sh_const_vec (rtx v, enum machine_mode mode)
7514 int i;
7516 if (GET_CODE (v) != CONST_VECTOR
7517 || (GET_MODE (v) != mode && mode != VOIDmode))
7518 return 0;
7519 i = XVECLEN (v, 0) - 1;
7520 for (; i >= 0; i--)
7521 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7522 return 0;
7523 return 1;
7526 /* Return the destination address of a branch. */
7528 static int
7529 branch_dest (rtx branch)
7531 rtx dest = SET_SRC (PATTERN (branch));
7532 int dest_uid;
7534 if (GET_CODE (dest) == IF_THEN_ELSE)
7535 dest = XEXP (dest, 1);
7536 dest = XEXP (dest, 0);
7537 dest_uid = INSN_UID (dest);
7538 return INSN_ADDRESSES (dest_uid);
7541 /* Return nonzero if REG is not used after INSN.
7542 We assume REG is a reload reg, and therefore does
7543 not live past labels. It may live past calls or jumps though. */
7545 reg_unused_after (rtx reg, rtx insn)
7547 enum rtx_code code;
7548 rtx set;
7550 /* If the reg is set by this instruction, then it is safe for our
7551 case. Disregard the case where this is a store to memory, since
7552 we are checking a register used in the store address. */
7553 set = single_set (insn);
7554 if (set && GET_CODE (SET_DEST (set)) != MEM
7555 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7556 return 1;
7558 while ((insn = NEXT_INSN (insn)))
7560 code = GET_CODE (insn);
7562 #if 0
7563 /* If this is a label that existed before reload, then the register
7564 is dead here. However, if this is a label added by reorg, then
7565 the register may still be live here. We can't tell the difference,
7566 so we just ignore labels completely. */
7567 if (code == CODE_LABEL)
7568 return 1;
7569 /* else */
7570 #endif
7572 if (code == JUMP_INSN)
7573 return 0;
7575 /* If this is a sequence, we must handle them all at once.
7576 We could have for instance a call that sets the target register,
7577 and an insn in a delay slot that uses the register. In this case,
7578 we must return 0. */
7579 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7581 int i;
7582 int retval = 0;
7584 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7586 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7587 rtx set = single_set (this_insn);
7589 if (GET_CODE (this_insn) == CALL_INSN)
7590 code = CALL_INSN;
7591 else if (GET_CODE (this_insn) == JUMP_INSN)
7593 if (INSN_ANNULLED_BRANCH_P (this_insn))
7594 return 0;
7595 code = JUMP_INSN;
7598 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7599 return 0;
7600 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7602 if (GET_CODE (SET_DEST (set)) != MEM)
7603 retval = 1;
7604 else
7605 return 0;
7607 if (set == 0
7608 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7609 return 0;
7611 if (retval == 1)
7612 return 1;
7613 else if (code == JUMP_INSN)
7614 return 0;
7616 else if (GET_RTX_CLASS (code) == 'i')
7618 rtx set = single_set (insn);
7620 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7621 return 0;
7622 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7623 return GET_CODE (SET_DEST (set)) != MEM;
7624 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7625 return 0;
7628 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
7629 return 1;
7631 return 1;
7634 #include "ggc.h"
7636 static GTY(()) rtx fpscr_rtx;
7638 get_fpscr_rtx (void)
7640 if (! fpscr_rtx)
7642 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
7643 REG_USERVAR_P (fpscr_rtx) = 1;
7644 mark_user_reg (fpscr_rtx);
7646 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7647 mark_user_reg (fpscr_rtx);
7648 return fpscr_rtx;
7651 void
7652 emit_sf_insn (rtx pat)
7654 emit_insn (pat);
7657 void
7658 emit_df_insn (rtx pat)
7660 emit_insn (pat);
7663 void
7664 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7666 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7669 void
7670 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7672 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7673 get_fpscr_rtx ()));
7676 void
7677 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7679 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7682 void
7683 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7685 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7686 get_fpscr_rtx ()));
7689 /* ??? gcc does flow analysis strictly after common subexpression
7690 elimination. As a result, common subexpression elimination fails
7691 when there are some intervening statements setting the same register.
7692 If we did nothing about this, this would hurt the precision switching
7693 for SH4 badly. There is some cse after reload, but it is unable to
7694 undo the extra register pressure from the unused instructions, and
7695 it cannot remove auto-increment loads.
7697 A C code example that shows this flow/cse weakness for (at least) SH
7698 and sparc (as of gcc ss-970706) is this:
7700 double
7701 f(double a)
7703 double d;
7704 d = 0.1;
7705 a += d;
7706 d = 1.1;
7707 d = 0.1;
7708 a *= d;
7709 return a;
7712 So we add another pass before common subexpression elimination, to
7713 remove assignments that are dead due to a following assignment in the
7714 same basic block. */
7716 static void
7717 mark_use (rtx x, rtx *reg_set_block)
7719 enum rtx_code code;
7721 if (! x)
7722 return;
7723 code = GET_CODE (x);
7724 switch (code)
7726 case REG:
7728 int regno = REGNO (x);
7729 int nregs = (regno < FIRST_PSEUDO_REGISTER
7730 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7731 : 1);
7734 reg_set_block[regno + nregs - 1] = 0;
7736 while (--nregs);
7737 break;
7739 case SET:
7741 rtx dest = SET_DEST (x);
7743 if (GET_CODE (dest) == SUBREG)
7744 dest = SUBREG_REG (dest);
7745 if (GET_CODE (dest) != REG)
7746 mark_use (dest, reg_set_block);
7747 mark_use (SET_SRC (x), reg_set_block);
7748 break;
7750 case CLOBBER:
7751 break;
7752 default:
7754 const char *fmt = GET_RTX_FORMAT (code);
7755 int i, j;
7756 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7758 if (fmt[i] == 'e')
7759 mark_use (XEXP (x, i), reg_set_block);
7760 else if (fmt[i] == 'E')
7761 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7762 mark_use (XVECEXP (x, i, j), reg_set_block);
7764 break;
7769 static rtx get_free_reg (HARD_REG_SET);
7771 /* This function returns a register to use to load the address to load
7772 the fpscr from. Currently it always returns r1 or r7, but when we are
7773 able to use pseudo registers after combine, or have a better mechanism
7774 for choosing a register, it should be done here. */
7775 /* REGS_LIVE is the liveness information for the point for which we
7776 need this allocation. In some bare-bones exit blocks, r1 is live at the
7777 start. We can even have all of r0..r3 being live:
7778 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
7779 The INSN before which new insns are placed will clobber the register
7780 we return. If a basic block consists only of setting the return value
7781 register to a pseudo and using that register, the return value is not
7782 live before or after this block, yet we'll insert our insns right in
7783 the middle. */
7785 static rtx
7786 get_free_reg (HARD_REG_SET regs_live)
7788 if (! TEST_HARD_REG_BIT (regs_live, 1))
7789 return gen_rtx_REG (Pmode, 1);
7791 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7792 there shouldn't be anything but a jump before the function end. */
7793 if (! TEST_HARD_REG_BIT (regs_live, 7))
7794 return gen_rtx_REG (Pmode, 7);
7796 abort ();
7799 /* This function will set the fpscr from memory.
7800 MODE is the mode we are setting it to. */
7801 void
7802 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
7804 enum attr_fp_mode fp_mode = mode;
7805 rtx addr_reg = get_free_reg (regs_live);
7807 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
7808 emit_insn (gen_fpu_switch1 (addr_reg));
7809 else
7810 emit_insn (gen_fpu_switch0 (addr_reg));
7813 /* Is the given character a logical line separator for the assembler? */
7814 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7815 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
7816 #endif
7819 sh_insn_length_adjustment (rtx insn)
7821 /* Instructions with unfilled delay slots take up an extra two bytes for
7822 the nop in the delay slot. */
7823 if (((GET_CODE (insn) == INSN
7824 && GET_CODE (PATTERN (insn)) != USE
7825 && GET_CODE (PATTERN (insn)) != CLOBBER)
7826 || GET_CODE (insn) == CALL_INSN
7827 || (GET_CODE (insn) == JUMP_INSN
7828 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7829 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
7830 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
7831 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
7832 return 2;
7834 /* SH2e has a bug that prevents the use of annulled branches, so if
7835 the delay slot is not filled, we'll have to put a NOP in it. */
7836 if (sh_cpu == CPU_SH2E
7837 && GET_CODE (insn) == JUMP_INSN
7838 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7839 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7840 && get_attr_type (insn) == TYPE_CBRANCH
7841 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
7842 return 2;
7844 /* sh-dsp parallel processing insns take four bytes instead of two. */
7846 if (GET_CODE (insn) == INSN)
7848 int sum = 0;
7849 rtx body = PATTERN (insn);
7850 const char *template;
7851 char c;
7852 int maybe_label = 1;
7854 if (GET_CODE (body) == ASM_INPUT)
7855 template = XSTR (body, 0);
7856 else if (asm_noperands (body) >= 0)
7857 template
7858 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
7859 else
7860 return 0;
7863 int ppi_adjust = 0;
7866 c = *template++;
7867 while (c == ' ' || c == '\t');
7868 /* All sh-dsp parallel-processing insns start with p.
7869 The only non-ppi sh insn starting with p is pref.
7870 The only ppi starting with pr is prnd. */
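/* For instance, "padd" or "pmuls" would take the ppi adjustment below,
   while "pref" would not (illustrative mnemonics, not an exhaustive list). */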
7871 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
7872 ppi_adjust = 2;
7873 /* The repeat pseudo-insn expands to three insns, a total of
7874 six bytes in size. */
7875 else if ((c == 'r' || c == 'R')
7876 && ! strncasecmp ("epeat", template, 5))
7877 ppi_adjust = 4;
7878 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
7880 /* If this is a label, it is obviously not a ppi insn. */
7881 if (c == ':' && maybe_label)
7883 ppi_adjust = 0;
7884 break;
7886 else if (c == '\'' || c == '"')
7887 maybe_label = 0;
7888 c = *template++;
7890 sum += ppi_adjust;
7891 maybe_label = c != ':';
7893 while (c);
7894 return sum;
7896 return 0;
7899 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
7900 isn't protected by a PIC unspec. */
7902 nonpic_symbol_mentioned_p (rtx x)
7904 register const char *fmt;
7905 register int i;
7907 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
7908 || GET_CODE (x) == PC)
7909 return 1;
7911 /* We don't want to look into the possible MEM location of a
7912 CONST_DOUBLE, since we're not going to use it, in general. */
7913 if (GET_CODE (x) == CONST_DOUBLE)
7914 return 0;
7916 if (GET_CODE (x) == UNSPEC
7917 && (XINT (x, 1) == UNSPEC_PIC
7918 || XINT (x, 1) == UNSPEC_GOT
7919 || XINT (x, 1) == UNSPEC_GOTOFF
7920 || XINT (x, 1) == UNSPEC_GOTPLT
7921 || XINT (x, 1) == UNSPEC_GOTTPOFF
7922 || XINT (x, 1) == UNSPEC_DTPOFF
7923 || XINT (x, 1) == UNSPEC_PLT))
7924 return 0;
7926 fmt = GET_RTX_FORMAT (GET_CODE (x));
7927 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7929 if (fmt[i] == 'E')
7931 register int j;
7933 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7934 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
7935 return 1;
7937 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
7938 return 1;
7941 return 0;
7944 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
7945 @GOTOFF in `reg'. */
7947 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
7948 rtx reg)
7950 if (tls_symbolic_operand (orig, Pmode))
7951 return orig;
7953 if (GET_CODE (orig) == LABEL_REF
7954 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
7956 if (reg == 0)
7957 reg = gen_reg_rtx (Pmode);
7959 emit_insn (gen_symGOTOFF2reg (reg, orig));
7960 return reg;
7962 else if (GET_CODE (orig) == SYMBOL_REF)
7964 if (reg == 0)
7965 reg = gen_reg_rtx (Pmode);
7967 emit_insn (gen_symGOT2reg (reg, orig));
7968 return reg;
7970 return orig;
7973 /* Mark the use of a constant in the literal table. If the constant
7974 has multiple labels, make it unique. */
7975 static rtx
7976 mark_constant_pool_use (rtx x)
7978 rtx insn, lab, pattern;
7980 if (x == NULL)
7981 return x;
7983 switch (GET_CODE (x))
7985 case LABEL_REF:
7986 x = XEXP (x, 0);
7987 case CODE_LABEL:
7988 break;
7989 default:
7990 return x;
7993 /* Get the first label in the list of labels for the same constant
7994 and delete the other labels in the list. */
7995 lab = x;
7996 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
7998 if (GET_CODE (insn) != CODE_LABEL
7999 || LABEL_REFS (insn) != NEXT_INSN (insn))
8000 break;
8001 lab = insn;
8004 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8005 INSN_DELETED_P (insn) = 1;
8007 /* Mark constants in a window. */
8008 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8010 if (GET_CODE (insn) != INSN)
8011 continue;
8013 pattern = PATTERN (insn);
8014 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8015 continue;
8017 switch (XINT (pattern, 1))
8019 case UNSPECV_CONST2:
8020 case UNSPECV_CONST4:
8021 case UNSPECV_CONST8:
8022 XVECEXP (pattern, 0, 1) = const1_rtx;
8023 break;
8024 case UNSPECV_WINDOW_END:
8025 if (XVECEXP (pattern, 0, 0) == x)
8026 return lab;
8027 break;
8028 case UNSPECV_CONST_END:
8029 return lab;
8030 default:
8031 break;
8035 return lab;
8038 /* Return true if it's possible to redirect BRANCH1 to the destination
8039 of an unconditional jump BRANCH2. We only want to do this if the
8040 resulting branch will have a short displacement. */
8041 int
8042 sh_can_redirect_branch (rtx branch1, rtx branch2)
8044 if (flag_expensive_optimizations && simplejump_p (branch2))
8046 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8047 rtx insn;
8048 int distance;
8050 for (distance = 0, insn = NEXT_INSN (branch1);
8051 insn && distance < 256;
8052 insn = PREV_INSN (insn))
8054 if (insn == dest)
8055 return 1;
8056 else
8057 distance += get_attr_length (insn);
8059 for (distance = 0, insn = NEXT_INSN (branch1);
8060 insn && distance < 256;
8061 insn = NEXT_INSN (insn))
8063 if (insn == dest)
8064 return 1;
8065 else
8066 distance += get_attr_length (insn);
8069 return 0;
8072 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8073 int
8074 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8075 unsigned int new_reg)
8078 /* Interrupt functions can only use registers that have already been
8079 saved by the prologue, even if they would normally be
8080 call-clobbered. */
8082 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8083 return 0;
8085 return 1;
8088 /* Function to update the integer COST
8089 based on the relationship between INSN that is dependent on
8090 DEP_INSN through the dependence LINK. The default is to make no
8091 adjustment to COST. This can be used for example to specify to
8092 the scheduler that an output- or anti-dependence does not incur
8093 the same cost as a data-dependence. The return value should be
8094 the new value for COST. */
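/* Rough examples of the adjustments made below: on SHmedia an anti- or
   output-dependence is normally costed at 0; on SH4 a load whose result
   feeds the shift amount of a following SHAD/SHLD gets one extra cycle
   of latency, and an LS-group insn feeding a double-precision FP insn,
   FIPR or FTRV is stretched to a latency of 3.  */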
8095 static int
8096 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8098 rtx reg, use_pat;
8100 if (TARGET_SHMEDIA)
8102 /* On SHmedia, if the dependence is an anti-dependence or
8103 output-dependence, there is no cost. */
8104 if (REG_NOTE_KIND (link) != 0)
8105 cost = 0;
8107 if (get_attr_is_mac_media (insn)
8108 && get_attr_is_mac_media (dep_insn))
8109 cost = 1;
8111 else if (REG_NOTE_KIND (link) == 0)
8113 enum attr_type dep_type, type;
8115 if (recog_memoized (insn) < 0
8116 || recog_memoized (dep_insn) < 0)
8117 return cost;
8119 dep_type = get_attr_type (dep_insn);
8120 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8121 cost--;
8122 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8123 && (type = get_attr_type (insn)) != TYPE_CALL
8124 && type != TYPE_SFUNC)
8125 cost--;
8127 /* The only input for a call that is timing-critical is the
8128 function's address. */
8129 if (GET_CODE (insn) == CALL_INSN)
8131 rtx call = PATTERN (insn);
8133 if (GET_CODE (call) == PARALLEL)
8134 call = XVECEXP (call, 0, 0);
8135 if (GET_CODE (call) == SET)
8136 call = SET_SRC (call);
8137 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8138 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8139 cost = 0;
8141 /* Likewise, the most timing critical input for an sfunc call
8142 is the function address. However, sfuncs typically start
8143 using their arguments pretty quickly.
8144 Assume a four cycle delay before they are needed. */
8145 /* All sfunc calls are parallels with at least four components.
8146 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8147 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8148 && XVECLEN (PATTERN (insn), 0) >= 4
8149 && (reg = sfunc_uses_reg (insn)))
8151 if (! reg_set_p (reg, dep_insn))
8152 cost -= 4;
8154 /* When the preceding instruction loads the shift amount of
8155 the following SHAD/SHLD, the latency of the load is increased
8156 by 1 cycle. */
8157 else if (TARGET_SH4
8158 && get_attr_type (insn) == TYPE_DYN_SHIFT
8159 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8160 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8161 XEXP (SET_SRC (single_set (insn)),
8162 1)))
8163 cost++;
8164 /* When an LS group instruction with a latency of less than
8165 3 cycles is followed by a double-precision floating-point
8166 instruction, FIPR, or FTRV, the latency of the first
8167 instruction is increased to 3 cycles. */
8168 else if (cost < 3
8169 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8170 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8171 cost = 3;
8172 /* The lsw register of a double-precision computation is ready one
8173 cycle earlier. */
8174 else if (reload_completed
8175 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8176 && (use_pat = single_set (insn))
8177 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8178 SET_SRC (use_pat)))
8179 cost -= 1;
8181 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8182 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8183 cost -= 1;
8185 /* An anti-dependence penalty of two applies if the first insn is a double
8186 precision fadd / fsub / fmul. */
8187 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8188 && recog_memoized (dep_insn) >= 0
8189 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8190 /* A lot of alleged anti-flow dependences are fake,
8191 so check this one is real. */
8192 && flow_dependent_p (dep_insn, insn))
8193 cost = 2;
8196 return cost;
8199 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8200 if DEP_INSN is anti-flow dependent on INSN. */
8201 static int
8202 flow_dependent_p (rtx insn, rtx dep_insn)
8204 rtx tmp = PATTERN (insn);
8206 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8207 return tmp == NULL_RTX;
8210 /* A helper function for flow_dependent_p called through note_stores. */
8211 static void
8212 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8214 rtx * pinsn = (rtx *) data;
8216 if (*pinsn && reg_referenced_p (x, *pinsn))
8217 *pinsn = NULL_RTX;
8220 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8221 'special function' patterns (type sfunc) that clobber pr, but that
8222 do not look like function calls to leaf_function_p. Hence we must
8223 do this extra check. */
8224 int
8225 sh_pr_n_sets (void)
8227 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8230 /* This function returns nonzero if the DFA-based scheduler interface
8231 is to be used. At present this is supported for the SH4 only. */
8232 static int
8233 sh_use_dfa_interface (void)
8235 if (TARGET_HARD_SH4)
8236 return 1;
8237 else
8238 return 0;
8241 /* This function returns "2" to indicate dual issue for the superscalar
8242 SH4 processor and "1" otherwise.  Used by the DFA pipeline description. */
8243 static int
8244 sh_issue_rate (void)
8246 if (TARGET_SUPERSCALAR)
8247 return 2;
8248 else
8249 return 1;
8252 /* SHmedia requires registers for branches, so we can't generate new
8253 branches past reload. */
8254 static bool
8255 sh_cannot_modify_jumps_p (void)
8257 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8260 static int
8261 sh_target_reg_class (void)
8263 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8266 static bool
8267 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8269 return (shmedia_space_reserved_for_target_registers
8270 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8273 static bool
8274 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8277 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8280 /*
8281 On the SH1..SH4, the trampoline looks like
8282 2 0002 D202 mov.l l2,r2
8283 1 0000 D301 mov.l l1,r3
8284 3 0004 422B jmp @r2
8285 4 0006 0009 nop
8286 5 0008 00000000 l1: .long area
8287 6 000c 00000000 l2: .long function
8289 SH5 (compact) uses r1 instead of r3 for the static chain. */
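/* For illustration, a trampoline of this shape is built when the address
   of a GNU C nested function escapes, e.g.

       int outer (int x)
       {
         int inner (int y) { return x + y; }
         return apply (inner);
       }

   where `apply' is a hypothetical function taking a function pointer.
   sh_initialize_trampoline below fills in the two literal words: l1
   receives the static chain and l2 the address of `inner'.  */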
8292 /* Emit RTL insns to initialize the variable parts of a trampoline.
8293 FNADDR is an RTX for the address of the function's pure code.
8294 CXT is an RTX for the static chain value for the function. */
8296 void
8297 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8299 if (TARGET_SHMEDIA64)
8301 rtx tramp_templ;
8302 int fixed_len;
8304 rtx movi1 = GEN_INT (0xcc000010);
8305 rtx shori1 = GEN_INT (0xc8000010);
8306 rtx src, dst;
8308 /* The following trampoline works within a +- 128 KB range for cxt:
8309 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8310 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8311 gettr tr1,r1; blink tr0,r63 */
8312 /* Address rounding makes it hard to compute the exact bounds of the
8313 offset for this trampoline, but we have a rather generous offset
8314 range, so frame_offset should do fine as an upper bound. */
8315 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8317 /* ??? could optimize this trampoline initialization
8318 by writing DImode words with two insns each. */
8319 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8320 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8321 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10 - 2));
8322 insn = gen_rtx_AND (DImode, insn, mask);
8323 /* Or in ptb/u .,tr1 pattern */
8324 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8325 insn = force_operand (insn, NULL_RTX);
8326 insn = gen_lowpart (SImode, insn);
8327 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8328 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8329 insn = gen_rtx_AND (DImode, insn, mask);
8330 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8331 insn = gen_lowpart (SImode, insn);
8332 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
8333 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8334 insn = gen_rtx_AND (DImode, insn, mask);
8335 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8336 insn = gen_lowpart (SImode, insn);
8337 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
8338 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
8339 insn = gen_rtx_AND (DImode, insn, mask);
8340 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8341 insn = gen_lowpart (SImode, insn);
8342 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8343 insn);
8344 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
8345 insn = gen_rtx_AND (DImode, insn, mask);
8346 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8347 insn = gen_lowpart (SImode, insn);
8348 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
8349 insn);
8350 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
8351 GEN_INT (0x6bf10600));
8352 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
8353 GEN_INT (0x4415fc10));
8354 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
8355 GEN_INT (0x4401fff0));
8356 emit_insn (gen_ic_invalidate_line (tramp));
8357 return;
8359 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
8360 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
8362 tramp_templ = gen_datalabel_ref (tramp_templ);
8363 dst = gen_rtx_MEM (BLKmode, tramp);
8364 src = gen_rtx_MEM (BLKmode, tramp_templ);
8365 set_mem_align (dst, 256);
8366 set_mem_align (src, 64);
8367 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
8369 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
8370 fnaddr);
8371 emit_move_insn (gen_rtx_MEM (Pmode,
8372 plus_constant (tramp,
8373 fixed_len
8374 + GET_MODE_SIZE (Pmode))),
8375 cxt);
8376 emit_insn (gen_ic_invalidate_line (tramp));
8377 return;
8379 else if (TARGET_SHMEDIA)
8381 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
8382 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
8383 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
8384 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
8385 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
8386 rotated 10 right, and the higher 16 bits of every 32 selected. */
8387 rtx movishori
8388 = force_reg (V2HImode, (simplify_gen_subreg
8389 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
8390 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
8391 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
8393 tramp = force_reg (Pmode, tramp);
8394 fnaddr = force_reg (SImode, fnaddr);
8395 cxt = force_reg (SImode, cxt);
8396 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
8397 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
8398 movishori));
8399 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
8400 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8401 emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
8402 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
8403 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
8404 gen_rtx_SUBREG (V2HImode, cxt, 0),
8405 movishori));
8406 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
8407 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8408 emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
8409 if (TARGET_LITTLE_ENDIAN)
8411 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
8412 emit_insn (gen_mextr4 (quad2, cxtload, blink));
8414 else
8416 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
8417 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
8419 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
8420 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
8421 emit_insn (gen_ic_invalidate_line (tramp));
8422 return;
8424 else if (TARGET_SHCOMPACT)
8426 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
8427 return;
8429 emit_move_insn (gen_rtx_MEM (SImode, tramp),
8430 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
8431 SImode));
8432 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
8433 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
8434 SImode));
8435 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
8436 cxt);
8437 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8438 fnaddr);
8439 if (TARGET_HARVARD)
8441 if (TARGET_USERMODE)
8442 emit_library_call (function_symbol ("__ic_invalidate"),
8443 0, VOIDmode, 1, tramp, SImode);
8444 else
8445 emit_insn (gen_ic_invalidate_line (tramp));
8449 /* FIXME: This is overly conservative. A SHcompact function that
8450 receives arguments ``by reference'' will have them stored in its
8451 own stack frame, so it must not pass pointers or references to
8452 these arguments to other functions by means of sibling calls. */
8453 static bool
8454 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8456 return (decl
8457 && (! TARGET_SHCOMPACT
8458 || current_function_args_info.stack_regs == 0)
8459 && ! sh_cfun_interrupt_handler_p ());
8462 /* Machine specific built-in functions. */
8464 struct builtin_description
8465 {
8466 const enum insn_code icode;
8467 const char *const name;
8468 int signature;
8469 };
8471 /* Describe number and signedness of arguments; arg[0] == result
8472 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
8473 static const char signature_args[][4] =
8474 {
8475 #define SH_BLTIN_V2SI2 0
8476 { 4, 4 },
8477 #define SH_BLTIN_V4HI2 1
8478 { 4, 4 },
8479 #define SH_BLTIN_V2SI3 2
8480 { 4, 4, 4 },
8481 #define SH_BLTIN_V4HI3 3
8482 { 4, 4, 4 },
8483 #define SH_BLTIN_V8QI3 4
8484 { 4, 4, 4 },
8485 #define SH_BLTIN_MAC_HISI 5
8486 { 1, 4, 4, 1 },
8487 #define SH_BLTIN_SH_HI 6
8488 { 4, 4, 1 },
8489 #define SH_BLTIN_SH_SI 7
8490 { 4, 4, 1 },
8491 #define SH_BLTIN_V4HI2V2SI 8
8492 { 4, 4, 4 },
8493 #define SH_BLTIN_V4HI2V8QI 9
8494 { 4, 4, 4 },
8495 #define SH_BLTIN_SISF 10
8496 { 4, 2 },
8497 #define SH_BLTIN_LDUA_L 11
8498 { 2, 8 },
8499 #define SH_BLTIN_LDUA_Q 12
8500 { 1, 8 },
8501 #define SH_BLTIN_STUA_L 13
8502 { 0, 8, 2 },
8503 #define SH_BLTIN_STUA_Q 14
8504 { 0, 8, 1 },
8505 #define SH_BLTIN_UDI 15
8506 { 0, 8, 1 },
8507 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
8508 #define SH_BLTIN_2 16
8509 #define SH_BLTIN_SU 16
8510 { 1, 2 },
8511 #define SH_BLTIN_3 17
8512 #define SH_BLTIN_SUS 17
8513 { 2, 2, 1 },
8514 #define SH_BLTIN_PSSV 18
8515 { 0, 8, 2, 2 },
8516 #define SH_BLTIN_XXUU 19
8517 #define SH_BLTIN_UUUU 19
8518 { 1, 1, 1, 1 },
8519 #define SH_BLTIN_PV 20
8520 { 0, 8 },
8521 };
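/* Reading the table: SH_BLTIN_SH_HI is { 4, 4, 1 }, i.e. result and first
   argument of don't-care signedness and an unsigned (shift count or
   control) second argument; SH_BLTIN_PSSV is { 0, 8, 2, 2 }, i.e. no
   result, a pointer argument and two signed int arguments.  */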
8522 /* mcmv: operands considered unsigned. */
8523 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
8524 /* mperm: control value considered unsigned int. */
8525 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
8526 /* mshards_q: returns signed short. */
8527 /* nsb: takes long long arg, returns unsigned char. */
8528 static const struct builtin_description bdesc[] =
8529 {
8530 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
8531 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
8532 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
8533 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
8534 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
8535 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
8536 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
8537 #if 0
8538 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
8539 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
8540 #endif
8541 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
8542 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
8543 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
8544 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
8545 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
8546 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
8547 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
8548 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
8549 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
8550 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
8551 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
8552 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
8553 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
8554 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
8555 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
8556 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
8557 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
8558 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
8559 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
8560 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
8561 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
8562 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
8563 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
8564 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
8565 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
8566 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
8567 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
8568 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
8569 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
8570 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
8571 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
8572 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
8573 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
8574 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
8575 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
8576 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
8577 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
8578 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
8579 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
8580 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
8581 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
8582 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
8583 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
8584 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
8585 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
8586 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
8587 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
8588 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
8589 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
8590 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
8591 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
8592 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
8593 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
8594 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
8595 #if 0
8596 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
8597 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
8598 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
8599 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
8600 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
8601 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
8602 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
8603 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
8604 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
8605 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
8606 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
8607 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
8608 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
8609 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
8610 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
8611 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
8612 #endif
8613 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
8614 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
8615 #if 0
8616 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
8617 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
8618 #endif
8619 };
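/* Each entry above is registered by sh_media_init_builtins below under
   the given name, so SHmedia code can, for example, use
   __builtin_sh_media_NSB (x) directly; argument and result types are
   derived from the insn's operand modes and the signature_args row
   (for NSB, per the comment above: a long long argument, an unsigned
   char result).  */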
8621 static void
8622 sh_media_init_builtins (void)
8624 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
8625 const struct builtin_description *d;
8627 memset (shared, 0, sizeof shared);
8628 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
8630 tree type, arg_type;
8631 int signature = d->signature;
8632 int i;
8634 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
8635 type = shared[signature];
8636 else
8638 int has_result = signature_args[signature][0] != 0;
8640 if (signature_args[signature][1] == 8
8641 && (insn_data[d->icode].operand[has_result].mode != Pmode))
8642 continue;
8643 if (! TARGET_FPU_ANY
8644 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
8645 continue;
8646 type = void_list_node;
8647 for (i = 3; ; i--)
8649 int arg = signature_args[signature][i];
8650 int opno = i - 1 + has_result;
8652 if (arg == 8)
8653 arg_type = ptr_type_node;
8654 else if (arg)
8655 arg_type = ((*lang_hooks.types.type_for_mode)
8656 (insn_data[d->icode].operand[opno].mode,
8657 (arg & 1)));
8658 else if (i)
8659 continue;
8660 else
8661 arg_type = void_type_node;
8662 if (i == 0)
8663 break;
8664 type = tree_cons (NULL_TREE, arg_type, type);
8666 type = build_function_type (arg_type, type);
8667 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
8668 shared[signature] = type;
8670 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
8671 NULL, NULL_TREE);
8675 static void
8676 sh_init_builtins (void)
8678 if (TARGET_SHMEDIA)
8679 sh_media_init_builtins ();
8682 /* Expand an expression EXP that calls a built-in function,
8683 with result going to TARGET if that's convenient
8684 (and in mode MODE if that's convenient).
8685 SUBTARGET may be used as the target for computing one of EXP's operands.
8686 IGNORE is nonzero if the value is to be ignored. */
8688 static rtx
8689 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8690 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
8692 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8693 tree arglist = TREE_OPERAND (exp, 1);
8694 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8695 const struct builtin_description *d = &bdesc[fcode];
8696 enum insn_code icode = d->icode;
8697 int signature = d->signature;
8698 enum machine_mode tmode = VOIDmode;
8699 int nop = 0, i;
8700 rtx op[4];
8701 rtx pat;
8703 if (signature_args[signature][0])
8705 if (ignore)
8706 return 0;
8708 tmode = insn_data[icode].operand[0].mode;
8709 if (! target
8710 || GET_MODE (target) != tmode
8711 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8712 target = gen_reg_rtx (tmode);
8713 op[nop++] = target;
8715 else
8716 target = 0;
8718 for (i = 1; i <= 3; i++, nop++)
8720 tree arg;
8721 enum machine_mode opmode, argmode;
8723 if (! signature_args[signature][i])
8724 break;
8725 arg = TREE_VALUE (arglist);
8726 if (arg == error_mark_node)
8727 return const0_rtx;
8728 arglist = TREE_CHAIN (arglist);
8729 opmode = insn_data[icode].operand[nop].mode;
8730 argmode = TYPE_MODE (TREE_TYPE (arg));
8731 if (argmode != opmode)
8732 arg = build1 (NOP_EXPR,
8733 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
8734 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
8735 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
8736 op[nop] = copy_to_mode_reg (opmode, op[nop]);
8739 switch (nop)
8741 case 1:
8742 pat = (*insn_data[d->icode].genfun) (op[0]);
8743 break;
8744 case 2:
8745 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
8746 break;
8747 case 3:
8748 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
8749 break;
8750 case 4:
8751 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
8752 break;
8753 default:
8754 abort ();
8756 if (! pat)
8757 return 0;
8758 emit_insn (pat);
8759 return target;
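/* The two expanders below open-code V2SF operations as a pair of SFmode
   operations, one per vector lane, using the sel operands of the
   unary_sf_op / binary_sf_op patterns to select the lane; roughly
   op0[0] = op1[0] OP op2[0] followed by op0[1] = op1[1] OP op2[1].  */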
8762 void
8763 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
8765 rtx sel0 = const0_rtx;
8766 rtx sel1 = const1_rtx;
8767 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
8768 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
8770 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
8771 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
8774 void
8775 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
8777 rtx sel0 = const0_rtx;
8778 rtx sel1 = const1_rtx;
8779 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
8780 = gen_binary_sf_op;
8781 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
8783 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
8784 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
8787 /* Return true if a mode change from FROM to TO is invalid for the
8788 registers in class CLASS. */
8789 bool
8790 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
8791 enum reg_class class)
8793 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
8795 if (TARGET_LITTLE_ENDIAN)
8797 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
8798 return reg_classes_intersect_p (DF_REGS, class);
8800 else
8802 if (GET_MODE_SIZE (from) < 8)
8803 return reg_classes_intersect_p (DF_HI_REGS, class);
8806 return 0;
8810 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
8811 that label is used. */
8813 void
8814 sh_mark_label (rtx address, int nuses)
8816 if (GOTOFF_P (address))
8818 /* Extract the label or symbol. */
8819 address = XEXP (address, 0);
8820 if (GET_CODE (address) == PLUS)
8821 address = XEXP (address, 0);
8822 address = XVECEXP (address, 0, 0);
8824 if (GET_CODE (address) == LABEL_REF
8825 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
8826 LABEL_NUSES (XEXP (address, 0)) += nuses;
8829 /* Compute extra cost of moving data between one register class
8830 and another. */
8832 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
8833 uses this information. Hence, the general register <-> floating point
8834 register information here is not used for SFmode. */
8836 int
8837 sh_register_move_cost (enum machine_mode mode,
8838 enum reg_class srcclass, enum reg_class dstclass)
8840 if (dstclass == T_REGS || dstclass == PR_REGS)
8841 return 10;
8843 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
8844 return 4;
8846 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
8847 && REGCLASS_HAS_FP_REG (srcclass)
8848 && REGCLASS_HAS_FP_REG (dstclass))
8849 return 4;
8851 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
8852 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
8853 return 9;
8855 if ((REGCLASS_HAS_FP_REG (dstclass)
8856 && REGCLASS_HAS_GENERAL_REG (srcclass))
8857 || (REGCLASS_HAS_GENERAL_REG (dstclass)
8858 && REGCLASS_HAS_FP_REG (srcclass)))
8859 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
8860 * ((GET_MODE_SIZE (mode) + 7) / 8U));
8862 if ((dstclass == FPUL_REGS
8863 && REGCLASS_HAS_GENERAL_REG (srcclass))
8864 || (srcclass == FPUL_REGS
8865 && REGCLASS_HAS_GENERAL_REG (dstclass)))
8866 return 5;
8868 if ((dstclass == FPUL_REGS
8869 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
8870 || (srcclass == FPUL_REGS
8871 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
8872 return 7;
8874 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8875 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
8876 return 20;
8878 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8879 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
8880 return 4;
8882 if (TARGET_SHMEDIA
8883 || (TARGET_FMOVD
8884 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
8885 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
8886 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
8888 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
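/* Worked example of the formulas above: without -mfmovd, moving a DFmode
   value between a general register and a floating-point register costs
   12 * ((8 + 7) / 8) = 12, while a DFmode move within the general
   registers falls through to the final case and costs 2 * ((8 + 3) / 4)
   = 4, steering the register allocator away from cross-bank double
   moves.  */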
8891 /* Like register_operand, but take into account that SHMEDIA can use
8892 the constant zero like a general register. */
8893 int
8894 sh_register_operand (rtx op, enum machine_mode mode)
8896 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
8897 return 1;
8898 return register_operand (op, mode);
8901 int
8902 cmpsi_operand (rtx op, enum machine_mode mode)
8904 if (GET_CODE (op) == REG && REGNO (op) == T_REG
8905 && GET_MODE (op) == SImode)
8906 return 1;
8907 return arith_operand (op, mode);
8910 static rtx emit_load_ptr (rtx, rtx);
8912 static rtx
8913 emit_load_ptr (rtx reg, rtx addr)
8915 rtx mem = gen_rtx_MEM (ptr_mode, addr);
8917 if (Pmode != ptr_mode)
8918 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
8919 return emit_move_insn (reg, mem);
8922 void
8923 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8924 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8925 tree function)
8927 CUMULATIVE_ARGS cum;
8928 int structure_value_byref = 0;
8929 rtx this, this_value, sibcall, insns, funexp;
8930 tree funtype = TREE_TYPE (function);
8931 int simple_add = CONST_OK_FOR_ADD (delta);
8932 int did_load = 0;
8933 rtx scratch0, scratch1, scratch2;
8935 reload_completed = 1;
8936 epilogue_completed = 1;
8937 no_new_pseudos = 1;
8938 current_function_uses_only_leaf_regs = 1;
8940 emit_note (NOTE_INSN_PROLOGUE_END);
8942 /* Find the "this" pointer. We have such a wide range of ABIs for the
8943 SH that it's best to do this completely machine independently.
8944 "this" is passed as first argument, unless a structure return pointer
8945 comes first, in which case "this" comes second. */
8946 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0);
8947 #ifndef PCC_STATIC_STRUCT_RETURN
8948 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8949 structure_value_byref = 1;
8950 #endif /* not PCC_STATIC_STRUCT_RETURN */
8951 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
8953 tree ptype = build_pointer_type (TREE_TYPE (funtype));
8955 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
8957 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
8959 /* For SHcompact, we only have r0 for a scratch register: r1 is the
8960 static chain pointer (even if you can't have nested virtual functions
8961 right now, someone might implement them sometime), and the rest of the
8962 registers are used for argument passing, are callee-saved, or reserved. */
8963 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
8964 if (! TARGET_SH5)
8966 scratch1 = gen_rtx_REG (ptr_mode, 1);
8967 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
8968 to the location where struct values are to be returned. */
8969 scratch2 = gen_rtx_REG (Pmode, 3);
8971 else if (TARGET_SHMEDIA)
8973 scratch1 = gen_rtx_REG (ptr_mode, 21);
8974 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
8977 this_value = plus_constant (this, delta);
8978 if (vcall_offset
8979 && (simple_add || scratch0 != scratch1)
8980 && strict_memory_address_p (ptr_mode, this_value))
8982 emit_load_ptr (scratch0, this_value);
8983 did_load = 1;
8986 if (!delta)
8987 ; /* Do nothing. */
8988 else if (simple_add)
8989 emit_move_insn (this, this_value);
8990 else
8992 emit_move_insn (scratch1, GEN_INT (delta));
8993 emit_insn (gen_add2_insn (this, scratch1));
8996 if (vcall_offset)
8998 rtx offset_addr;
9000 if (!did_load)
9001 emit_load_ptr (scratch0, this);
9003 offset_addr = plus_constant (scratch0, vcall_offset);
9004 if (strict_memory_address_p (ptr_mode, offset_addr))
9005 ; /* Do nothing. */
9006 else if (! TARGET_SH5)
9008 /* scratch0 != scratch1, and we have indexed loads. Get better
9009 schedule by loading the offset into r1 and using an indexed
9010 load - then the load of r1 can issue before the load from
9011 (this + delta) finishes. */
9012 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9013 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9015 else if (CONST_OK_FOR_ADD (vcall_offset))
9017 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9018 offset_addr = scratch0;
9020 else if (scratch0 != scratch1)
9022 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9023 emit_insn (gen_add2_insn (scratch0, scratch1));
9024 offset_addr = scratch0;
9026 else
9027 abort (); /* FIXME */
9028 emit_load_ptr (scratch0, offset_addr);
9030 if (Pmode != ptr_mode)
9031 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9032 emit_insn (gen_add2_insn (this, scratch0));
9035 /* Generate a tail call to the target function. */
9036 if (! TREE_USED (function))
9038 assemble_external (function);
9039 TREE_USED (function) = 1;
9041 funexp = XEXP (DECL_RTL (function), 0);
9042 emit_move_insn (scratch2, funexp);
9043 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9044 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9045 SIBLING_CALL_P (sibcall) = 1;
9046 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9047 emit_barrier ();
9049 /* Run just enough of rest_of_compilation to do scheduling and get
9050 the insns emitted. Note that use_thunk calls
9051 assemble_start_function and assemble_end_function. */
9053 insn_locators_initialize ();
9054 insns = get_insns ();
9056 if (optimize > 0 && flag_schedule_insns_after_reload)
9059 find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
9060 life_analysis (insns, rtl_dump_file, PROP_FINAL);
9062 split_all_insns (1);
9064 schedule_insns (rtl_dump_file);
9067 sh_reorg ();
9069 if (optimize > 0 && flag_delayed_branch)
9070 dbr_schedule (insns, rtl_dump_file);
9071 shorten_branches (insns);
9072 final_start_function (insns, file, 1);
9073 final (insns, file, 1, 0);
9074 final_end_function ();
9076 if (optimize > 0 && flag_schedule_insns_after_reload)
9078 /* Release all memory allocated by flow. */
9079 free_basic_block_vars (0);
9081 /* Release all memory held by regsets now. */
9082 regset_release_memory ();
9085 reload_completed = 0;
9086 epilogue_completed = 0;
9087 no_new_pseudos = 0;
9090 rtx
9091 function_symbol (const char *name)
9093 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9094 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9095 return sym;
9098 /* Find the number of the first general purpose register in S that is set. */
9099 static int
9100 scavenge_reg (HARD_REG_SET *s)
9102 int r;
9103 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9104 if (TEST_HARD_REG_BIT (*s, r))
9105 return r;
9106 return -1;
9109 rtx
9110 sh_get_pr_initial_val (void)
9112 rtx val;
9114 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9115 PR register on SHcompact, because it might be clobbered by the prologue.
9116 We check first if that is known to be the case. */
9117 if (TARGET_SHCOMPACT
9118 && ((current_function_args_info.call_cookie
9119 & ~ CALL_COOKIE_RET_TRAMP (1))
9120 || current_function_has_nonlocal_label))
9121 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9123 /* If we haven't finished rtl generation, there might be a nonlocal label
9124 that we haven't seen yet.
9125 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9126 is set, unless it has been called before for the same register. And even
9127 then, we end in trouble if we didn't use the register in the same
9128 basic block before. So call get_hard_reg_initial_val now and wrap it
9129 in an unspec if we might need to replace it. */
9130 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9131 combine can put the pseudo returned by get_hard_reg_initial_val into
9132 instructions that need a general purpose register, which will fail to
9133 be recognized when the pseudo becomes allocated to PR. */
9134 val
9135 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9136 if (TARGET_SH1)
9137 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9138 return val;
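/* Expand a store-flag operation whose first operand is the T register
   itself.  T == 1 and T != 0 reduce to a single movt; T == 0 and T != 1
   are computed branch-free as 1 - T using subc and an add; comparing T
   with any other constant folds to 0 or 1.  Returns nonzero on success,
   0 if the operands do not match this form.  */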
9141 int
9142 sh_expand_t_scc (enum rtx_code code, rtx target)
9144 rtx result = target;
9145 HOST_WIDE_INT val;
9147 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9148 || GET_CODE (sh_compare_op1) != CONST_INT)
9149 return 0;
9150 if (GET_CODE (result) != REG)
9151 result = gen_reg_rtx (SImode);
9152 val = INTVAL (sh_compare_op1);
9153 if ((code == EQ && val == 1) || (code == NE && val == 0))
9154 emit_insn (gen_movt (result));
9155 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9157 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9158 emit_insn (gen_subc (result, result, result));
9159 emit_insn (gen_addsi3 (result, result, GEN_INT (1)));
9161 else if (code == EQ || code == NE)
9162 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9163 else
9164 return 0;
9165 if (result != target)
9166 emit_move_insn (target, result);
9167 return 1;
9170 #include "gt-sh.h"