/* Output routines for GCC for Hitachi / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
   Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "expr.h"
#include "optabs.h"
#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "toplev.h"
#include "recog.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "real.h"

int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
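
/* For a multiword value held in the register pair REGNO/REGNO+1,
   reg_names[REGNO + MSW] names the word holding the most significant
   bits and reg_names[REGNO + LSW] the least significant ones; e.g. on
   a big-endian target the MSW lives in the lower-numbered register.
   See the 'R' and 'S' cases of print_operand below.  */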

/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_P (size) : CONST_OK_FOR_I (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
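
/* TARGET_SHMEDIA64 selects the 64-bit ABI (see output_file_start), so
   the GEN_* selectors above pick the insn patterns that match the
   pointer width: DImode arithmetic for the 64-bit ABI, SImode
   everywhere else.  */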

/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

/* ??? The pragma interrupt support will not work for SH3.  */
/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
   output code for the next function appropriate for an interrupt handler.  */
int pragma_interrupt;

/* This is set by the trap_exit attribute for functions.  It specifies
   a trap number to be used in a trapa instruction at function exit
   (instead of an rte instruction).  */
int trap_exit;

/* This is used by the sp_switch attribute for functions.  It specifies
   a variable holding the address of the stack the interrupt function
   should switch to/from at entry/exit.  */
rtx sp_switch;

/* This is set by #pragma trapa, and is similar to the above, except that
   the compiler doesn't emit code to preserve all registers.  */
static int pragma_trapa;

/* This is set by #pragma nosave_low_regs.  This is useful on the SH3,
   which has a separate set of low regs for User and Supervisor modes.
   This should only be used for the lowest level of interrupts.  Higher levels
   of interrupts must save the registers in case they themselves are
   interrupted.  */
int pragma_nosave_low_regs;

/* This is used for communication between SETUP_INCOMING_VARARGS and
   sh_expand_prologue.  */
int current_function_anonymous_args;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number.  */

int regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

/* Provide reg_class from a letter such as appears in the machine
   description.  */

const enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
  /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
  /* i */ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
  /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
  /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS,
  /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
  /* y */ FPUL_REGS, /* z */ R0_REGS
};

int assembler_dialect;

static void split_branches PARAMS ((rtx));
static int branch_dest PARAMS ((rtx));
static void force_into PARAMS ((rtx, rtx));
static void print_slot PARAMS ((rtx));
static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
static void dump_table PARAMS ((rtx));
static int hi_const PARAMS ((rtx));
static int broken_move PARAMS ((rtx));
static int mova_p PARAMS ((rtx));
static rtx find_barrier PARAMS ((int, rtx, rtx));
static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
static rtx gen_block_redirect PARAMS ((rtx, int, int));
static void output_stack_adjust PARAMS ((int, rtx, int, rtx (*) (rtx)));
static rtx frame_insn PARAMS ((rtx));
static rtx push PARAMS ((int));
static void pop PARAMS ((int));
static void push_regs PARAMS ((HOST_WIDE_INT *));
static void calc_live_regs PARAMS ((int *, HOST_WIDE_INT *));
static void mark_use PARAMS ((rtx, rtx *));
static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
static rtx mark_constant_pool_use PARAMS ((rtx));
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void sh_insert_attributes PARAMS ((tree, tree *));
static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static int sh_use_dfa_interface PARAMS ((void));
static int sh_issue_rate PARAMS ((void));

static bool sh_cannot_modify_jumps_p PARAMS ((void));
static bool sh_ms_bitfield_layout_p PARAMS ((tree));

static void sh_encode_section_info PARAMS ((tree, int));
static const char *sh_strip_name_encoding PARAMS ((const char *));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  sh_use_dfa_interface
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_strip_name_encoding

struct gcc_target targetm = TARGET_INITIALIZER;

/* Print the operand address in x to the stream.  */

void
print_operand_address (stream, x)
     FILE *stream;
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
        rtx base = XEXP (x, 0);
        rtx index = XEXP (x, 1);

        switch (GET_CODE (index))
          {
          case CONST_INT:
            fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
                     reg_names[true_regnum (base)]);
            break;

          case REG:
          case SUBREG:
            {
              int base_num = true_regnum (base);
              int index_num = true_regnum (index);

              fprintf (stream, "@(r0,%s)",
                       reg_names[MAX (base_num, index_num)]);
              break;
            }

          default:
            debug_rtx (x);
            abort ();
          }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}

/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  print an `x' if `m' will print `base,index'.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
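
/* E.g. output_movedouble below uses the 'S'/'R' pair in the template
   "sts mach,%S0\n\tsts macl,%R0" so that the high and low words of a
   register pair are named correctly for either endianness.  */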

void
print_operand (stream, x, code)
     FILE *stream;
     rtx x;
     int code;
{
  switch (code)
    {
    case '.':
      if (final_sequence
          && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
        fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      {
        int interrupt_handler;

        if ((lookup_attribute
             ("interrupt_handler",
              DECL_ATTRIBUTES (current_function_decl)))
            != NULL_TREE)
          interrupt_handler = 1;
        else
          interrupt_handler = 0;

        if (trap_exit)
          fprintf (stream, "trapa #%d", trap_exit);
        else if (interrupt_handler)
          fprintf (stream, "rte");
        else
          fprintf (stream, "rts");
        break;
      }
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
        fprintf (stream, "\n\tnop");
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    case 'R':
      fputs (reg_names[REGNO (x) + LSW], (stream));
      break;
    case 'S':
      fputs (reg_names[REGNO (x) + MSW], (stream));
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
        {
        case REG:
          fputs (reg_names[REGNO (x) + 1], (stream));
          break;
        case MEM:
          if (GET_CODE (XEXP (x, 0)) != PRE_DEC
              && GET_CODE (XEXP (x, 0)) != POST_INC)
            x = adjust_address (x, SImode, 4);
          print_operand_address (stream, XEXP (x, 0));
          break;
        default:
          break;
        }
      break;
    case 'o':
      switch (GET_CODE (x))
        {
        case PLUS:  fputs ("add", stream); break;
        case MINUS: fputs ("sub", stream); break;
        case MULT:  fputs ("mul", stream); break;
        case DIV:   fputs ("div", stream); break;
        default:
          break;
        }
      break;
    case 'M':
      if (GET_CODE (x) == MEM
          && GET_CODE (XEXP (x, 0)) == PLUS
          && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
              || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
        fputc ('x', stream);
      break;

    case 'm':
      if (GET_CODE (x) != MEM)
        abort ();
      x = XEXP (x, 0);
      switch (GET_CODE (x))
        {
        case REG:
        case SUBREG:
          print_operand (stream, x, 0);
          fputs (", 0", stream);
          break;

        case PLUS:
          print_operand (stream, XEXP (x, 0), 0);
          fputs (", ", stream);
          print_operand (stream, XEXP (x, 1), 0);
          break;

        default:
          abort ();
        }
      break;

    case 'u':
      if (GET_CODE (x) == CONST_INT)
        {
          fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
          break;
        }
      /* Fall through.  */

    default:
      switch (GET_CODE (x))
        {
          /* FIXME: We need this on SHmedia32 because reload generates
             some sign-extended HI or QI loads into DImode registers
             but, because Pmode is SImode, the address ends up with a
             subreg:SI of the DImode register.  Maybe reload should be
             fixed so as to apply alter_subreg to such loads?  */
        case SUBREG:
          if (SUBREG_BYTE (x) != 0
              || GET_CODE (SUBREG_REG (x)) != REG)
            abort ();

          x = SUBREG_REG (x);
          /* Fall through.  */

        case REG:
          if (FP_REGISTER_P (REGNO (x))
              && GET_MODE (x) == V16SFmode)
            fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
          else if (FP_REGISTER_P (REGNO (x))
                   && GET_MODE (x) == V4SFmode)
            fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
          else if (GET_CODE (x) == REG
                   && GET_MODE (x) == V2SFmode)
            fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
          else if (FP_REGISTER_P (REGNO (x))
                   && GET_MODE_SIZE (GET_MODE (x)) > 4)
            fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
          else
            fputs (reg_names[REGNO (x)], (stream));
          break;

        case MEM:
          output_address (XEXP (x, 0));
          break;

        case CONST:
          if (TARGET_SHMEDIA
              && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
              && GET_MODE (XEXP (x, 0)) == DImode
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
              && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
            {
              rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);

              fputc ('(', stream);
              if (GET_CODE (val) == ASHIFTRT)
                {
                  fputc ('(', stream);
                  if (GET_CODE (XEXP (val, 0)) == CONST)
                    fputc ('(', stream);
                  output_addr_const (stream, XEXP (val, 0));
                  if (GET_CODE (XEXP (val, 0)) == CONST)
                    fputc (')', stream);
                  fputs (" >> ", stream);
                  output_addr_const (stream, XEXP (val, 1));
                  fputc (')', stream);
                }
              else
                {
                  if (GET_CODE (val) == CONST)
                    fputc ('(', stream);
                  output_addr_const (stream, val);
                  if (GET_CODE (val) == CONST)
                    fputc (')', stream);
                }
              fputs (" & 65535)", stream);
              break;
            }

          /* Fall through.  */
        default:
          if (TARGET_SH1)
            fputc ('#', stream);
          output_addr_const (stream, x);
          break;
        }
      break;
    }
}

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (value, target)
     rtx value, target;
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.

   Return 1 if code was emitted, 0 to make the caller fall back.  */

int
expand_block_move (operands)
     rtx *operands;
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (! constp || align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
        return 0;
      else if (bytes == 12)
        {
          tree entry_name;
          rtx sym;
          rtx func_addr_rtx;
          rtx r4 = gen_rtx (REG, SImode, 4);
          rtx r5 = gen_rtx (REG, SImode, 5);

          entry_name = get_identifier ("__movstrSI12_i4");

          sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
          func_addr_rtx = copy_to_mode_reg (Pmode, sym);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);
          emit_insn (gen_block_move_real_i4 (func_addr_rtx));
          return 1;
        }
      else if (! TARGET_SMALLCODE)
        {
          tree entry_name;
          rtx sym;
          rtx func_addr_rtx;
          int dwords;
          rtx r4 = gen_rtx (REG, SImode, 4);
          rtx r5 = gen_rtx (REG, SImode, 5);
          rtx r6 = gen_rtx (REG, SImode, 6);

          entry_name = get_identifier (bytes & 4
                                       ? "__movstr_i4_odd"
                                       : "__movstr_i4_even");
          sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
          func_addr_rtx = copy_to_mode_reg (Pmode, sym);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);

          dwords = bytes >> 3;
          emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
          emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
          return 1;
        }
      else
        return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      tree entry_name;
      rtx sym;
      rtx func_addr_rtx;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movstrSI%d", bytes);
      entry_name = get_identifier (entry);
      sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different,
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      tree entry_name;
      rtx sym;
      rtx func_addr_rtx;
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      entry_name = get_identifier ("__movstr");
      sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
         for each 64 bytes moved.  Then the negative bit left over is used
         as an index into a list of move instructions.  e.g., a 72 byte move
         would be set up with size(r6) = 14, for one iteration through the
         big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}

/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

int
prepare_move_operands (operands, mode)
     rtx operands[];
     enum machine_mode mode;
{
  if ((mode == SImode || mode == DImode) && flag_pic)
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
        {
          if (GET_CODE (operands[0]) == MEM)
            operands[1] = force_reg (Pmode, operands[1]);
          else if (GET_CODE (operands[1]) == LABEL_REF
                   && target_reg_operand (operands[0], mode))
            /* It's ok.  */;
          else
            {
              temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
              operands[1] = legitimize_pic_address (operands[1], mode, temp);
            }
        }
      else if (GET_CODE (operands[1]) == CONST
               && GET_CODE (XEXP (operands[1], 0)) == PLUS
               && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
        {
          temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
                                         mode, temp);
          operands[1] = expand_binop (mode, add_optab, temp,
                                      XEXP (XEXP (operands[1], 0), 1),
                                      no_new_pseudos ? temp
                                      : gen_reg_rtx (Pmode),
                                      0, OPTAB_LIB_WIDEN);
        }
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
          && ! register_operand (operands[1], mode))
        operands[1] = copy_to_mode_reg (mode, operands[1]);

      /* This case can happen while generating code to move the result
         of a library call to the target.  Reject `st r0,@(rX,rY)' because
         reload will fail to find a spill register for rX, since r0 is already
         being used for the source.  */
      else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
               && GET_CODE (operands[0]) == MEM
               && GET_CODE (XEXP (operands[0], 0)) == PLUS
               && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
        operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  return 0;
}

/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (code)
     enum rtx_code code;
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      abort ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
           || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
                gen_rtx (SET, VOIDmode, t_reg,
                         gen_rtx (code, SImode,
                                  sh_compare_op0, sh_compare_op1)),
                gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx (SET, VOIDmode, t_reg,
                        gen_rtx (code, SImode, sh_compare_op0,
                                 sh_compare_op1)));

  return t_reg;
}

/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (operands, code)
     rtx *operands;
     int code;
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
          || code == GTU || code == GEU
          || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
        sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
                        gen_rtx_REG (SImode, T_REG),
                        gen_rtx (code, SImode, sh_compare_op0,
                                 sh_compare_op1));
  if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx (PARALLEL, VOIDmode,
                      gen_rtvec (2, insn,
                                 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}

/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

const char *
output_movedouble (insn, operands, mode)
     rtx insn ATTRIBUTE_UNUSED;
     rtx operands[];
     enum machine_mode mode;
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
        return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
         when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
        return "mov %T1,%T0\n\tmov %1,%0";
      else
        return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
        output_asm_insn ("mov #-1,%S0", operands);
      else
        output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      if (GET_CODE (inside) == REG)
        ptrreg = REGNO (inside);
      else if (GET_CODE (inside) == SUBREG)
        ptrreg = subreg_regno (inside);
      else if (GET_CODE (inside) == PLUS)
        {
          ptrreg = REGNO (XEXP (inside, 0));
          /* ??? A r0+REG address shouldn't be possible here, because it isn't
             an offsettable address.  Unfortunately, offsettable addresses use
             QImode to check the offset, and a QImode offsettable address
             requires r0 for the other operand, which is not currently
             supported, so we can't use the 'o' constraint.
             Thus we must check for and handle r0+REG addresses here.
             We punt for now, since this is likely very rare.  */
          if (GET_CODE (XEXP (inside, 1)) == REG)
            abort ();
        }
      else if (GET_CODE (inside) == LABEL_REF)
        return "mov.l %1,%0\n\tmov.l %1+4,%T0";
      else if (GET_CODE (inside) == POST_INC)
        return "mov.l %1,%0\n\tmov.l %1,%T0";
      else
        abort ();

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
        return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}

/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (insn)
     rtx insn;
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}

const char *
output_far_jump (insn, op)
     rtx insn;
     rtx op;
{
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));

  this.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1; braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
        {
          if (TARGET_SH2)
            jump = "mov.l %O0,%1; braf %1";
          else
            jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
        }
      else
        jump = "mov.l %O0,%1; jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE (PREV_INSN (insn)) == INSN
      && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
    {
      this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
        jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
        print_slot (final_sequence);
      else
        output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
        print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
         Fortunately, MACL is fixed and call-clobbered, and we never
         need its value across jumps, so save r13 in it instead of in
         the stack.  */
      if (TARGET_SH5)
        output_asm_insn ("lds r13, macl", 0);
      else
        output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      if (TARGET_SH5)
        output_asm_insn ("sts macl, r13", 0);
      else
        output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      braf_base_lab = gen_label_rtx ();
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
                                 CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
  this.op = op;
  if (far && flag_pic)
    {
      if (TARGET_SH2)
        this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
  return "";
}

/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

const char *
output_branch (logic, insn, operands)
     int logic;
     rtx insn;
     rtx *operands;
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
         branch to exceed its range (we could reverse it, but only
         when we know we won't overextend other branches; this should
         best be handled by relaxation).
         It can also happen when other condbranches hoist delay slot insns
         from their destination, thus leading to code size increase.
         But the branch will still be in the range -4092..+4098 bytes.  */

      if (! TARGET_RELAX)
        {
          int label = lf++;
          /* The call to print_slot will clobber the operands.  */
          rtx op0 = operands[0];

          /* If the instruction in the delay slot is annulled (true), then
             there is no delay slot where we can put it now.  The only safe
             place for it is after the label.  final will do that by default.  */

          if (final_sequence
              && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
            {
              asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
                           ASSEMBLER_DIALECT ? "/" : ".", label);
              print_slot (final_sequence);
            }
          else
            asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

          output_asm_insn ("bra\t%l0", &op0);
          fprintf (asm_out_file, "\tnop\n");
          ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LF", label);

          return "";
        }
      /* When relaxing, handle this like a short branch.  The linker
         will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";
    default:
      /* There should be no longer branches now - that would
         indicate that something has destroyed the branches set
         up in machine_dependent_reorg.  */
      abort ();
    }
}

const char *
output_branchy_insn (code, template, insn, operands)
     enum rtx_code code;
     const char *template;
     rtx insn;
     rtx *operands;
{
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
    {
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
        {
          /* Following branch not taken */
          operands[9] = gen_label_rtx ();
          emit_label_after (operands[9], next_insn);
          INSN_ADDRESSES_NEW (operands[9],
                              INSN_ADDRESSES (INSN_UID (next_insn))
                              + get_attr_length (next_insn));
          return template;
        }
      else
        {
          int offset = (branch_dest (next_insn)
                        - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
          if (offset >= -252 && offset <= 258)
            {
              if (GET_CODE (src) == IF_THEN_ELSE)
                /* branch_true */
                src = XEXP (src, 1);
              operands[9] = src;
              return template;
            }
        }
    }
  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
                      INSN_ADDRESSES (INSN_UID (insn))
                      + get_attr_length (insn));
  return template;
}

const char *
output_ieee_ccmpeq (insn, operands)
     rtx insn, *operands;
{
  return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
}

/* Output to FILE the start of the assembler file.  */

void
output_file_start (file)
     FILE *file;
{
  output_file_directive (file, main_input_filename);

  /* Switch to the data section so that the coffsem symbol
     isn't in the text section.  */
  data_section ();

  if (TARGET_LITTLE_ENDIAN)
    fprintf (file, "\t.little\n");

  if (TARGET_SHCOMPACT)
    fprintf (file, "\t.mode\tSHcompact\n");
  else if (TARGET_SHMEDIA)
    fprintf (file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
             TARGET_SHMEDIA64 ? 64 : 32);
}

/* Actual number of instructions used to make a shift by N.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
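
/* An entry of 8 flags a shift count that is not expanded inline;
   expand_ashiftrt below hands those to an __ashiftrt_r4_<N> library
   routine (or, on SH3, to a dynamic shift).  */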

/* Left shift and logical right shift are the same.  */
static const char shift_insns[] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  */
static const short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
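
/* For example, a shift by 13 uses the row {8, 2, 1, 2}: four constant
   shifts totalling 13, matching shift_insns[13] == 4.  A negative entry
   shifts in the opposite direction (see gen_ashift below), so row 14,
   {8, -2, 8}, means left 8, right 2, left 8 again.  */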

/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static const char ext_shift_insns[] =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
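
/* E.g. ext_shift_amounts[6] == {8, -2} reaches a net left shift of 6 in
   two insns where shift_amounts[6] needs three, at the price of
   clobbering the two highmost bits.  */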

/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned into an
   arithmetic shift to shift it by N without data loss, and quicker than
   by other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
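
/* The OR-with-8 above is just a compact test for N == 7 || N == 15:
   those are the only values in 0..31 that it maps to 15.  */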

/* This is used in length attributes in sh.md to help compute the length
   of arbitrary constant shift instructions.  */

int
shift_insns_rtx (insn)
     rtx insn;
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int shift_count = INTVAL (XEXP (set_src, 1));
  enum rtx_code shift_code = GET_CODE (set_src);

  switch (shift_code)
    {
    case ASHIFTRT:
      return ashiftrt_insns[shift_count];
    case LSHIFTRT:
    case ASHIFT:
      return shift_insns[shift_count];
    default:
      abort ();
    }
}

/* Return the cost of a shift.  */

int
shiftcosts (x)
     rtx x;
{
  int value;

  if (TARGET_SHMEDIA)
    return 1;

  if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      if (GET_MODE (x) == DImode
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && INTVAL (XEXP (x, 1)) == 1)
        return 2;

      /* Everything else is invalid, because there is no pattern for it.  */
      return 10000;
    }
  /* If shift by a non constant, then this will be expensive.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return SH_DYNAMIC_SHIFT_COST;

  value = INTVAL (XEXP (x, 1));

  /* Otherwise, return the true cost in instructions.  */
  if (GET_CODE (x) == ASHIFTRT)
    {
      int cost = ashiftrt_insns[value];
      /* If SH3, then we put the constant in a reg and use shad.  */
      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
        cost = 1 + SH_DYNAMIC_SHIFT_COST;
      return cost;
    }
  else
    return shift_insns[value];
}

/* Return the cost of an AND operation.  */

int
andcosts (x)
     rtx x;
{
  int i;

  /* Anding with a register is a single cycle `and' instruction.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return 1;

  i = INTVAL (XEXP (x, 1));

  if (TARGET_SHMEDIA)
    {
      if ((GET_CODE (XEXP (x, 1)) == CONST_INT
           && CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
          || EXTRA_CONSTRAINT_S (XEXP (x, 1)))
        return 1;
      else
        return 2;
    }

  /* These constants are single cycle extu.[bw] instructions.  */
  if (i == 0xff || i == 0xffff)
    return 1;
  /* A constant that can be used in an `and' immediate instruction is a
     single cycle, but this requires r0, so make it a little more
     expensive.  */
  if (CONST_OK_FOR_L (i))
    return 2;
  /* Constants that can be loaded with a mov immediate and an and.
     This case is probably unnecessary.  */
  if (CONST_OK_FOR_I (i))
    return 2;
  /* Any other constant requires a 2 cycle pc-relative load plus an and.
     This case is probably unnecessary.  */
  return 3;
}

/* Return the cost of an addition or a subtraction.  */

int
addsubcosts (x)
     rtx x;
{
  /* Adding a register is a single cycle insn.  */
  if (GET_CODE (XEXP (x, 1)) == REG
      || GET_CODE (XEXP (x, 1)) == SUBREG)
    return 1;

  /* Likewise for small constants.  */
  if (GET_CODE (XEXP (x, 1)) == CONST_INT
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
    return 1;

  if (TARGET_SHMEDIA)
    switch (GET_CODE (XEXP (x, 1)))
      {
      case CONST:
      case LABEL_REF:
      case SYMBOL_REF:
        return TARGET_SHMEDIA64 ? 5 : 3;

      case CONST_INT:
        if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
          return 2;
        else if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1)) >> 16))
          return 3;
        else if (CONST_OK_FOR_J ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
          return 4;

        /* Fall through.  */
      default:
        return 5;
      }

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
  return 3;
}

/* Return the cost of a multiply.  */
int
multcosts (x)
     rtx x ATTRIBUTE_UNUSED;
{
  if (TARGET_SHMEDIA)
    return 3;

  if (TARGET_SH2)
    {
      /* We have a mul insn, so we can never take more than the mul and the
         read of the mac reg, but count more because of the latency and extra
         reg usage.  */
      if (TARGET_SMALLCODE)
        return 2;
      return 3;
    }

  /* If we're aiming at small code, then just count the number of
     insns in a multiply call sequence.  */
  if (TARGET_SMALLCODE)
    return 5;

  /* Otherwise count all the insns in the routine we'd be calling too.  */
  return 20;
}

/* Code to expand a shift.  */

void
gen_ashift (type, n, reg)
     int type;
     int n;
     rtx reg;
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
        type = LSHIFTRT;
      else
        type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
      emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
      break;
    case LSHIFTRT:
      if (n == 1)
        emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
      else
        emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
      break;
    case ASHIFT:
      emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
      break;
    }
}

/* Same for HImode */

void
gen_ashift_hi (type, n, reg)
     int type;
     int n;
     rtx reg;
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
        type = LSHIFTRT;
      else
        type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
    case LSHIFTRT:
      /* We don't have HImode right shift operations because using the
         ordinary 32 bit shift instructions for that doesn't generate proper
         zero/sign extension.
         gen_ashift_hi is only called in contexts where we know that the
         sign extension works out correctly.  */
      {
        int offset = 0;
        if (GET_CODE (reg) == SUBREG)
          {
            offset = SUBREG_BYTE (reg);
            reg = SUBREG_REG (reg);
          }
        gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
        break;
      }
    case ASHIFT:
      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
      break;
    }
}

/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  */

void
gen_shifty_op (code, operands)
     int code;
     rtx *operands;
{
  int value = INTVAL (operands[2]);
  int max, i;

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 0x1f;

  if (value == 31)
    {
      if (code == LSHIFTRT)
        {
          emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
          emit_insn (gen_movt (operands[0]));
          return;
        }
      else if (code == ASHIFT)
        {
          /* There is a two instruction sequence for 31 bit left shifts,
             but it requires r0.  */
          if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
            {
              emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
              emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
              return;
            }
        }
    }
  else if (value == 0)
    {
      /* This can happen when not optimizing.  We must output something here
         to prevent the compiler from aborting in final.c after the try_split
         call.  */
      emit_insn (gen_nop ());
      return;
    }

  max = shift_insns[value];
  for (i = 0; i < max; i++)
    gen_ashift (code, shift_amounts[value][i], operands[0]);
}

/* Same as above, but optimized for values where the topmost bits don't
   matter.  */

void
gen_shifty_hi_op (code, operands)
     int code;
     rtx *operands;
{
  int value = INTVAL (operands[2]);
  int max, i;
  void (*gen_fun) PARAMS ((int, int, rtx));

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */
  value &= 31;
  if (value == 0)
    {
      emit_insn (gen_nop ());
      return;
    }

  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
  if (code == ASHIFT)
    {
      max = ext_shift_insns[value];
      for (i = 0; i < max; i++)
        gen_fun (code, ext_shift_amounts[value][i], operands[0]);
    }
  else
    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_shift_insns[value] - 1; i >= 0; i--)
      gen_fun (code, ext_shift_amounts[value][i], operands[0]);
}

/* Output RTL for an arithmetic right shift.  */

/* ??? Rewrite to use super-optimizer sequences.  */

int
expand_ashiftrt (operands)
     rtx *operands;
{
  rtx sym;
  rtx wrk;
  char func[18];
  tree func_name;
  int value;

  if (TARGET_SH3)
    {
      if (GET_CODE (operands[2]) != CONST_INT)
        {
          rtx count = copy_to_mode_reg (SImode, operands[2]);
          emit_insn (gen_negsi2 (count, count));
          emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
          return 1;
        }
      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
               > 1 + SH_DYNAMIC_SHIFT_COST)
        {
          rtx count
            = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
          emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
          return 1;
        }
    }
  if (GET_CODE (operands[2]) != CONST_INT)
    return 0;

  value = INTVAL (operands[2]) & 31;

  if (value == 31)
    {
      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
      return 1;
    }
  else if (value >= 16 && value <= 19)
    {
      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
      value -= 16;
      while (value--)
        gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }
  /* Expand a short sequence inline; for a longer one, call a magic routine.  */
  else if (value <= 5)
    {
      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);
      while (value--)
        gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  sprintf (func, "__ashiftrt_r4_%d", value);
  func_name = get_identifier (func);
  sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
  emit_move_insn (wrk, sym);
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
  return 1;
}
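
/* Return nonzero if a shift count of COUNT is cheaper to perform as a
   dynamic shift on a register than as the inline constant-shift
   sequence.  */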
int
sh_dynamicalize_shift_p (count)
     rtx count;
{
  return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
}

/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "r")
        (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
                           (match_operand:SI 2 "const_int_operand" "n"))
                (match_operand:SI 3 "const_int_operand" "n"))) .
  LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
  return 0 for simple right / left or left/right shift combination.
  return 1 for a combination of shifts with zero_extend.
  return 2 for a combination of shifts with an AND that needs r0.
  return 3 for a combination of shifts with an AND that needs an extra
    scratch register, when the three highmost bits of the AND mask are clear.
  return 4 for a combination of shifts with an AND that needs an extra
    scratch register, when any of the three highmost bits of the AND mask
    is set.
  If ATTRP is set, store an initial right shift width in ATTRP[0],
  and the instruction length in ATTRP[1].  These values are not valid
  when returning 0.
  When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
  shift_amounts for the last shift value that is to be used before the
  sign extend.  */
int
shl_and_kind (left_rtx, mask_rtx, attrp)
     rtx left_rtx, mask_rtx;
     int *attrp;
{
  unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
  int left = INTVAL (left_rtx), right;
  int best = 0;
  int cost, best_cost = 10000;
  int best_right = 0, best_len = 0;
  int i;
  int can_ext;

  if (left < 0 || left > 31)
    return 0;
  if (GET_CODE (mask_rtx) == CONST_INT)
    mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
  else
    mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
  /* Can this be expressed as a right shift / left shift pair?  */
  lsb = ((mask ^ (mask - 1)) >> 1) + 1;
  right = exact_log2 (lsb);
  mask2 = ~(mask + lsb - 1);
  lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
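  /* The lsb computations above use the identity that mask ^ (mask - 1)
     sets every bit up to and including the lowest set bit, so lsb and
     lsb2 isolate the lowest set bits of mask and mask2; e.g. mask == 0xff0
     gives lsb == 0x10 and hence right == 4.  */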
  /* mask has no zeroes but trailing zeroes <==> ! mask2 */
  if (! mask2)
    best_cost = shift_insns[right] + shift_insns[right + left];
  /* mask has no trailing zeroes <==> ! right */
  else if (! right && mask2 == ~(lsb2 - 1))
    {
      int late_right = exact_log2 (lsb2);
      best_cost = shift_insns[left + late_right] + shift_insns[late_right];
    }
  /* Try to use zero extend.  */
  if (mask2 == ~(lsb2 - 1))
    {
      int width, first;

      for (width = 8; width <= 16; width += 8)
        {
          /* Can we zero-extend right away?  */
          if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
            {
              cost
                = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
              if (cost < best_cost)
                {
                  best = 1;
                  best_cost = cost;
                  best_right = right;
                  best_len = cost;
                  if (attrp)
                    attrp[2] = -1;
                }
              continue;
            }
          /* ??? Could try to put zero extend into initial right shift,
             or even shift a bit left before the right shift.  */
          /* Determine value of first part of left shift, to get to the
             zero extend cut-off point.  */
          first = width - exact_log2 (lsb2) + right;
          if (first >= 0 && right + left - first >= 0)
            {
              cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
                + ext_shift_insns[right + left - first];
              if (cost < best_cost)
                {
                  best = 1;
                  best_cost = cost;
                  best_right = right;
                  best_len = cost;
                  if (attrp)
                    attrp[2] = first;
                }
            }
        }
    }
  /* Try to use r0 AND pattern */
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
        break;
      if (! CONST_OK_FOR_L (mask >> i))
        continue;
      cost = (i != 0) + 2 + ext_shift_insns[left + i];
      if (cost < best_cost)
        {
          best = 2;
          best_cost = cost;
          best_right = i;
          best_len = cost - 1;
        }
    }
  /* Try to use a scratch register to hold the AND operand.  */
  can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
        break;
      cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
        + (can_ext ? ext_shift_insns : shift_insns)[left + i];
      if (cost < best_cost)
        {
          best = 4 - can_ext;
          best_cost = cost;
          best_right = i;
          best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
        }
    }

  if (attrp)
    {
      attrp[0] = best_right;
      attrp[1] = best_len;
    }
  return best;
}

/* This is used in length attributes of the unnamed instructions
   corresponding to shl_and_kind return values of 1 and 2.  */
int
shl_and_length (insn)
     rtx insn;
{
  rtx set_src, left_rtx, mask_rtx;
  int attributes[3];

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  mask_rtx = XEXP (set_src, 1);
  shl_and_kind (left_rtx, mask_rtx, attributes);
  return attributes[1];
}

/* This is used in length attribute of the and_shl_scratch instruction.  */

int
shl_and_scr_length (insn)
     rtx insn;
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int len = shift_insns[INTVAL (XEXP (set_src, 1))];
  rtx op = XEXP (set_src, 0);
  len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
  op = XEXP (XEXP (op, 0), 0);
  return len + shift_insns[INTVAL (XEXP (op, 1))];
}

/* Generating rtl?  */
extern int rtx_equal_function_value_matters;

/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned zero.  */

int
gen_shl_and (dest, left_rtx, mask_rtx, source)
     rtx dest, left_rtx, mask_rtx, source;
{
  int attributes[3];
  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) PARAMS ((int, rtx *)) = gen_shifty_hi_op;

  right = attributes[0];
  total_shift = INTVAL (left_rtx) + right;
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
  switch (kind)
    {
    default:
      return -1;
    case 1:
      {
        int first = attributes[2];
        rtx operands[3];

        if (first < 0)
          {
            emit_insn ((mask << right) <= 0xff
                       ? gen_zero_extendqisi2 (dest,
                                               gen_lowpart (QImode, source))
                       : gen_zero_extendhisi2 (dest,
                                               gen_lowpart (HImode, source)));
            source = dest;
          }
        if (source != dest)
          emit_insn (gen_movsi (dest, source));
        operands[0] = dest;
        if (right)
          {
            operands[2] = GEN_INT (right);
            gen_shifty_hi_op (LSHIFTRT, operands);
          }
        if (first > 0)
          {
            operands[2] = GEN_INT (first);
            gen_shifty_hi_op (ASHIFT, operands);
            total_shift -= first;
            mask <<= first;
          }
        if (first >= 0)
          emit_insn (mask <= 0xff
                     ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
                     : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
        if (total_shift > 0)
          {
            operands[2] = GEN_INT (total_shift);
            gen_shifty_hi_op (ASHIFT, operands);
          }
        break;
      }
    case 4:
      shift_gen_fun = gen_shifty_op;
    case 3:
      /* If the topmost bit that matters is set, set the topmost bits
         that don't matter.  This way, we might be able to get a shorter
         signed constant.  */
      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
        mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
    case 2:
      /* Don't expand fine-grained when combining, because that will
         make the pattern fail.  */
      if (rtx_equal_function_value_matters
          || reload_in_progress || reload_completed)
        {
          rtx operands[3];

          /* Cases 3 and 4 should be handled by this split
             only while combining  */
          if (kind > 2)
            abort ();
          if (right)
            {
              emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
              source = dest;
            }
          emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
          if (total_shift)
            {
              operands[0] = dest;
              operands[1] = dest;
              operands[2] = GEN_INT (total_shift);
              shift_gen_fun (ASHIFT, operands);
            }
          break;
        }
      else
        {
          int neg = 0;
          if (kind != 4 && total_shift < 16)
            {
              neg = -ext_shift_amounts[total_shift][1];
              if (neg > 0)
                neg -= ext_shift_amounts[total_shift][2];
              else
                neg = 0;
            }
          emit_insn (gen_and_shl_scratch (dest, source,
                                          GEN_INT (right),
                                          GEN_INT (mask),
                                          GEN_INT (total_shift + neg),
                                          GEN_INT (neg)));
          emit_insn (gen_movsi (dest, dest));
          break;
        }
    }
  return 0;
}
1891 /* Try to find a good way to implement the combiner pattern
1892 [(set (match_operand:SI 0 "register_operand" "=r")
1893 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1894 (match_operand:SI 2 "const_int_operand" "n")
1895 (match_operand:SI 3 "const_int_operand" "n")
1896 (const_int 0)))
1897 (clobber (reg:SI T_REG))]
1898 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
1899 return 0 for simple left / right shift combination.
1900 return 1 for left shift / 8 bit sign extend / left shift.
1901 return 2 for left shift / 16 bit sign extend / left shift.
1902 return 3 for left shift / 8 bit sign extend / shift / sign extend.
1903 return 4 for left shift / 16 bit sign extend / shift / sign extend.
1904 return 5 for left shift / 16 bit sign extend / right shift
1905 return 6 for < 8 bit sign extend / left shift.
1906 return 7 for < 8 bit sign extend / left shift / single right shift.
1907 If COSTP is nonzero, assign the calculated cost to *COSTP. */
1910 shl_sext_kind (left_rtx, size_rtx, costp)
1911 rtx left_rtx, size_rtx;
1912 int *costp;
1914 int left, size, insize, ext;
1915 int cost, best_cost;
1916 int kind;
1917
1918 left = INTVAL (left_rtx);
1919 size = INTVAL (size_rtx);
1920 insize = size - left;
1921 if (insize <= 0)
1922 abort ();
1923 /* Default to left / right shift. */
1924 kind = 0;
1925 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
1926 if (size <= 16)
1927 {
1928 /* 16 bit shift / sign extend / 16 bit shift */
1929 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
1930 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
1931 below, by alternative 3 or something even better. */
1932 if (cost < best_cost)
1933 {
1934 kind = 5;
1935 best_cost = cost;
1936 }
1937 }
1938 /* Try a plain sign extend between two shifts. */
1939 for (ext = 16; ext >= insize; ext -= 8)
1940 {
1941 if (ext <= size)
1942 {
1943 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
1944 if (cost < best_cost)
1945 {
1946 kind = ext / (unsigned) 8;
1947 best_cost = cost;
1948 }
1949 }
1950 /* Check if we can do a sloppy shift with a final signed shift
1951 restoring the sign. */
1952 if (EXT_SHIFT_SIGNED (size - ext))
1953 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
1954 /* If not, maybe it's still cheaper to do the second shift sloppy,
1955 and do a final sign extend? */
1956 else if (size <= 16)
1957 cost = ext_shift_insns[ext - insize] + 1
1958 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
1959 else
1960 continue;
1961 if (cost < best_cost)
1962 {
1963 kind = ext / (unsigned) 8 + 2;
1964 best_cost = cost;
1965 }
1966 }
1967 /* Check if we can sign extend in r0 */
1968 if (insize < 8)
1969 {
1970 cost = 3 + shift_insns[left];
1971 if (cost < best_cost)
1972 {
1973 kind = 6;
1974 best_cost = cost;
1975 }
1976 /* Try the same with a final signed shift. */
1977 if (left < 31)
1978 {
1979 cost = 3 + ext_shift_insns[left + 1] + 1;
1980 if (cost < best_cost)
1981 {
1982 kind = 7;
1983 best_cost = cost;
1984 }
1985 }
1986 }
1987 if (TARGET_SH3)
1988 {
1989 /* Try to use a dynamic shift. */
1990 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
1991 if (cost < best_cost)
1992 {
1993 kind = 0;
1994 best_cost = cost;
1995 }
1996 }
1997 if (costp)
1998 *costp = cost;
1999 return kind;
2000 }
2001
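/* For exposition only (not compiled): the value computed by the pattern
   above is the sign extended low SIZE bits of X << LEFT.  The default
   "kind 0" strategy implements it with one left shift that moves the
   field's sign bit into bit 31 and one arithmetic right shift; note
   that 32 - insize == 32 - size + left, matching the costs used above.
   This sketch assumes 32 bit SImode and arithmetic right shifts of
   signed values, which gcc relies on elsewhere.  */
#if 0
static int
shl_sext_reference (int x, int left, int size)
{
  int insize = size - left;

  /* Equivalent to sign_extract:SI (x << left, size, 0).  */
  return (x << (32 - insize)) >> (32 - size);
}
#endif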
2002 /* Function to be used in the length attribute of the instructions
2003 implementing this pattern. */
2004
2005 int
2006 shl_sext_length (insn)
2007 rtx insn;
2008 {
2009 rtx set_src, left_rtx, size_rtx;
2010 int cost;
2011
2012 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2013 left_rtx = XEXP (XEXP (set_src, 0), 1);
2014 size_rtx = XEXP (set_src, 1);
2015 shl_sext_kind (left_rtx, size_rtx, &cost);
2016 return cost;
2017 }
2018
2019 /* Generate rtl for this pattern */
2020
2021 int
2022 gen_shl_sext (dest, left_rtx, size_rtx, source)
2023 rtx dest, left_rtx, size_rtx, source;
2024 {
2025 int kind;
2026 int left, size, insize, cost;
2027 rtx operands[3];
2028
2029 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2030 left = INTVAL (left_rtx);
2031 size = INTVAL (size_rtx);
2032 insize = size - left;
2033 switch (kind)
2034 {
2035 case 1:
2036 case 2:
2037 case 3:
2038 case 4:
2039 {
2040 int ext = kind & 1 ? 8 : 16;
2041 int shift2 = size - ext;
2042
2043 /* Don't expand fine-grained when combining, because that will
2044 make the pattern fail. */
2045 if (! rtx_equal_function_value_matters
2046 && ! reload_in_progress && ! reload_completed)
2047 {
2048 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2049 emit_insn (gen_movsi (dest, source));
2050 break;
2051 }
2052 if (dest != source)
2053 emit_insn (gen_movsi (dest, source));
2054 operands[0] = dest;
2055 if (ext - insize)
2056 {
2057 operands[2] = GEN_INT (ext - insize);
2058 gen_shifty_hi_op (ASHIFT, operands);
2059 }
2060 emit_insn (kind & 1
2061 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2062 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2063 if (kind <= 2)
2064 {
2065 if (shift2)
2066 {
2067 operands[2] = GEN_INT (shift2);
2068 gen_shifty_op (ASHIFT, operands);
2069 }
2070 }
2071 else
2072 {
2073 if (shift2 > 0)
2074 {
2075 if (EXT_SHIFT_SIGNED (shift2))
2076 {
2077 operands[2] = GEN_INT (shift2 + 1);
2078 gen_shifty_op (ASHIFT, operands);
2079 operands[2] = GEN_INT (1);
2080 gen_shifty_op (ASHIFTRT, operands);
2081 break;
2082 }
2083 operands[2] = GEN_INT (shift2);
2084 gen_shifty_hi_op (ASHIFT, operands);
2085 }
2086 else if (shift2)
2087 {
2088 operands[2] = GEN_INT (-shift2);
2089 gen_shifty_hi_op (LSHIFTRT, operands);
2090 }
2091 emit_insn (size <= 8
2092 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2093 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2094 }
2095 break;
2096 }
2097 case 5:
2098 {
2099 int i = 16 - size;
2100 if (! rtx_equal_function_value_matters
2101 && ! reload_in_progress && ! reload_completed)
2102 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2103 else
2104 {
2105 operands[0] = dest;
2106 operands[2] = GEN_INT (16 - insize);
2107 gen_shifty_hi_op (ASHIFT, operands);
2108 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2109 }
2110 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2111 while (--i >= 0)
2112 gen_ashift (ASHIFTRT, 1, dest);
2113 break;
2114 }
2115 case 6:
2116 case 7:
2117 /* Don't expand fine-grained when combining, because that will
2118 make the pattern fail. */
2119 if (! rtx_equal_function_value_matters
2120 && ! reload_in_progress && ! reload_completed)
2121 {
2122 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2123 emit_insn (gen_movsi (dest, source));
2124 break;
2125 }
2126 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2127 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2128 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2129 operands[0] = dest;
2130 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2131 gen_shifty_op (ASHIFT, operands);
2132 if (kind == 7)
2133 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2134 break;
2135 default:
2136 return -1;
2137 }
2138 return 0;
2139 }
2140
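/* The and / xor / add sequence emitted for kinds 6 and 7 above uses a
   well-known identity for sign extending an N bit field without a
   dedicated extend instruction.  A standalone sketch, for exposition
   only (not compiled):  */
#if 0
static int
sign_extend_n (int x, int n)
{
  x &= (1 << n) - 1;            /* isolate the low N bits */
  x ^= 1 << (n - 1);            /* flip the field's sign bit */
  return x - (1 << (n - 1));    /* subtract it back out; negative
                                   fields drop below zero again */
}
/* E.g. sign_extend_n (5, 3) == -3 and sign_extend_n (3, 3) == 3.  */
#endif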
2141 /* Prefix a symbol_ref name with "datalabel". */
2142
2143 rtx
2144 gen_datalabel_ref (sym)
2145 rtx sym;
2146 {
2147 if (GET_CODE (sym) == LABEL_REF)
2148 return gen_rtx_CONST (GET_MODE (sym),
2149 gen_rtx_UNSPEC (GET_MODE (sym),
2150 gen_rtvec (1, sym),
2151 UNSPEC_DATALABEL));
2152
2153 if (GET_CODE (sym) != SYMBOL_REF)
2154 abort ();
2155
2156 XSTR (sym, 0) = concat (SH_DATALABEL_ENCODING, XSTR (sym, 0), NULL);
2157
2158 return sym;
2159 }
2160
2162 /* The SH cannot load a large constant into a register; constants have to
2163 come from a pc relative load. The reference of a pc relative load
2164 instruction must be less than 1k in front of the instruction. This
2165 means that we often have to dump a constant inside a function, and
2166 generate code to branch around it.
2167
2168 It is important to minimize this, since the branches will slow things
2169 down and make the code bigger.
2171 Worst case code looks like:
2173 mov.l L1,rn
2174 bra L2
2176 align
2177 L1: .long value
2181 mov.l L3,rn
2182 bra L4
2184 align
2185 L3: .long value
2189 We fix this by performing a scan before scheduling, which notices which
2190 instructions need to have their operands fetched from the constant table
2191 and builds the table.
2193 The algorithm is:
2194
2195 scan, find an instruction which needs a pcrel move. Look forward, find the
2196 last barrier which is within MAX_COUNT bytes of the requirement.
2197 If there isn't one, make one. Process all the instructions between
2198 the found instruction and the barrier.
2200 In the above example, we can tell that L3 is within 1k of L1, so
2201 the first move can be shrunk from the 3 insn+constant sequence into
2202 just 1 insn, and the constant moved to L3 to make:
2204 mov.l L1,rn
2206 mov.l L3,rn
2207 bra L4
2209 align
2210 L3:.long value
2211 L4:.long value
2213 Then the second move becomes the target for the shortening process. */
2215 typedef struct
2217 rtx value; /* Value in table. */
2218 rtx label; /* Label of value. */
2219 rtx wend; /* End of window. */
2220 enum machine_mode mode; /* Mode of value. */
2221 } pool_node;
2223 /* The maximum number of constants that can fit into one pool, since
2224 the pc relative range is 0...1020 bytes and constants are at least 4
2225 bytes long. */
2227 #define MAX_POOL_SIZE (1020/4)
2228 static pool_node pool_vector[MAX_POOL_SIZE];
2229 static int pool_size;
2230 static rtx pool_window_label;
2231 static int pool_window_last;
2233 /* ??? If we need a constant in HImode which is the truncated value of a
2234 constant we need in SImode, we could combine the two entries thus saving
2235 two bytes. Is this common enough to be worth the effort of implementing
2236 it? */
2238 /* ??? This stuff should be done at the same time that we shorten branches.
2239 As it is now, we must assume that all branches are the maximum size, and
2240 this causes us to almost always output constant pools sooner than
2241 necessary. */
2243 /* Add a constant to the pool and return its label. */
2245 static rtx
2246 add_constant (x, mode, last_value)
2247 rtx x;
2248 enum machine_mode mode;
2249 rtx last_value;
2250 {
2251 int i;
2252 rtx lab, new, ref, newref;
2254 /* First see if we've already got it. */
2255 for (i = 0; i < pool_size; i++)
2257 if (x->code == pool_vector[i].value->code
2258 && mode == pool_vector[i].mode)
2260 if (x->code == CODE_LABEL)
2262 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2263 continue;
2265 if (rtx_equal_p (x, pool_vector[i].value))
2267 lab = new = 0;
2268 if (! last_value
2269 || ! i
2270 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2272 new = gen_label_rtx ();
2273 LABEL_REFS (new) = pool_vector[i].label;
2274 pool_vector[i].label = lab = new;
2276 if (lab && pool_window_label)
2278 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2279 ref = pool_vector[pool_window_last].wend;
2280 LABEL_NEXTREF (newref) = ref;
2281 pool_vector[pool_window_last].wend = newref;
2283 if (new)
2284 pool_window_label = new;
2285 pool_window_last = i;
2286 return lab;
2291 /* Need a new one. */
2292 pool_vector[pool_size].value = x;
2293 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2294 lab = 0;
2295 else
2296 lab = gen_label_rtx ();
2297 pool_vector[pool_size].mode = mode;
2298 pool_vector[pool_size].label = lab;
2299 pool_vector[pool_size].wend = NULL_RTX;
2300 if (lab && pool_window_label)
2302 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2303 ref = pool_vector[pool_window_last].wend;
2304 LABEL_NEXTREF (newref) = ref;
2305 pool_vector[pool_window_last].wend = newref;
2307 if (lab)
2308 pool_window_label = lab;
2309 pool_window_last = pool_size;
2310 pool_size++;
2311 return lab;
2312 }
2313
2314 /* Output the literal table. */
2316 static void
2317 dump_table (scan)
2318 rtx scan;
2319 {
2320 int i;
2321 int need_align = 1;
2322 rtx lab, ref;
2323 int have_di = 0;
2325 /* Do two passes, first time dump out the HI sized constants. */
2327 for (i = 0; i < pool_size; i++)
2329 pool_node *p = &pool_vector[i];
2331 if (p->mode == HImode)
2333 if (need_align)
2335 scan = emit_insn_after (gen_align_2 (), scan);
2336 need_align = 0;
2338 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2339 scan = emit_label_after (lab, scan);
2340 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2341 scan);
2342 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2344 lab = XEXP (ref, 0);
2345 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2348 else if (p->mode == DImode || p->mode == DFmode)
2349 have_di = 1;
2352 need_align = 1;
2354 if (TARGET_SHCOMPACT && have_di)
2356 rtx align_insn = NULL_RTX;
2358 scan = emit_label_after (gen_label_rtx (), scan);
2359 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2360 need_align = 0;
2362 for (i = 0; i < pool_size; i++)
2364 pool_node *p = &pool_vector[i];
2366 switch (p->mode)
2368 case HImode:
2369 break;
2370 case SImode:
2371 case SFmode:
2372 if (align_insn)
2374 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2375 emit_label_before (lab, align_insn);
2376 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2377 align_insn);
2378 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2380 lab = XEXP (ref, 0);
2381 emit_insn_before (gen_consttable_window_end (lab),
2382 align_insn);
2384 delete_insn (align_insn);
2385 align_insn = NULL_RTX;
2386 continue;
2388 else
2390 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2391 scan = emit_label_after (lab, scan);
2392 scan = emit_insn_after (gen_consttable_4 (p->value,
2393 const0_rtx), scan);
2394 need_align = ! need_align;
2396 break;
2397 case DFmode:
2398 case DImode:
2399 if (need_align)
2401 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2402 align_insn = scan;
2403 need_align = 0;
2405 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2406 scan = emit_label_after (lab, scan);
2407 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2408 scan);
2409 break;
2410 default:
2411 abort ();
2412 break;
2415 if (p->mode != HImode)
2417 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2419 lab = XEXP (ref, 0);
2420 scan = emit_insn_after (gen_consttable_window_end (lab),
2421 scan);
2426 pool_size = 0;
2429 for (i = 0; i < pool_size; i++)
2431 pool_node *p = &pool_vector[i];
2433 switch (p->mode)
2435 case HImode:
2436 break;
2437 case SImode:
2438 case SFmode:
2439 if (need_align)
2441 need_align = 0;
2442 scan = emit_label_after (gen_label_rtx (), scan);
2443 scan = emit_insn_after (gen_align_4 (), scan);
2445 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2446 scan = emit_label_after (lab, scan);
2447 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2448 scan);
2449 break;
2450 case DFmode:
2451 case DImode:
2452 if (need_align)
2454 need_align = 0;
2455 scan = emit_label_after (gen_label_rtx (), scan);
2456 scan = emit_insn_after (gen_align_4 (), scan);
2458 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2459 scan = emit_label_after (lab, scan);
2460 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2461 scan);
2462 break;
2463 default:
2464 abort ();
2465 break;
2468 if (p->mode != HImode)
2470 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2472 lab = XEXP (ref, 0);
2473 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2478 scan = emit_insn_after (gen_consttable_end (), scan);
2479 scan = emit_barrier_after (scan);
2480 pool_size = 0;
2481 pool_window_label = NULL_RTX;
2482 pool_window_last = 0;
2483 }
2484
2485 /* Return non-zero if constant would be an ok source for a
2486 mov.w instead of a mov.l. */
2488 static int
2489 hi_const (src)
2490 rtx src;
2491 {
2492 return (GET_CODE (src) == CONST_INT
2493 && INTVAL (src) >= -32768
2494 && INTVAL (src) <= 32767);
2495 }
2496
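/* For exposition only (not compiled): mov.w covers exactly the sign
   extended 16 bit range, so these pool entries take 2 bytes instead of
   the 4 bytes a mov.l entry needs.  */
#if 0
  hi_const (GEN_INT (32767));   /* nonzero: ok for mov.w */
  hi_const (GEN_INT (-32768));  /* nonzero: ok for mov.w */
  hi_const (GEN_INT (32768));   /* zero: must stay a mov.l */
#endif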
2497 /* Non-zero if the insn is a move instruction which needs to be fixed. */
2498
2499 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2500 CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't
2501 need to fix it if the input value is CONST_OK_FOR_I. */
2503 static int
2504 broken_move (insn)
2505 rtx insn;
2506 {
2507 if (GET_CODE (insn) == INSN)
2508 {
2509 rtx pat = PATTERN (insn);
2510 if (GET_CODE (pat) == PARALLEL)
2511 pat = XVECEXP (pat, 0, 0);
2512 if (GET_CODE (pat) == SET
2513 /* We can load any 8 bit value if we don't care what the high
2514 order bits end up as. */
2515 && GET_MODE (SET_DEST (pat)) != QImode
2516 && (CONSTANT_P (SET_SRC (pat))
2517 /* Match mova_const. */
2518 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2519 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2520 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2521 && ! (TARGET_SH3E
2522 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2523 && (fp_zero_operand (SET_SRC (pat))
2524 || fp_one_operand (SET_SRC (pat)))
2525 /* ??? If this is a -m4 or -m4-single compilation, in general
2526 we don't know the current setting of fpscr, so disable fldi.
2527 There is an exception if this was a register-register move
2528 before reload - and hence it was ascertained that we have
2529 single precision setting - and in a post-reload optimization
2530 we changed this to do a constant load. In that case
2531 we don't have an r0 clobber, hence we must use fldi. */
2532 && (! TARGET_SH4 || TARGET_FMOVD
2533 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2534 == SCRATCH))
2535 && GET_CODE (SET_DEST (pat)) == REG
2536 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2537 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2538 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
2539 return 1;
2540 }
2541
2542 return 0;
2543 }
2544
2545 static int
2546 mova_p (insn)
2547 rtx insn;
2548 {
2549 return (GET_CODE (insn) == INSN
2550 && GET_CODE (PATTERN (insn)) == SET
2551 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2552 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2553 /* Don't match mova_const. */
2554 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2555 }
2556
2557 /* Find the last barrier from insn FROM which is close enough to hold the
2558 constant pool. If we can't find one, then create one near the end of
2559 the range. */
2561 static rtx
2562 find_barrier (num_mova, mova, from)
2563 int num_mova;
2564 rtx mova, from;
2565 {
2566 int count_si = 0;
2567 int count_hi = 0;
2568 int found_hi = 0;
2569 int found_si = 0;
2570 int found_di = 0;
2571 int hi_align = 2;
2572 int si_align = 2;
2573 int leading_mova = num_mova;
2574 rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
2575 int si_limit;
2576 int hi_limit;
2578 /* For HImode: range is 510, add 4 because pc counts from address of
2579 second instruction after this one, subtract 2 for the jump instruction
2580 that we may need to emit before the table, subtract 2 for the instruction
2581 that fills the jump delay slot (in very rare cases, reorg will take an
2582 instruction from after the constant pool or will leave the delay slot
2583 empty). This gives 510.
2584 For SImode: range is 1020, add 4 because pc counts from address of
2585 second instruction after this one, subtract 2 in case pc is 2 byte
2586 aligned, subtract 2 for the jump instruction that we may need to emit
2587 before the table, subtract 2 for the instruction that fills the jump
2588 delay slot. This gives 1018. */
2590 /* The branch will always be shortened now that the reference address for
2591 forward branches is the successor address, thus we need no longer make
2592 adjustments to the [sh]i_limit for -O0. */
2594 si_limit = 1018;
2595 hi_limit = 510;
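/* Spelled out, per the comment above: HImode 510 + 4 - 2 - 2 = 510,
   SImode 1020 + 4 - 2 - 2 - 2 = 1018. */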
2597 while (from && count_si < si_limit && count_hi < hi_limit)
2599 int inc = get_attr_length (from);
2600 int new_align = 1;
2602 if (GET_CODE (from) == CODE_LABEL)
2604 if (optimize)
2605 new_align = 1 << label_to_alignment (from);
2606 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2607 new_align = 1 << barrier_align (from);
2608 else
2609 new_align = 1;
2610 inc = 0;
2613 if (GET_CODE (from) == BARRIER)
2616 found_barrier = from;
2618 /* If we are at the end of the function, or in front of an alignment
2619 instruction, we need not insert an extra alignment. We prefer
2620 this kind of barrier. */
2621 if (barrier_align (from) > 2)
2622 good_barrier = from;
2625 if (broken_move (from))
2627 rtx pat, src, dst;
2628 enum machine_mode mode;
2630 pat = PATTERN (from);
2631 if (GET_CODE (pat) == PARALLEL)
2632 pat = XVECEXP (pat, 0, 0);
2633 src = SET_SRC (pat);
2634 dst = SET_DEST (pat);
2635 mode = GET_MODE (dst);
2637 /* We must explicitly check the mode, because sometimes the
2638 front end will generate code to load unsigned constants into
2639 HImode targets without properly sign extending them. */
2640 if (mode == HImode
2641 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2643 found_hi += 2;
2644 /* We put the short constants before the long constants, so
2645 we must count the length of short constants in the range
2646 for the long constants. */
2647 /* ??? This isn't optimal, but is easy to do. */
2648 si_limit -= 2;
2650 else
2652 /* We dump DF/DI constants before SF/SI ones, because
2653 the limit is the same, but the alignment requirements
2654 are higher. We may waste up to 4 additional bytes
2655 for alignment, and the DF/DI constant may have
2656 another SF/SI constant placed before it. */
2657 if (TARGET_SHCOMPACT
2658 && ! found_di
2659 && (mode == DFmode || mode == DImode))
2661 found_di = 1;
2662 si_limit -= 8;
2664 while (si_align > 2 && found_si + si_align - 2 > count_si)
2665 si_align >>= 1;
2666 if (found_si > count_si)
2667 count_si = found_si;
2668 found_si += GET_MODE_SIZE (mode);
2669 if (num_mova)
2670 si_limit -= GET_MODE_SIZE (mode);
2673 /* See the code in machine_dependent_reorg, which has a similar if
2674 statement that generates a new mova insn in many cases. */
2675 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
2676 inc += 2;
2679 if (mova_p (from))
2681 if (! num_mova++)
2683 leading_mova = 0;
2684 mova = from;
2685 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
2687 if (found_si > count_si)
2688 count_si = found_si;
2690 else if (GET_CODE (from) == JUMP_INSN
2691 && (GET_CODE (PATTERN (from)) == ADDR_VEC
2692 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
2694 if (num_mova)
2695 num_mova--;
2696 if (barrier_align (next_real_insn (from)) == CACHE_LOG)
2698 /* We have just passed the barrier in front of the
2699 ADDR_DIFF_VEC, which is stored in found_barrier. Since
2700 the ADDR_DIFF_VEC is accessed as data, just like our pool
2701 constants, this is a good opportunity to accommodate what
2702 we have gathered so far.
2703 If we waited any longer, we could end up at a barrier in
2704 front of code, which gives worse cache usage for separated
2705 instruction / data caches. */
2706 good_barrier = found_barrier;
2707 break;
2709 else
2711 rtx body = PATTERN (from);
2712 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
2715 /* For the SH1, we generate alignments even after jumps-around-jumps. */
2716 else if (GET_CODE (from) == JUMP_INSN
2717 && ! TARGET_SH2
2718 && ! TARGET_SMALLCODE)
2719 new_align = 4;
2721 if (found_si)
2723 count_si += inc;
2724 if (new_align > si_align)
2726 si_limit -= (count_si - 1) & (new_align - si_align);
2727 si_align = new_align;
2729 count_si = (count_si + new_align - 1) & -new_align;
2731 if (found_hi)
2733 count_hi += inc;
2734 if (new_align > hi_align)
2736 hi_limit -= (count_hi - 1) & (new_align - hi_align);
2737 hi_align = new_align;
2739 count_hi = (count_hi + new_align - 1) & -new_align;
2741 from = NEXT_INSN (from);
2744 if (num_mova)
2746 if (leading_mova)
2748 /* Try as we might, the leading mova is out of range. Change
2749 it into a load (which will become a pcload) and retry. */
2750 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2751 INSN_CODE (mova) = -1;
2752 return find_barrier (0, 0, mova);
2754 else
2756 /* Insert the constant pool table before the mova instruction,
2757 to prevent the mova label reference from going out of range. */
2758 from = mova;
2759 good_barrier = found_barrier = barrier_before_mova;
2763 if (found_barrier)
2765 if (good_barrier && next_real_insn (found_barrier))
2766 found_barrier = good_barrier;
2768 else
2770 /* We didn't find a barrier in time to dump our stuff,
2771 so we'll make one. */
2772 rtx label = gen_label_rtx ();
2774 /* If we exceeded the range, then we must back up over the last
2775 instruction we looked at. Otherwise, we just need to undo the
2776 NEXT_INSN at the end of the loop. */
2777 if (count_hi > hi_limit || count_si > si_limit)
2778 from = PREV_INSN (PREV_INSN (from));
2779 else
2780 from = PREV_INSN (from);
2782 /* Walk back to be just before any jump or label.
2783 Putting it before a label reduces the number of times the branch
2784 around the constant pool table will be hit. Putting it before
2785 a jump makes it more likely that the bra delay slot will be
2786 filled. */
2787 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
2788 || GET_CODE (from) == CODE_LABEL)
2789 from = PREV_INSN (from);
2791 from = emit_jump_insn_after (gen_jump (label), from);
2792 JUMP_LABEL (from) = label;
2793 LABEL_NUSES (label) = 1;
2794 found_barrier = emit_barrier_after (from);
2795 emit_label_after (label, found_barrier);
2798 return found_barrier;
2799 }
2800
2801 /* If the instruction INSN is implemented by a special function, and we can
2802 positively find the register that is used to call the sfunc, and this
2803 register is not used anywhere else in this instruction - except as the
2804 destination of a set, return this register; else, return 0. */
2805 static rtx
2806 sfunc_uses_reg (insn)
2807 rtx insn;
2808 {
2809 int i;
2810 rtx pattern, part, reg_part, reg;
2812 if (GET_CODE (insn) != INSN)
2813 return 0;
2814 pattern = PATTERN (insn);
2815 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
2816 return 0;
2818 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2820 part = XVECEXP (pattern, 0, i);
2821 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
2822 reg_part = part;
2824 if (! reg_part)
2825 return 0;
2826 reg = XEXP (reg_part, 0);
2827 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
2829 part = XVECEXP (pattern, 0, i);
2830 if (part == reg_part || GET_CODE (part) == CLOBBER)
2831 continue;
2832 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
2833 && GET_CODE (SET_DEST (part)) == REG)
2834 ? SET_SRC (part) : part)))
2835 return 0;
2837 return reg;
2838 }
2839
2840 /* See if the only way in which INSN uses REG is by calling it, or by
2841 setting it while calling it. Set *SET to a SET rtx if the register
2842 is set by INSN. */
2844 static int
2845 noncall_uses_reg (reg, insn, set)
2846 rtx reg;
2847 rtx insn;
2848 rtx *set;
2849 {
2850 rtx pattern, reg2;
2852 *set = NULL_RTX;
2854 reg2 = sfunc_uses_reg (insn);
2855 if (reg2 && REGNO (reg2) == REGNO (reg))
2857 pattern = single_set (insn);
2858 if (pattern
2859 && GET_CODE (SET_DEST (pattern)) == REG
2860 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2861 *set = pattern;
2862 return 0;
2864 if (GET_CODE (insn) != CALL_INSN)
2866 /* We don't use rtx_equal_p because we don't care if the mode is
2867 different. */
2868 pattern = single_set (insn);
2869 if (pattern
2870 && GET_CODE (SET_DEST (pattern)) == REG
2871 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2873 rtx par, part;
2874 int i;
2876 *set = pattern;
2877 par = PATTERN (insn);
2878 if (GET_CODE (par) == PARALLEL)
2879 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
2881 part = XVECEXP (par, 0, i);
2882 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
2883 return 1;
2885 return reg_mentioned_p (reg, SET_SRC (pattern));
2888 return 1;
2891 pattern = PATTERN (insn);
2893 if (GET_CODE (pattern) == PARALLEL)
2895 int i;
2897 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2898 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
2899 return 1;
2900 pattern = XVECEXP (pattern, 0, 0);
2903 if (GET_CODE (pattern) == SET)
2905 if (reg_mentioned_p (reg, SET_DEST (pattern)))
2907 /* We don't use rtx_equal_p, because we don't care if the
2908 mode is different. */
2909 if (GET_CODE (SET_DEST (pattern)) != REG
2910 || REGNO (reg) != REGNO (SET_DEST (pattern)))
2911 return 1;
2913 *set = pattern;
2916 pattern = SET_SRC (pattern);
2919 if (GET_CODE (pattern) != CALL
2920 || GET_CODE (XEXP (pattern, 0)) != MEM
2921 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
2922 return 1;
2924 return 0;
2925 }
2926
2927 /* Given X, a pattern of an insn or a part of it, return a mask of used
2928 general registers. Bits 0..15 mean that the respective registers
2929 are used as inputs in the instruction. Bits 16..31 mean that the
2930 registers 0..15, respectively, are used as outputs, or are clobbered.
2931 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
2932 static int
2933 regs_used (x, is_dest)
2934 rtx x; int is_dest;
2935 {
2936 enum rtx_code code;
2937 const char *fmt;
2938 int i, used = 0;
2940 if (! x)
2941 return used;
2942 code = GET_CODE (x);
2943 switch (code)
2944 {
2945 case REG:
2946 if (REGNO (x) < 16)
2947 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2948 << (REGNO (x) + is_dest));
2949 return 0;
2950 case SUBREG:
2951 {
2952 rtx y = SUBREG_REG (x);
2953
2954 if (GET_CODE (y) != REG)
2955 break;
2956 if (REGNO (y) < 16)
2957 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2958 << (REGNO (y) +
2959 subreg_regno_offset (REGNO (y),
2960 GET_MODE (y),
2961 SUBREG_BYTE (x),
2962 GET_MODE (x)) + is_dest));
2963 return 0;
2964 }
2965 case SET:
2966 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
2967 case RETURN:
2968 /* If there was a return value, it must have been indicated with USE. */
2969 return 0x00ffff00;
2970 case CLOBBER:
2971 is_dest = 1;
2972 break;
2973 case MEM:
2974 is_dest = 0;
2975 break;
2976 case CALL:
2977 used |= 0x00ff00f0;
2978 break;
2979 default:
2980 break;
2981 }
2982
2983 fmt = GET_RTX_FORMAT (code);
2985 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2986 {
2987 if (fmt[i] == 'E')
2988 {
2989 register int j;
2990 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2991 used |= regs_used (XVECEXP (x, i, j), is_dest);
2992 }
2993 else if (fmt[i] == 'e')
2994 used |= regs_used (XEXP (x, i), is_dest);
2995 }
2996 return used;
2997 }
2998
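/* For exposition only: given the pattern
   (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   regs_used returns (1 << 17) | (1 << 3) | (1 << 2) == 0x2000c, i.e.
   r1 in the output half of the mask, r2 and r3 in the input half,
   assuming SImode occupies a single hard register.  */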
2999 /* Create an instruction that prevents redirection of a conditional branch
3000 to the destination of the JUMP with address ADDR.
3001 If the branch needs to be implemented as an indirect jump, try to find
3002 a scratch register for it.
3003 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3004 If any preceding insn that doesn't fit into a delay slot is good enough,
3005 pass 1. Pass 2 if a definite blocking insn is needed.
3006 -1 is used internally to avoid deep recursion.
3007 If a blocking instruction is made or recognized, return it. */
3009 static rtx
3010 gen_block_redirect (jump, addr, need_block)
3011 rtx jump;
3012 int addr, need_block;
3013 {
3014 int dead = 0;
3015 rtx prev = prev_nonnote_insn (jump);
3016 rtx dest;
3018 /* First, check if we already have an instruction that satisfies our need. */
3019 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3021 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3022 return prev;
3023 if (GET_CODE (PATTERN (prev)) == USE
3024 || GET_CODE (PATTERN (prev)) == CLOBBER
3025 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3026 prev = jump;
3027 else if ((need_block &= ~1) < 0)
3028 return prev;
3029 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3030 need_block = 0;
3032 /* We can't use JUMP_LABEL here because it might be undefined
3033 when not optimizing. */
3034 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3035 /* If the branch is out of range, try to find a scratch register for it. */
3036 if (optimize
3037 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3038 > 4092 + 4098))
3040 rtx scan;
3041 /* Don't look for the stack pointer as a scratch register,
3042 it would cause trouble if an interrupt occurred. */
3043 unsigned try = 0x7fff, used;
3044 int jump_left = flag_expensive_optimizations + 1;
3046 /* It is likely that the most recent eligible instruction is wanted for
3047 the delay slot. Therefore, find out which registers it uses, and
3048 try to avoid using them. */
3050 for (scan = jump; (scan = PREV_INSN (scan)); )
3052 enum rtx_code code;
3054 if (INSN_DELETED_P (scan))
3055 continue;
3056 code = GET_CODE (scan);
3057 if (code == CODE_LABEL || code == JUMP_INSN)
3058 break;
3059 if (code == INSN
3060 && GET_CODE (PATTERN (scan)) != USE
3061 && GET_CODE (PATTERN (scan)) != CLOBBER
3062 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3064 try &= ~regs_used (PATTERN (scan), 0);
3065 break;
3068 for (used = dead = 0, scan = JUMP_LABEL (jump);
3069 (scan = NEXT_INSN (scan)); )
3071 enum rtx_code code;
3073 if (INSN_DELETED_P (scan))
3074 continue;
3075 code = GET_CODE (scan);
3076 if (GET_RTX_CLASS (code) == 'i')
3078 used |= regs_used (PATTERN (scan), 0);
3079 if (code == CALL_INSN)
3080 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3081 dead |= (used >> 16) & ~used;
3082 if (dead & try)
3084 dead &= try;
3085 break;
3087 if (code == JUMP_INSN)
3089 if (jump_left-- && simplejump_p (scan))
3090 scan = JUMP_LABEL (scan);
3091 else
3092 break;
3096 /* Mask out the stack pointer again, in case it was
3097 the only 'free' register we have found. */
3098 dead &= 0x7fff;
3100 /* If the immediate destination is still in range, check for possible
3101 threading with a jump beyond the delay slot insn.
3102 Don't check if we are called recursively; the jump has been or will
3103 be checked in a separate invocation. */
3105 else if (optimize && need_block >= 0)
3107 rtx next = next_active_insn (next_active_insn (dest));
3108 if (next && GET_CODE (next) == JUMP_INSN
3109 && GET_CODE (PATTERN (next)) == SET
3110 && recog_memoized (next) == CODE_FOR_jump)
3112 dest = JUMP_LABEL (next);
3113 if (dest
3114 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3115 > 4092 + 4098))
3116 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3120 if (dead)
3122 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3124 /* It would be nice if we could convert the jump into an indirect
3125 jump / far branch right now, and thus exposing all constituent
3126 instructions to further optimization. However, reorg uses
3127 simplejump_p to determine if there is an unconditional jump where
3128 it should try to schedule instructions from the target of the
3129 branch; simplejump_p fails for indirect jumps even if they have
3130 a JUMP_LABEL. */
3131 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3132 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3133 , jump);
3134 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3135 return insn;
3137 else if (need_block)
3138 /* We can't use JUMP_LABEL here because it might be undefined
3139 when not optimizing. */
3140 return emit_insn_before (gen_block_branch_redirect
3141 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3142 , jump);
3143 return prev;
3144 }
3145
3146 #define CONDJUMP_MIN -252
3147 #define CONDJUMP_MAX 262
3148 struct far_branch
3149 {
3150 /* A label (to be placed) in front of the jump
3151 that jumps to our ultimate destination. */
3152 rtx near_label;
3153 /* Where we are going to insert it if we cannot move the jump any farther,
3154 or the jump itself if we have picked up an existing jump. */
3155 rtx insert_place;
3156 /* The ultimate destination. */
3157 rtx far_label;
3158 struct far_branch *prev;
3159 /* If the branch has already been created, its address;
3160 else the address of its first prospective user. */
3161 int address;
3162 };
3163
3164 static void gen_far_branch PARAMS ((struct far_branch *));
3165 enum mdep_reorg_phase_e mdep_reorg_phase;
3166 static void
3167 gen_far_branch (bp)
3168 struct far_branch *bp;
3169 {
3170 rtx insn = bp->insert_place;
3171 rtx jump;
3172 rtx label = gen_label_rtx ();
3174 emit_label_after (label, insn);
3175 if (bp->far_label)
3177 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3178 LABEL_NUSES (bp->far_label)++;
3180 else
3181 jump = emit_jump_insn_after (gen_return (), insn);
3182 /* Emit a barrier so that reorg knows that any following instructions
3183 are not reachable via a fall-through path.
3184 But don't do this when not optimizing, since we wouldn't suppress the
3185 alignment for the barrier then, and could end up with out-of-range
3186 pc-relative loads. */
3187 if (optimize)
3188 emit_barrier_after (jump);
3189 emit_label_after (bp->near_label, insn);
3190 JUMP_LABEL (jump) = bp->far_label;
3191 if (! invert_jump (insn, label, 1))
3192 abort ();
3193 /* Prevent reorg from undoing our splits. */
3194 gen_block_redirect (jump, bp->address += 2, 2);
3195 }
3196
3197 /* Fix up ADDR_DIFF_VECs. */
3198 void
3199 fixup_addr_diff_vecs (first)
3200 rtx first;
3201 {
3202 rtx insn;
3204 for (insn = first; insn; insn = NEXT_INSN (insn))
3206 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3208 if (GET_CODE (insn) != JUMP_INSN
3209 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3210 continue;
3211 pat = PATTERN (insn);
3212 vec_lab = XEXP (XEXP (pat, 0), 0);
3214 /* Search the matching casesi_jump_2. */
3215 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3217 if (GET_CODE (prev) != JUMP_INSN)
3218 continue;
3219 prevpat = PATTERN (prev);
3220 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3221 continue;
3222 x = XVECEXP (prevpat, 0, 1);
3223 if (GET_CODE (x) != USE)
3224 continue;
3225 x = XEXP (x, 0);
3226 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3227 break;
3230 /* Emit the reference label of the braf where it belongs, right after
3231 the casesi_jump_2 (i.e. braf). */
3232 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3233 emit_label_after (braf_label, prev);
3235 /* Fix up the ADDR_DIFF_VEC to be relative
3236 to the reference address of the braf. */
3237 XEXP (XEXP (pat, 0), 0) = braf_label;
3238 }
3239 }
3240
3241 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3242 a barrier. Return the base 2 logarithm of the desired alignment. */
3243 int
3244 barrier_align (barrier_or_label)
3245 rtx barrier_or_label;
3246 {
3247 rtx next = next_real_insn (barrier_or_label), pat, prev;
3248 int slot, credit, jump_to_next;
3250 if (! next)
3251 return 0;
3253 pat = PATTERN (next);
3255 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3256 return 2;
3258 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3259 /* This is a barrier in front of a constant table. */
3260 return 0;
3262 prev = prev_real_insn (barrier_or_label);
3263 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3265 pat = PATTERN (prev);
3266 /* If this is a very small table, we want to keep the alignment after
3267 the table to the minimum for proper code alignment. */
3268 return ((TARGET_SMALLCODE
3269 || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3270 <= (unsigned)1 << (CACHE_LOG - 2)))
3271 ? 1 << TARGET_SHMEDIA : CACHE_LOG);
3274 if (TARGET_SMALLCODE)
3275 return 0;
3277 if (! TARGET_SH2 || ! optimize)
3278 return CACHE_LOG;
3280 /* When fixing up pcloads, a constant table might be inserted just before
3281 the basic block that ends with the barrier. Thus, we can't trust the
3282 instruction lengths before that. */
3283 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3285 /* Check if there is an immediately preceding branch to the insn beyond
3286 the barrier. We must weight the cost of discarding useful information
3287 from the current cache line when executing this branch and there is
3288 an alignment, against that of fetching unneeded insn in front of the
3289 branch target when there is no alignment. */
3291 /* There are two delay_slot cases to consider. One is the simple case
3292 where the preceding branch is to the insn beyond the barrier (simple
3293 delay slot filling), and the other is where the preceding branch has
3294 a delay slot that is a duplicate of the insn after the barrier
3295 (fill_eager_delay_slots) and the branch is to the insn after the insn
3296 after the barrier. */
3298 /* PREV is presumed to be the JUMP_INSN for the barrier under
3299 investigation. Skip to the insn before it. */
3300 prev = prev_real_insn (prev);
3302 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3303 credit >= 0 && prev && GET_CODE (prev) == INSN;
3304 prev = prev_real_insn (prev))
3306 jump_to_next = 0;
3307 if (GET_CODE (PATTERN (prev)) == USE
3308 || GET_CODE (PATTERN (prev)) == CLOBBER)
3309 continue;
3310 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3312 prev = XVECEXP (PATTERN (prev), 0, 1);
3313 if (INSN_UID (prev) == INSN_UID (next))
3315 /* Delay slot was filled with insn at jump target. */
3316 jump_to_next = 1;
3317 continue;
3321 if (slot &&
3322 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3323 slot = 0;
3324 credit -= get_attr_length (prev);
3326 if (prev
3327 && GET_CODE (prev) == JUMP_INSN
3328 && JUMP_LABEL (prev))
3330 rtx x;
3331 if (jump_to_next
3332 || next_real_insn (JUMP_LABEL (prev)) == next
3333 /* If relax_delay_slots() decides NEXT was redundant
3334 with some previous instruction, it will have
3335 redirected PREV's jump to the following insn. */
3336 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3337 /* There is no upper bound on redundant instructions
3338 that might have been skipped, but we must not put an
3339 alignment where none had been before. */
3340 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3341 (INSN_P (x)
3342 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3343 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch))))
3345 rtx pat = PATTERN (prev);
3346 if (GET_CODE (pat) == PARALLEL)
3347 pat = XVECEXP (pat, 0, 0);
3348 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3349 return 0;
3354 return CACHE_LOG;
3355 }
3356
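/* A return value of N here requests 1 << N byte alignment after the
   barrier: 0 means no extra alignment, 2 means a 4 byte boundary, and
   CACHE_LOG aligns to a cache line.  (Exposition only.)  */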
3357 /* If we are inside a phony loop, almost any kind of label can turn up as the
3358 first one in the loop. Aligning a braf label causes incorrect switch
3359 destination addresses; we can detect braf labels because they are
3360 followed by a BARRIER.
3361 Applying loop alignment to small constant or switch tables is a waste
3362 of space, so we suppress this too. */
3363 int
3364 sh_loop_align (label)
3365 rtx label;
3366 {
3367 rtx next = label;
3368
3369 do
3370 next = next_nonnote_insn (next);
3371 while (next && GET_CODE (next) == CODE_LABEL);
3372
3373 if (! next
3374 || ! INSN_P (next)
3375 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3376 || recog_memoized (next) == CODE_FOR_consttable_2)
3377 return 0;
3379 if (TARGET_SH5)
3380 return 3;
3382 return 2;
3383 }
3384
3385 /* Exported to toplev.c.
3386
3387 Do a final pass over the function, just before delayed branch
3388 scheduling. */
3389
3390 void
3391 machine_dependent_reorg (first)
3392 rtx first;
3393 {
3394 rtx insn, mova;
3395 int num_mova;
3396 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3397 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3399 /* We must split call insns before introducing `mova's. If we're
3400 optimizing, they'll have already been split. Otherwise, make
3401 sure we don't split them too late. */
3402 if (! optimize)
3403 split_all_insns_noflow ();
3405 if (TARGET_SHMEDIA)
3406 return;
3408 /* If relaxing, generate pseudo-ops to associate function calls with
3409 the symbols they call. It does no harm not to generate these
3410 pseudo-ops. However, when we can generate them, it enables the
3411 linker to potentially relax the jsr to a bsr, and eliminate the
3412 register load and, possibly, the constant pool entry. */
3414 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3415 if (TARGET_RELAX)
3417 /* Remove all REG_LABEL notes. We want to use them for our own
3418 purposes. This works because none of the remaining passes
3419 need to look at them.
3421 ??? But it may break in the future. We should use a machine
3422 dependent REG_NOTE, or some other approach entirely. */
3423 for (insn = first; insn; insn = NEXT_INSN (insn))
3425 if (INSN_P (insn))
3427 rtx note;
3429 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3430 remove_note (insn, note);
3434 for (insn = first; insn; insn = NEXT_INSN (insn))
3436 rtx pattern, reg, link, set, scan, dies, label;
3437 int rescan = 0, foundinsn = 0;
3439 if (GET_CODE (insn) == CALL_INSN)
3441 pattern = PATTERN (insn);
3443 if (GET_CODE (pattern) == PARALLEL)
3444 pattern = XVECEXP (pattern, 0, 0);
3445 if (GET_CODE (pattern) == SET)
3446 pattern = SET_SRC (pattern);
3448 if (GET_CODE (pattern) != CALL
3449 || GET_CODE (XEXP (pattern, 0)) != MEM)
3450 continue;
3452 reg = XEXP (XEXP (pattern, 0), 0);
3454 else
3456 reg = sfunc_uses_reg (insn);
3457 if (! reg)
3458 continue;
3461 if (GET_CODE (reg) != REG)
3462 continue;
3464 /* This is a function call via REG. If the only uses of REG
3465 between the time that it is set and the time that it dies
3466 are in function calls, then we can associate all the
3467 function calls with the setting of REG. */
3469 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3471 if (REG_NOTE_KIND (link) != 0)
3472 continue;
3473 set = single_set (XEXP (link, 0));
3474 if (set && rtx_equal_p (reg, SET_DEST (set)))
3476 link = XEXP (link, 0);
3477 break;
3481 if (! link)
3483 /* ??? Sometimes global register allocation will have
3484 deleted the insn pointed to by LOG_LINKS. Try
3485 scanning backward to find where the register is set. */
3486 for (scan = PREV_INSN (insn);
3487 scan && GET_CODE (scan) != CODE_LABEL;
3488 scan = PREV_INSN (scan))
3490 if (! INSN_P (scan))
3491 continue;
3493 if (! reg_mentioned_p (reg, scan))
3494 continue;
3496 if (noncall_uses_reg (reg, scan, &set))
3497 break;
3499 if (set)
3501 link = scan;
3502 break;
3507 if (! link)
3508 continue;
3510 /* The register is set at LINK. */
3512 /* We can only optimize the function call if the register is
3513 being set to a symbol. In theory, we could sometimes
3514 optimize calls to a constant location, but the assembler
3515 and linker do not support that at present. */
3516 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3517 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3518 continue;
3520 /* Scan forward from LINK to the place where REG dies, and
3521 make sure that the only insns which use REG are
3522 themselves function calls. */
3524 /* ??? This doesn't work for call targets that were allocated
3525 by reload, since there may not be a REG_DEAD note for the
3526 register. */
3528 dies = NULL_RTX;
3529 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3531 rtx scanset;
3533 /* Don't try to trace forward past a CODE_LABEL if we haven't
3534 seen INSN yet. Ordinarily, we will only find the setting insn
3535 in LOG_LINKS if it is in the same basic block. However,
3536 cross-jumping can insert code labels in between the load and
3537 the call, and can result in situations where a single call
3538 insn may have two targets depending on where we came from. */
3540 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3541 break;
3543 if (! INSN_P (scan))
3544 continue;
3546 /* Don't try to trace forward past a JUMP. To optimize
3547 safely, we would have to check that all the
3548 instructions at the jump destination did not use REG. */
3550 if (GET_CODE (scan) == JUMP_INSN)
3551 break;
3553 if (! reg_mentioned_p (reg, scan))
3554 continue;
3556 if (noncall_uses_reg (reg, scan, &scanset))
3557 break;
3559 if (scan == insn)
3560 foundinsn = 1;
3562 if (scan != insn
3563 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3565 /* There is a function call to this register other
3566 than the one we are checking. If we optimize
3567 this call, we need to rescan again below. */
3568 rescan = 1;
3571 /* ??? We shouldn't have to worry about SCANSET here.
3572 We should just be able to check for a REG_DEAD note
3573 on a function call. However, the REG_DEAD notes are
3574 apparently not dependable around libcalls; c-torture
3575 execute/920501-2 is a test case. If SCANSET is set,
3576 then this insn sets the register, so it must have
3577 died earlier. Unfortunately, this will only handle
3578 the cases in which the register is, in fact, set in a
3579 later insn. */
3581 /* ??? We shouldn't have to use FOUNDINSN here.
3582 However, the LOG_LINKS fields are apparently not
3583 entirely reliable around libcalls;
3584 newlib/libm/math/e_pow.c is a test case. Sometimes
3585 an insn will appear in LOG_LINKS even though it is
3586 not the most recent insn which sets the register. */
3588 if (foundinsn
3589 && (scanset
3590 || find_reg_note (scan, REG_DEAD, reg)))
3592 dies = scan;
3593 break;
3597 if (! dies)
3599 /* Either there was a branch, or some insn used REG
3600 other than as a function call address. */
3601 continue;
3604 /* Create a code label, and put it in a REG_LABEL note on
3605 the insn which sets the register, and on each call insn
3606 which uses the register. In final_prescan_insn we look
3607 for the REG_LABEL notes, and output the appropriate label
3608 or pseudo-op. */
3610 label = gen_label_rtx ();
3611 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3612 REG_NOTES (link));
3613 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3614 REG_NOTES (insn));
3615 if (rescan)
3617 scan = link;
3620 rtx reg2;
3622 scan = NEXT_INSN (scan);
3623 if (scan != insn
3624 && ((GET_CODE (scan) == CALL_INSN
3625 && reg_mentioned_p (reg, scan))
3626 || ((reg2 = sfunc_uses_reg (scan))
3627 && REGNO (reg2) == REGNO (reg))))
3628 REG_NOTES (scan)
3629 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3631 while (scan != dies);
3636 if (TARGET_SH2)
3637 fixup_addr_diff_vecs (first);
3639 if (optimize)
3641 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3642 shorten_branches (first);
3644 /* Scan the function looking for move instructions which have to be
3645 changed to pc-relative loads and insert the literal tables. */
3647 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3648 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3650 if (mova_p (insn))
3652 if (! num_mova++)
3653 mova = insn;
3655 else if (GET_CODE (insn) == JUMP_INSN
3656 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3657 && num_mova)
3659 rtx scan;
3660 int total;
3662 num_mova--;
3664 /* Some code might have been inserted between the mova and
3665 its ADDR_DIFF_VEC. Check if the mova is still in range. */
3666 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
3667 total += get_attr_length (scan);
3669 /* The range of the mova is 1020; add 4 because pc counts from the
3670 address of the second instruction after this one, and subtract 2 in
3671 case pc is 2 byte aligned. Possible alignment needed for the
3672 ADDR_DIFF_VEC cancels out with alignment effects of the mova itself. */
3673 if (total > 1022)
3675 /* Change the mova into a load, and restart scanning
3676 there. broken_move will then return true for mova. */
3677 SET_SRC (PATTERN (mova))
3678 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3679 INSN_CODE (mova) = -1;
3680 insn = mova;
3683 if (broken_move (insn))
3685 rtx scan;
3686 /* Scan ahead looking for a barrier to stick the constant table
3687 behind. */
3688 rtx barrier = find_barrier (num_mova, mova, insn);
3689 rtx last_float_move, last_float = 0, *last_float_addr;
3690 int may_need_align = 1;
3692 if (num_mova && ! mova_p (mova))
3694 /* find_barrier had to change the first mova into a
3695 pcload; thus, we have to start with this new pcload. */
3696 insn = mova;
3697 num_mova = 0;
3699 /* Now find all the moves between the points and modify them. */
3700 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
3702 if (GET_CODE (scan) == CODE_LABEL)
3703 last_float = 0;
3704 if (broken_move (scan))
3706 rtx *patp = &PATTERN (scan), pat = *patp;
3707 rtx src, dst;
3708 rtx lab;
3709 rtx newsrc;
3710 enum machine_mode mode;
3712 if (GET_CODE (pat) == PARALLEL)
3713 patp = &XVECEXP (pat, 0, 0), pat = *patp;
3714 src = SET_SRC (pat);
3715 dst = SET_DEST (pat);
3716 mode = GET_MODE (dst);
3718 if (mode == SImode && hi_const (src)
3719 && REGNO (dst) != FPUL_REG)
3721 int offset = 0;
3723 mode = HImode;
3724 while (GET_CODE (dst) == SUBREG)
3726 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
3727 GET_MODE (SUBREG_REG (dst)),
3728 SUBREG_BYTE (dst),
3729 GET_MODE (dst));
3730 dst = SUBREG_REG (dst);
3732 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
3735 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3737 /* This must be an insn that clobbers r0. */
3738 rtx clobber = XVECEXP (PATTERN (scan), 0,
3739 XVECLEN (PATTERN (scan), 0) - 1);
3741 if (GET_CODE (clobber) != CLOBBER
3742 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
3743 abort ();
3745 if (last_float
3746 && reg_set_between_p (r0_rtx, last_float_move, scan))
3747 last_float = 0;
3748 if (TARGET_SHCOMPACT)
3750 /* The first SFmode constant after a DFmode
3751 constant may be pulled before a sequence
3752 of DFmode constants, so the second SFmode
3753 needs a label, just in case. */
3754 if (GET_MODE_SIZE (mode) == 4)
3756 if (last_float && may_need_align)
3757 last_float = 0;
3758 may_need_align = 0;
3760 if (last_float
3761 && (GET_MODE_SIZE (GET_MODE (last_float))
3762 != GET_MODE_SIZE (mode)))
3764 last_float = 0;
3765 if (GET_MODE_SIZE (mode) == 4)
3766 may_need_align = 1;
3769 lab = add_constant (src, mode, last_float);
3770 if (lab)
3771 emit_insn_before (gen_mova (lab), scan);
3772 else
3774 /* There will be a REG_UNUSED note for r0 on
3775 LAST_FLOAT_MOVE; we have to change it to REG_INC,
3776 since otherwise reorg:mark_target_live_regs would not
3777 consider r0 to be used, and we would end up with a delay
3778 slot insn in front of SCAN that clobbers r0. */
3779 rtx note
3780 = find_regno_note (last_float_move, REG_UNUSED, 0);
3782 /* If we are not optimizing, then there may not be
3783 a note. */
3784 if (note)
3785 PUT_MODE (note, REG_INC);
3787 *last_float_addr = r0_inc_rtx;
3789 last_float_move = scan;
3790 last_float = src;
3791 newsrc = gen_rtx (MEM, mode,
3792 (((TARGET_SH4 && ! TARGET_FMOVD)
3793 || REGNO (dst) == FPUL_REG)
3794 ? r0_inc_rtx
3795 : r0_rtx));
3796 last_float_addr = &XEXP (newsrc, 0);
3798 /* Remove the clobber of r0. */
3799 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
3800 RTX_UNCHANGING_P (newsrc) = 1;
3802 /* This is a mova needing a label. Create it. */
3803 else if (GET_CODE (src) == UNSPEC
3804 && XINT (src, 1) == UNSPEC_MOVA
3805 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
3807 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
3808 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
3809 newsrc = gen_rtx_UNSPEC (VOIDmode,
3810 gen_rtvec (1, newsrc),
3811 UNSPEC_MOVA);
3813 else
3815 lab = add_constant (src, mode, 0);
3816 newsrc = gen_rtx_MEM (mode,
3817 gen_rtx_LABEL_REF (VOIDmode, lab));
3818 RTX_UNCHANGING_P (newsrc) = 1;
3820 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
3821 INSN_CODE (scan) = -1;
3824 dump_table (barrier);
3825 insn = barrier;
3829 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
3830 INSN_ADDRESSES_FREE ();
3831 split_branches (first);
3833 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
3834 also has an effect on the register that holds the address of the sfunc.
3835 Insert an extra dummy insn in front of each sfunc that pretends to
3836 use this register. */
3837 if (flag_delayed_branch)
3839 for (insn = first; insn; insn = NEXT_INSN (insn))
3841 rtx reg = sfunc_uses_reg (insn);
3843 if (! reg)
3844 continue;
3845 emit_insn_before (gen_use_sfunc_addr (reg), insn);
3848 #if 0
3849 /* fpscr is not actually a user variable, but we pretend it is for the
3850 sake of the previous optimization passes, since we want it handled like
3851 one. However, we don't have any debugging information for it, so turn
3852 it into a non-user variable now. */
3853 if (TARGET_SH4)
3854 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
3855 #endif
3856 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
3857 }
3858
3859 static int
3860 get_dest_uid (label, max_uid)
3861 rtx label;
3862 int max_uid;
3863 {
3864 rtx dest = next_real_insn (label);
3865 int dest_uid;
3866 if (! dest)
3867 /* This can happen for an undefined label. */
3868 return 0;
3869 dest_uid = INSN_UID (dest);
3870 /* If this is a newly created branch redirection blocking instruction,
3871 we cannot index the branch_uid or insn_addresses arrays with its
3872 uid. But then, we won't need to, because the actual destination is
3873 the following branch. */
3874 while (dest_uid >= max_uid)
3875 {
3876 dest = NEXT_INSN (dest);
3877 dest_uid = INSN_UID (dest);
3878 }
3879 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
3880 return 0;
3881 return dest_uid;
3882 }
3883
3884 /* Split condbranches that are out of range. Also add clobbers for
3885 scratch registers that are needed in far jumps.
3886 We do this before delay slot scheduling, so that it can take our
3887 newly created instructions into account. It also allows us to
3888 find branches with common targets more easily. */
3890 static void
3891 split_branches (first)
3892 rtx first;
3893 {
3894 rtx insn;
3895 struct far_branch **uid_branch, *far_branch_list = 0;
3896 int max_uid = get_max_uid ();
3898 /* Find out which branches are out of range. */
3899 shorten_branches (first);
3901 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
3902 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
3904 for (insn = first; insn; insn = NEXT_INSN (insn))
3905 if (! INSN_P (insn))
3906 continue;
3907 else if (INSN_DELETED_P (insn))
3909 /* Shorten_branches would split this instruction again,
3910 so transform it into a note. */
3911 PUT_CODE (insn, NOTE);
3912 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3913 NOTE_SOURCE_FILE (insn) = 0;
3915 else if (GET_CODE (insn) == JUMP_INSN
3916 /* Don't mess with ADDR_DIFF_VEC */
3917 && (GET_CODE (PATTERN (insn)) == SET
3918 || GET_CODE (PATTERN (insn)) == RETURN))
3920 enum attr_type type = get_attr_type (insn);
3921 if (type == TYPE_CBRANCH)
3923 rtx next, beyond;
3925 if (get_attr_length (insn) > 4)
3927 rtx src = SET_SRC (PATTERN (insn));
3928 rtx olabel = XEXP (XEXP (src, 1), 0);
3929 int addr = INSN_ADDRESSES (INSN_UID (insn));
3930 rtx label = 0;
3931 int dest_uid = get_dest_uid (olabel, max_uid);
3932 struct far_branch *bp = uid_branch[dest_uid];
3934 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
3935 the label if the LABEL_NUSES count drops to zero. There is
3936 always a jump_optimize pass that sets these values, but it
3937 proceeds to delete unreferenced code, and then if not
3938 optimizing, to un-delete the deleted instructions, thus
3939 leaving labels with too low uses counts. */
3940 if (! optimize)
3942 JUMP_LABEL (insn) = olabel;
3943 LABEL_NUSES (olabel)++;
3945 if (! bp)
3947 bp = (struct far_branch *) alloca (sizeof *bp);
3948 uid_branch[dest_uid] = bp;
3949 bp->prev = far_branch_list;
3950 far_branch_list = bp;
3951 bp->far_label
3952 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
3953 LABEL_NUSES (bp->far_label)++;
3955 else
3957 label = bp->near_label;
3958 if (! label && bp->address - addr >= CONDJUMP_MIN)
3960 rtx block = bp->insert_place;
3962 if (GET_CODE (PATTERN (block)) == RETURN)
3963 block = PREV_INSN (block);
3964 else
3965 block = gen_block_redirect (block,
3966 bp->address, 2);
3967 label = emit_label_after (gen_label_rtx (),
3968 PREV_INSN (block));
3969 bp->near_label = label;
3971 else if (label && ! NEXT_INSN (label))
3973 if (addr + 2 - bp->address <= CONDJUMP_MAX)
3974 bp->insert_place = insn;
3975 else
3976 gen_far_branch (bp);
3979 if (! label
3980 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
3982 bp->near_label = label = gen_label_rtx ();
3983 bp->insert_place = insn;
3984 bp->address = addr;
3986 if (! redirect_jump (insn, label, 1))
3987 abort ();
3989 else
3991 /* get_attr_length (insn) == 2 */
3992 /* Check if we have a pattern where reorg wants to redirect
3993 the branch to a label from an unconditional branch that
3994 is too far away. */
3995 /* We can't use JUMP_LABEL here because it might be undefined
3996 when not optimizing. */
3997 /* A syntax error might cause beyond to be NULL_RTX. */
3998 beyond
3999 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4000 0));
4002 if (beyond
4003 && (GET_CODE (beyond) == JUMP_INSN
4004 || ((beyond = next_active_insn (beyond))
4005 && GET_CODE (beyond) == JUMP_INSN))
4006 && GET_CODE (PATTERN (beyond)) == SET
4007 && recog_memoized (beyond) == CODE_FOR_jump
4008 && ((INSN_ADDRESSES
4009 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4010 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4011 > 252 + 258 + 2))
4012 gen_block_redirect (beyond,
4013 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4016 next = next_active_insn (insn);
4018 if ((GET_CODE (next) == JUMP_INSN
4019 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4020 && GET_CODE (PATTERN (next)) == SET
4021 && recog_memoized (next) == CODE_FOR_jump
4022 && ((INSN_ADDRESSES
4023 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4024 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4025 > 252 + 258 + 2))
4026 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4028 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4030 int addr = INSN_ADDRESSES (INSN_UID (insn));
4031 rtx far_label = 0;
4032 int dest_uid = 0;
4033 struct far_branch *bp;
4035 if (type == TYPE_JUMP)
4037 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4038 dest_uid = get_dest_uid (far_label, max_uid);
4039 if (! dest_uid)
4041 /* Parse errors can lead to labels outside
4042 the insn stream. */
4043 if (! NEXT_INSN (far_label))
4044 continue;
4046 if (! optimize)
4048 JUMP_LABEL (insn) = far_label;
4049 LABEL_NUSES (far_label)++;
4051 redirect_jump (insn, NULL_RTX, 1);
4052 far_label = 0;
4055 bp = uid_branch[dest_uid];
4056 if (! bp)
4058 bp = (struct far_branch *) alloca (sizeof *bp);
4059 uid_branch[dest_uid] = bp;
4060 bp->prev = far_branch_list;
4061 far_branch_list = bp;
4062 bp->near_label = 0;
4063 bp->far_label = far_label;
4064 if (far_label)
4065 LABEL_NUSES (far_label)++;
4067 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4068 if (addr - bp->address <= CONDJUMP_MAX)
4069 emit_label_after (bp->near_label, PREV_INSN (insn));
4070 else
4072 gen_far_branch (bp);
4073 bp->near_label = 0;
4075 else
4076 bp->near_label = 0;
4077 bp->address = addr;
4078 bp->insert_place = insn;
4079 if (! far_label)
4080 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4081 else
4082 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4085 /* Generate all pending far branches,
4086 and free our references to the far labels. */
4087 while (far_branch_list)
4089 if (far_branch_list->near_label
4090 && ! NEXT_INSN (far_branch_list->near_label))
4091 gen_far_branch (far_branch_list);
4092 if (optimize
4093 && far_branch_list->far_label
4094 && ! --LABEL_NUSES (far_branch_list->far_label))
4095 delete_insn (far_branch_list->far_label);
4096 far_branch_list = far_branch_list->prev;
4099 /* Instruction length information is no longer valid due to the new
4100 instructions that have been generated. */
4101 init_insn_lengths ();
4104 /* Dump out instruction addresses, which is useful for debugging the
4105 constant pool table stuff.
4107 If relaxing, output the label and pseudo-ops used to link together
4108 calls and the instruction which set the registers. */
4110 /* ??? This is unnecessary, and probably should be deleted. This makes
4111 the insn_addresses declaration above unnecessary. */
4113 /* ??? The addresses printed by this routine for insns are nonsense for
4114 insns which are inside of a sequence where none of the inner insns have
4115 variable length. This is because the second pass of shorten_branches
4116 does not bother to update them. */
4118 void
4119 final_prescan_insn (insn, opvec, noperands)
4120 rtx insn;
4121 rtx *opvec ATTRIBUTE_UNUSED;
4122 int noperands ATTRIBUTE_UNUSED;
4124 if (TARGET_DUMPISIZE)
4125 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4127 if (TARGET_RELAX)
4129 rtx note;
4131 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4132 if (note)
4134 rtx pattern;
4136 pattern = PATTERN (insn);
4137 if (GET_CODE (pattern) == PARALLEL)
4138 pattern = XVECEXP (pattern, 0, 0);
4139 if (GET_CODE (pattern) == CALL
4140 || (GET_CODE (pattern) == SET
4141 && (GET_CODE (SET_SRC (pattern)) == CALL
4142 || get_attr_type (insn) == TYPE_SFUNC)))
4143 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4144 CODE_LABEL_NUMBER (XEXP (note, 0)));
4145 else if (GET_CODE (pattern) == SET)
4146 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4147 CODE_LABEL_NUMBER (XEXP (note, 0)));
4148 else
4149 abort ();
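/* Sample output (a sketch, assuming the usual -misize and -mrelax
   option spellings for TARGET_DUMPISIZE and TARGET_RELAX): with
   -misize each insn is preceded by a comment such as "! at 0024";
   with -mrelax a call whose function address was set up from the
   constant pool gets ".uses L5" (modulo the local label prefix)
   naming the label of the setting insn, so the linker can relax
   the pair.  */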
4154 /* Dump out any constants accumulated in the final pass. These will
4155 only be labels. */
4157 const char *
4158 output_jump_label_table ()
4160 int i;
4162 if (pool_size)
4164 fprintf (asm_out_file, "\t.align 2\n");
4165 for (i = 0; i < pool_size; i++)
4167 pool_node *p = &pool_vector[i];
4169 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4170 CODE_LABEL_NUMBER (p->label));
4171 output_asm_insn (".long %O0", &p->value);
4173 pool_size = 0;
4176 return "";
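/* The assembly emitted above for a single accumulated entry looks
   roughly like:

	.align 2
   L4:
	.long	L7

   where L4 is the pool entry's own label and L7 the branch target
   (label numbers illustrative; exact label spelling comes from
   ASM_OUTPUT_INTERNAL_LABEL).  */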
4179 /* A full frame looks like:
4181 arg-5
4182 arg-4
4183 [ if current_function_anonymous_args
4184 arg-3
4185 arg-2
4186 arg-1
4187 arg-0 ]
4188 saved-fp
4189 saved-r10
4190 saved-r11
4191 saved-r12
4192 saved-pr
4193 local-n
4195 local-1
4196 local-0 <- fp points here. */
4198 /* Number of bytes pushed for anonymous args, used to pass information
4199 between sh_expand_prologue and sh_expand_epilogue. */
4201 static int extra_push;
4203 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
4204 to be adjusted, and TEMP, if nonnegative, holds the register number
4205 of a general register that we may clobber. */
4207 static void
4208 output_stack_adjust (size, reg, temp, emit_fn)
4209 int size;
4210 rtx reg;
4211 int temp;
4212 rtx (*emit_fn) PARAMS ((rtx));
4214 if (size)
4216 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4218 if (size % align)
4219 abort ();
4221 if (CONST_OK_FOR_ADD (size))
4222 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4223 /* Try to do it with two partial adjustments; however, we must make
4224 sure that the stack is properly aligned at all times, in case
4225 an interrupt occurs between the two partial adjustments. */
4226 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4227 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4229 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4230 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4232 else
4234 rtx const_reg;
4235 rtx insn;
4237 /* If TEMP is invalid, we could temporarily save a general
4238 register to MACL. However, there is currently no need
4239 to handle this case, so just abort when we see it. */
4240 if (temp < 0)
4241 abort ();
4242 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4244 /* If SIZE is negative, subtract the positive value.
4245 This sometimes allows a constant pool entry to be shared
4246 between prologue and epilogue code. */
4247 if (size < 0)
4249 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4250 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4252 else
4254 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4255 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4257 if (emit_fn == frame_insn)
4258 REG_NOTES (insn)
4259 = (gen_rtx_EXPR_LIST
4260 (REG_FRAME_RELATED_EXPR,
4261 gen_rtx_SET (VOIDmode, reg,
4262 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4263 REG_NOTES (insn)));
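/* A worked instance of the two-partial-adjustments case above,
   assuming the 8-bit signed add-immediate range of SH1..SH4: a
   request of -192 bytes does not fit in one add, but splits as
   -96 + -96; each half is a multiple of the 4-byte stack alignment,
   so the stack pointer stays aligned in between, which matters if an
   interrupt arrives between the two adds.  */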
4268 static rtx
4269 frame_insn (x)
4270 rtx x;
4272 x = emit_insn (x);
4273 RTX_FRAME_RELATED_P (x) = 1;
4274 return x;
4277 /* Output RTL to push register RN onto the stack. */
4279 static rtx
4280 push (rn)
4281 int rn;
4283 rtx x;
4284 if (rn == FPUL_REG)
4285 x = gen_push_fpul ();
4286 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4287 && FP_OR_XD_REGISTER_P (rn))
4289 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4290 return NULL_RTX;
4291 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4293 else if (TARGET_SH3E && FP_REGISTER_P (rn))
4294 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4295 else
4296 x = gen_push (gen_rtx_REG (SImode, rn));
4298 x = frame_insn (x);
4299 REG_NOTES (x)
4300 = gen_rtx_EXPR_LIST (REG_INC,
4301 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4302 return x;
4305 /* Output RTL to pop register RN from the stack. */
4307 static void
4308 pop (rn)
4309 int rn;
4311 rtx x;
4312 if (rn == FPUL_REG)
4313 x = gen_pop_fpul ();
4314 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4315 && FP_OR_XD_REGISTER_P (rn))
4317 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4318 return;
4319 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4321 else if (TARGET_SH3E && FP_REGISTER_P (rn))
4322 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4323 else
4324 x = gen_pop (gen_rtx_REG (SImode, rn));
4326 x = emit_insn (x);
4327 REG_NOTES (x)
4328 = gen_rtx_EXPR_LIST (REG_INC,
4329 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
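/* For both push and pop above, the REG_INC note records that the
   stack pointer is modified by the auto-decrement/increment address
   inside the MEM, so that passes which look only at SET destinations
   (delayed-branch scheduling, for instance) still see the side
   effect.  */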
4332 /* Generate code to push the regs specified in the mask. */
4334 static void
4335 push_regs (mask)
4336 HOST_WIDE_INT *mask;
4338 int i;
4340 /* Push PR last; this gives better latencies after the prologue, and
4341 candidates for the return delay slot when there are no general
4342 registers pushed. */
4343 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4344 if (i != PR_REG && mask[i / 32] & (1 << (i % 32)))
4345 push (i);
4346 if (mask[PR_REG / 32] & (1 << (PR_REG % 32)))
4347 push (PR_REG);
4350 /* Work out the registers which need to be saved, both as a mask and a
4351 count of bytes saved.
4353 If doing a pragma interrupt function, then push all regs used by the
4354 function, and if we call another function (we can tell by looking at PR),
4355 make sure that all the regs it clobbers are safe too. */
4357 static void
4358 calc_live_regs (count_ptr, live_regs_mask)
4359 int *count_ptr;
4360 HOST_WIDE_INT *live_regs_mask;
4362 int reg;
4363 int count;
4364 int interrupt_handler;
4365 rtx pr_initial;
4366 int pr_live;
4368 if ((lookup_attribute
4369 ("interrupt_handler",
4370 DECL_ATTRIBUTES (current_function_decl)))
4371 != NULL_TREE)
4372 interrupt_handler = 1;
4373 else
4374 interrupt_handler = 0;
4376 for (count = 0; 32 * count < FIRST_PSEUDO_REGISTER; count++)
4377 live_regs_mask[count] = 0;
4378 /* If switching to double mode will save a lot of register saves, do that. */
4379 if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4380 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4381 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4382 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4383 && ++count > 2)
4385 target_flags &= ~FPU_SINGLE_BIT;
4386 break;
4388 pr_initial = has_hard_reg_initial_val (Pmode,
4389 TARGET_SHMEDIA
4390 ? PR_MEDIA_REG : PR_REG);
4391 pr_live = (pr_initial
4392 ? REGNO (pr_initial) != (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4393 : regs_ever_live[TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG]);
4394 /* Force PR to be live if the prologue has to call the SHmedia
4395 argument decoder or register saver. */
4396 if (TARGET_SHCOMPACT
4397 && ((current_function_args_info.call_cookie
4398 & ~ CALL_COOKIE_RET_TRAMP (1))
4399 || current_function_has_nonlocal_label))
4400 pr_live = 1;
4401 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4403 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4404 ? pr_live
4405 : (interrupt_handler && ! pragma_trapa)
4406 ? (/* Need to save all the regs ever live. */
4407 (regs_ever_live[reg]
4408 || (call_used_regs[reg]
4409 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4410 && pr_live))
4411 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4412 && reg != RETURN_ADDRESS_POINTER_REGNUM
4413 && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG)
4414 : (/* Only push those regs which are used and need to be saved. */
4415 regs_ever_live[reg] && ! call_used_regs[reg]))
4417 live_regs_mask[reg / 32] |= 1 << (reg % 32);
4418 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4420 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4421 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4423 if (FP_REGISTER_P (reg))
4425 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4427 live_regs_mask[(reg ^ 1) / 32] |= 1 << ((reg ^ 1) % 32);
4428 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4431 else if (XD_REGISTER_P (reg))
4433 /* Must switch to double mode to access these registers. */
4434 target_flags &= ~FPU_SINGLE_BIT;
4440 *count_ptr = count;
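/* Mask convention used above and by the callers: register R is live
   iff live_regs_mask[R / 32] & (1 << (R % 32)) is set, i.e. the mask
   is an array of 32-bit words indexed by register number, and COUNT
   accumulates the size of the save area in bytes.  */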
4443 /* Code to generate prologue and epilogue sequences */
4445 /* PUSHED is the number of bytes that are being pushed on the
4446 stack for register saves. Return the frame size, padded
4447 appropriately so that the stack stays properly aligned. */
4448 static HOST_WIDE_INT
4449 rounded_frame_size (pushed)
4450 int pushed;
4452 HOST_WIDE_INT size = get_frame_size ();
4453 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4455 return ((size + pushed + align - 1) & -align) - pushed;
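/* For example, with a 32-bit STACK_BOUNDARY (align == 4), 20 bytes of
   pushed registers and get_frame_size () == 30:
     ((30 + 20 + 3) & -4) - 20 == 52 - 20 == 32,
   so the locals get 32 bytes and the overall 52 stays a multiple
   of 4.  */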
4458 /* Choose a call-clobbered target-branch register that remains
4459 unchanged along the whole function. We set it up to hold the
4460 return address in the prologue. */
4462 sh_media_register_for_return ()
4464 int regno;
4465 int tr0_used;
4467 if (! current_function_is_leaf)
4468 return -1;
4470 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4472 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4473 if (call_used_regs[regno] && ! regs_ever_live[regno])
4474 return regno;
4476 return -1;
4479 void
4480 sh_expand_prologue ()
4482 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
4483 int d, i;
4484 int d_rounding = 0;
4485 int save_flags = target_flags;
4487 current_function_interrupt
4488 = lookup_attribute ("interrupt_handler",
4489 DECL_ATTRIBUTES (current_function_decl))
4490 != NULL_TREE;
4492 /* We have pretend args if we had an object sent partially in registers
4493 and partially on the stack, e.g. a large structure. */
4494 output_stack_adjust (-current_function_pretend_args_size
4495 - current_function_args_info.stack_regs * 8,
4496 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4498 extra_push = 0;
4500 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
4501 /* We're going to use the PIC register to load the address of the
4502 incoming-argument decoder and/or of the return trampoline from
4503 the GOT, so make sure the PIC register is preserved and
4504 initialized. */
4505 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
4507 if (TARGET_SHCOMPACT
4508 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4510 int reg;
4512 /* First, make all registers with incoming arguments that will
4513 be pushed onto the stack live, so that register renaming
4514 doesn't overwrite them. */
4515 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
4516 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
4517 >= NPARM_REGS (SImode) - reg)
4518 for (; reg < NPARM_REGS (SImode); reg++)
4519 emit_insn (gen_shcompact_preserve_incoming_args
4520 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4521 else if (CALL_COOKIE_INT_REG_GET
4522 (current_function_args_info.call_cookie, reg) == 1)
4523 emit_insn (gen_shcompact_preserve_incoming_args
4524 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4526 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
4527 stack_pointer_rtx);
4528 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
4529 GEN_INT (current_function_args_info.call_cookie));
4530 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
4531 gen_rtx_REG (SImode, R0_REG));
4533 else if (TARGET_SHMEDIA)
4535 int tr = sh_media_register_for_return ();
4537 if (tr >= 0)
4539 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
4540 gen_rtx_REG (DImode, PR_MEDIA_REG));
4542 /* If this function only exits with sibcalls, this copy
4543 will be flagged as dead. */
4544 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4545 const0_rtx,
4546 REG_NOTES (insn));
4550 /* Emit the code for SETUP_INCOMING_VARARGS. */
4551 if (current_function_varargs || current_function_stdarg)
4553 /* This is not used by the SH3E calling convention. */
4554 if (TARGET_SH1 && ! TARGET_SH3E && ! TARGET_SH5 && ! TARGET_HITACHI)
4556 /* Push arg regs as if they'd been provided by the caller on the stack. */
4557 for (i = 0; i < NPARM_REGS(SImode); i++)
4559 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4560 rtx insn;
4562 if (i >= (NPARM_REGS(SImode)
4563 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4565 break;
4566 insn = push (rn);
4567 RTX_FRAME_RELATED_P (insn) = 0;
4568 extra_push += 4;
4573 /* If we're supposed to switch stacks at function entry, do so now. */
4574 if (sp_switch)
4575 emit_insn (gen_sp_switch_1 ());
4577 calc_live_regs (&d, live_regs_mask);
4578 /* ??? Maybe we could save some switching if we can move a mode switch
4579 that already happens to be at the function start into the prologue. */
4580 if (target_flags != save_flags)
4581 emit_insn (gen_toggle_sz ());
4583 if (TARGET_SH5)
4585 int i;
4586 int offset;
4587 int align;
4588 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4589 int offset_in_r0 = -1;
4590 int sp_in_r0 = 0;
4592 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
4593 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4594 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4596 offset = d + d_rounding;
4597 output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
4599 /* We loop twice: first we save the 8-byte-aligned registers at
4600 the higher addresses, which are known to be aligned; then we
4601 save the 32-bit registers that don't need 8-byte
4602 alignment. */
4603 for (align = 1; align >= 0; align--)
4604 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
4605 if (live_regs_mask[i/32] & (1 << (i % 32)))
4607 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4608 int reg = i;
4609 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
4611 if (mode == SFmode && (i % 2) == 1
4612 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4613 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
4615 mode = DFmode;
4616 i--;
4617 reg--;
4620 /* If we're doing the aligned pass and this is not aligned,
4621 or we're doing the unaligned pass and this is aligned,
4622 skip it. */
4623 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4624 == 0) != align)
4625 continue;
4627 offset -= GET_MODE_SIZE (mode);
4629 reg_rtx = gen_rtx_REG (mode, reg);
4631 mem_rtx = gen_rtx_MEM (mode,
4632 gen_rtx_PLUS (Pmode,
4633 stack_pointer_rtx,
4634 GEN_INT (offset)));
4636 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
4638 mem_rtx = NULL_RTX;
4640 try_pre_dec:
4642 if (HAVE_PRE_DECREMENT
4643 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
4644 || mem_rtx == NULL_RTX
4645 || i == PR_REG || SPECIAL_REGISTER_P (i)))
4647 pre_dec = gen_rtx_MEM (mode,
4648 gen_rtx_PRE_DEC (Pmode, r0));
4650 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
4651 pre_dec_ok);
4653 pre_dec = NULL_RTX;
4655 break;
4657 pre_dec_ok:
4658 mem_rtx = NULL_RTX;
4659 offset += GET_MODE_SIZE (mode);
4661 while (0);
4663 if (mem_rtx != NULL_RTX)
4664 goto addr_ok;
4666 if (offset_in_r0 == -1)
4668 emit_move_insn (r0, GEN_INT (offset));
4669 offset_in_r0 = offset;
4671 else if (offset != offset_in_r0)
4673 emit_move_insn (r0,
4674 gen_rtx_PLUS
4675 (Pmode, r0,
4676 GEN_INT (offset - offset_in_r0)));
4677 offset_in_r0 += offset - offset_in_r0;
4680 if (pre_dec != NULL_RTX)
4682 if (! sp_in_r0)
4684 emit_move_insn (r0,
4685 gen_rtx_PLUS
4686 (Pmode, r0, stack_pointer_rtx));
4687 sp_in_r0 = 1;
4690 offset -= GET_MODE_SIZE (mode);
4691 offset_in_r0 -= GET_MODE_SIZE (mode);
4693 mem_rtx = pre_dec;
4695 else if (sp_in_r0)
4696 mem_rtx = gen_rtx_MEM (mode, r0);
4697 else
4698 mem_rtx = gen_rtx_MEM (mode,
4699 gen_rtx_PLUS (Pmode,
4700 stack_pointer_rtx,
4701 r0));
4703 /* We must not use an r0-based address for target-branch
4704 registers or for special registers without pre-dec
4705 memory addresses, since we store their values in r0
4706 first. */
4707 if (TARGET_REGISTER_P (i)
4708 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
4709 && mem_rtx != pre_dec))
4710 abort ();
4712 addr_ok:
4713 if (TARGET_REGISTER_P (i)
4714 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
4715 && mem_rtx != pre_dec))
4717 rtx r0mode = gen_rtx_REG (GET_MODE (reg_rtx), R0_REG);
4719 emit_move_insn (r0mode, reg_rtx);
4721 offset_in_r0 = -1;
4722 sp_in_r0 = 0;
4724 reg_rtx = r0mode;
4727 emit_move_insn (mem_rtx, reg_rtx);
4730 if (offset != d_rounding)
4731 abort ();
4733 else
4734 push_regs (live_regs_mask);
4736 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
4738 rtx insn = get_last_insn ();
4739 rtx last = emit_insn (gen_GOTaddr2picreg ());
4741 /* Mark these insns as possibly dead. Sometimes, flow2 may
4742 delete all uses of the PIC register. In this case, let it
4743 delete the initialization too. */
4746 insn = NEXT_INSN (insn);
4748 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4749 const0_rtx,
4750 REG_NOTES (insn));
4752 while (insn != last);
4755 if (SHMEDIA_REGS_STACK_ADJUST ())
4757 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4758 gen_rtx_SYMBOL_REF (Pmode,
4759 TARGET_FPU_ANY
4760 ? "__GCC_push_shmedia_regs"
4761 : "__GCC_push_shmedia_regs_nofpu"));
4762 /* This must NOT go through the PLT, otherwise mach and macl
4763 may be clobbered. */
4764 emit_insn (gen_shmedia_save_restore_regs_compact
4765 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
4768 if (target_flags != save_flags)
4770 rtx insn = emit_insn (gen_toggle_sz ());
4772 /* If we're lucky, a mode switch in the function body will
4773 overwrite fpscr, making this insn dead. Tell flow this
4774 insn is ok to delete. */
4775 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4776 const0_rtx,
4777 REG_NOTES (insn));
4780 target_flags = save_flags;
4782 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
4783 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4785 if (frame_pointer_needed)
4786 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
4788 if (TARGET_SHCOMPACT
4789 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4791 /* This must NOT go through the PLT, otherwise mach and macl
4792 may be clobbered. */
4793 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4794 gen_rtx_SYMBOL_REF (Pmode,
4795 "__GCC_shcompact_incoming_args"));
4796 emit_insn (gen_shcompact_incoming_args ());
4800 void
4801 sh_expand_epilogue ()
4803 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
4804 int d, i;
4805 int d_rounding = 0;
4807 int save_flags = target_flags;
4808 int frame_size;
4810 calc_live_regs (&d, live_regs_mask);
4812 if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
4813 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4814 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4816 frame_size = rounded_frame_size (d) - d_rounding;
4818 if (frame_pointer_needed)
4820 output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
4822 /* We must avoid moving the stack pointer adjustment past code
4823 which reads from the local frame, else an interrupt could
4824 occur after the SP adjustment and clobber data in the local
4825 frame. */
4826 emit_insn (gen_blockage ());
4827 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
4829 else if (frame_size)
4831 /* We must avoid moving the stack pointer adjustment past code
4832 which reads from the local frame, else an interrupt could
4833 occur after the SP adjustment and clobber data in the local
4834 frame. */
4835 emit_insn (gen_blockage ());
4836 output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
4839 if (SHMEDIA_REGS_STACK_ADJUST ())
4841 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4842 gen_rtx_SYMBOL_REF (Pmode,
4843 TARGET_FPU_ANY
4844 ? "__GCC_pop_shmedia_regs"
4845 : "__GCC_pop_shmedia_regs_nofpu"));
4846 /* This must NOT go through the PLT, otherwise mach and macl
4847 may be clobbered. */
4848 emit_insn (gen_shmedia_save_restore_regs_compact
4849 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
4852 /* Pop all the registers. */
4854 if (target_flags != save_flags)
4855 emit_insn (gen_toggle_sz ());
4856 if (TARGET_SH5)
4858 int offset = d_rounding;
4859 int offset_in_r0 = -1;
4860 int sp_in_r0 = 0;
4861 int align;
4862 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4864 /* We loop twice: first we restore the 32-bit registers that
4865 don't need 8-byte alignment; then we restore the 8-byte-aligned
4866 registers stored at the higher addresses, which are known to
4867 be aligned. */
4868 for (align = 0; align <= 1; align++)
4869 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4870 if (live_regs_mask[i/32] & (1 << (i % 32)))
4872 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4873 int reg = i;
4874 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
4876 if (mode == SFmode && (i % 2) == 0
4877 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4878 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
4880 mode = DFmode;
4881 i++;
4884 /* If we're doing the aligned pass and this is not aligned,
4885 or we're doing the unaligned pass and this is aligned,
4886 skip it. */
4887 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4888 == 0) != align)
4889 continue;
4891 reg_rtx = gen_rtx_REG (mode, reg);
4893 mem_rtx = gen_rtx_MEM (mode,
4894 gen_rtx_PLUS (Pmode,
4895 stack_pointer_rtx,
4896 GEN_INT (offset)));
4898 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
4900 mem_rtx = NULL_RTX;
4902 try_post_inc:
4904 if (HAVE_POST_INCREMENT
4905 && (offset == offset_in_r0
4906 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
4907 && mem_rtx == NULL_RTX)
4908 || i == PR_REG || SPECIAL_REGISTER_P (i)))
4910 post_inc = gen_rtx_MEM (mode,
4911 gen_rtx_POST_INC (Pmode, r0));
4913 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
4914 post_inc_ok);
4916 post_inc = NULL_RTX;
4918 break;
4920 post_inc_ok:
4921 mem_rtx = NULL_RTX;
4923 while (0);
4925 if (mem_rtx != NULL_RTX)
4926 goto addr_ok;
4928 if (offset_in_r0 == -1)
4930 emit_move_insn (r0, GEN_INT (offset));
4931 offset_in_r0 = offset;
4933 else if (offset != offset_in_r0)
4935 emit_move_insn (r0,
4936 gen_rtx_PLUS
4937 (Pmode, r0,
4938 GEN_INT (offset - offset_in_r0)));
4939 offset_in_r0 += offset - offset_in_r0;
4942 if (post_inc != NULL_RTX)
4944 if (! sp_in_r0)
4946 emit_move_insn (r0,
4947 gen_rtx_PLUS
4948 (Pmode, r0, stack_pointer_rtx));
4949 sp_in_r0 = 1;
4952 mem_rtx = post_inc;
4954 offset_in_r0 += GET_MODE_SIZE (mode);
4956 else if (sp_in_r0)
4957 mem_rtx = gen_rtx_MEM (mode, r0);
4958 else
4959 mem_rtx = gen_rtx_MEM (mode,
4960 gen_rtx_PLUS (Pmode,
4961 stack_pointer_rtx,
4962 r0));
4964 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
4965 && mem_rtx != post_inc)
4966 abort ();
4968 addr_ok:
4969 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
4970 && mem_rtx != post_inc)
4972 insn = emit_move_insn (r0, mem_rtx);
4973 mem_rtx = r0;
4975 else if (TARGET_REGISTER_P (i))
4977 rtx r1 = gen_rtx_REG (mode, R1_REG);
4979 insn = emit_move_insn (r1, mem_rtx);
4980 mem_rtx = r1;
4983 insn = emit_move_insn (reg_rtx, mem_rtx);
4985 offset += GET_MODE_SIZE (mode);
4988 if (offset != d + d_rounding)
4989 abort ();
4991 goto finish;
4993 else
4994 d = 0;
4995 if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
4996 pop (PR_REG);
4997 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4999 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5001 if (j != PR_REG && live_regs_mask[j / 32] & (1 << (j % 32)))
5002 pop (j);
5004 finish:
5005 if (target_flags != save_flags)
5006 emit_insn (gen_toggle_sz ());
5007 target_flags = save_flags;
5009 output_stack_adjust (extra_push + current_function_pretend_args_size
5010 + d + d_rounding
5011 + current_function_args_info.stack_regs * 8,
5012 stack_pointer_rtx, 7, emit_insn);
5014 /* Switch back to the normal stack if necessary. */
5015 if (sp_switch)
5016 emit_insn (gen_sp_switch_2 ());
5018 /* Tell flow the insn that pops PR isn't dead. */
5019 /* PR_REG will never be live in SHmedia mode, and we don't need to
5020 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5021 by the return pattern. */
5022 if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
5023 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5026 static int sh_need_epilogue_known = 0;
5029 sh_need_epilogue ()
5031 if (! sh_need_epilogue_known)
5033 rtx epilogue;
5035 start_sequence ();
5036 sh_expand_epilogue ();
5037 epilogue = get_insns ();
5038 end_sequence ();
5039 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5041 return sh_need_epilogue_known > 0;
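/* sh_need_epilogue_known caches the answer per function: 0 means not
   yet computed, 1 that an epilogue is needed, -1 that it would be
   empty.  It is reset for the next function in
   sh_output_function_epilogue below.  */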
5044 /* Clear variables at function end. */
5046 static void
5047 sh_output_function_epilogue (file, size)
5048 FILE *file ATTRIBUTE_UNUSED;
5049 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5051 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5052 sh_need_epilogue_known = 0;
5053 sp_switch = NULL_RTX;
5057 sh_builtin_saveregs ()
5059 /* First unnamed integer register. */
5060 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5061 /* Number of integer registers we need to save. */
5062 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5063 /* First unnamed SFmode float reg. */
5064 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5065 /* Number of SFmode float regs to save. */
5066 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5067 rtx regbuf, fpregs;
5068 int bufsize, regno;
5069 HOST_WIDE_INT alias_set;
5071 if (TARGET_SH5)
5073 if (n_intregs)
5075 int pushregs = n_intregs;
5077 while (pushregs < NPARM_REGS (SImode) - 1
5078 && (CALL_COOKIE_INT_REG_GET
5079 (current_function_args_info.call_cookie,
5080 NPARM_REGS (SImode) - pushregs)
5081 == 1))
5083 current_function_args_info.call_cookie
5084 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5085 - pushregs, 1);
5086 pushregs++;
5089 if (pushregs == NPARM_REGS (SImode))
5090 current_function_args_info.call_cookie
5091 |= (CALL_COOKIE_INT_REG (0, 1)
5092 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5093 else
5094 current_function_args_info.call_cookie
5095 |= CALL_COOKIE_STACKSEQ (pushregs);
5097 current_function_pretend_args_size += 8 * n_intregs;
5099 if (TARGET_SHCOMPACT)
5100 return const0_rtx;
5103 if (! TARGET_SH3E && ! TARGET_SH4 && ! TARGET_SH5)
5105 error ("__builtin_saveregs not supported by this subtarget");
5106 return const0_rtx;
5109 if (TARGET_SHMEDIA)
5110 n_floatregs = 0;
5112 /* Allocate block of memory for the regs. */
5113 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5114 Or can assign_stack_local accept a 0 SIZE argument? */
5115 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5117 if (TARGET_SHMEDIA)
5118 regbuf = gen_rtx_MEM (BLKmode,
5119 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5120 else if (n_floatregs & 1)
5122 rtx addr;
5124 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5125 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5126 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5127 regbuf = change_address (regbuf, BLKmode, addr);
5129 else
5130 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5131 alias_set = get_varargs_alias_set ();
5132 set_mem_alias_set (regbuf, alias_set);
5134 /* Save int args.
5135 This is optimized to only save the regs that are necessary. Explicitly
5136 named args need not be saved. */
5137 if (n_intregs > 0)
5138 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5139 adjust_address (regbuf, BLKmode,
5140 n_floatregs * UNITS_PER_WORD),
5141 n_intregs, n_intregs * UNITS_PER_WORD);
5143 if (TARGET_SHMEDIA)
5144 /* Return the address of the regbuf. */
5145 return XEXP (regbuf, 0);
5147 /* Save float args.
5148 This is optimized to only save the regs that are necessary. Explicitly
5149 named args need not be saved.
5150 We explicitly build a pointer to the buffer because it halves the insn
5151 count when not optimizing (otherwise the pointer is built for each reg
5152 saved).
5153 We emit the moves in reverse order so that we can use predecrement. */
5155 fpregs = gen_reg_rtx (Pmode);
5156 emit_move_insn (fpregs, XEXP (regbuf, 0));
5157 emit_insn (gen_addsi3 (fpregs, fpregs,
5158 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5159 if (TARGET_SH4)
5161 rtx mem;
5162 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5164 emit_insn (gen_addsi3 (fpregs, fpregs,
5165 GEN_INT (-2 * UNITS_PER_WORD)));
5166 mem = gen_rtx_MEM (DFmode, fpregs);
5167 set_mem_alias_set (mem, alias_set);
5168 emit_move_insn (mem,
5169 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5171 regno = first_floatreg;
5172 if (regno & 1)
5174 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5175 mem = gen_rtx_MEM (SFmode, fpregs);
5176 set_mem_alias_set (mem, alias_set);
5177 emit_move_insn (mem,
5178 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5179 - (TARGET_LITTLE_ENDIAN != 0)));
5182 else
5183 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5185 rtx mem;
5187 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5188 mem = gen_rtx_MEM (SFmode, fpregs);
5189 set_mem_alias_set (mem, alias_set);
5190 emit_move_insn (mem,
5191 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5194 /* Return the address of the regbuf. */
5195 return XEXP (regbuf, 0);
5198 /* Define the `__builtin_va_list' type for the ABI. */
5200 tree
5201 sh_build_va_list ()
5203 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5204 tree record;
5206 if (TARGET_SH5 || (! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
5207 return ptr_type_node;
5209 record = make_node (RECORD_TYPE);
5211 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5212 ptr_type_node);
5213 f_next_o_limit = build_decl (FIELD_DECL,
5214 get_identifier ("__va_next_o_limit"),
5215 ptr_type_node);
5216 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5217 ptr_type_node);
5218 f_next_fp_limit = build_decl (FIELD_DECL,
5219 get_identifier ("__va_next_fp_limit"),
5220 ptr_type_node);
5221 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5222 ptr_type_node);
5224 DECL_FIELD_CONTEXT (f_next_o) = record;
5225 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5226 DECL_FIELD_CONTEXT (f_next_fp) = record;
5227 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5228 DECL_FIELD_CONTEXT (f_next_stack) = record;
5230 TYPE_FIELDS (record) = f_next_o;
5231 TREE_CHAIN (f_next_o) = f_next_o_limit;
5232 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5233 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5234 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5236 layout_type (record);
5238 return record;
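/* Roughly, the record built above corresponds to this C declaration
   (a sketch for illustration only; the real type is built in tree
   form):

     struct __va_list
     {
       void *__va_next_o;
       void *__va_next_o_limit;
       void *__va_next_fp;
       void *__va_next_fp_limit;
       void *__va_next_stack;
     };  */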
5241 /* Implement `va_start' for varargs and stdarg. */
5243 void
5244 sh_va_start (stdarg_p, valist, nextarg)
5245 int stdarg_p;
5246 tree valist;
5247 rtx nextarg;
5249 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5250 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5251 tree t, u;
5252 int nfp, nint;
5254 if (TARGET_SH5)
5256 expand_builtin_saveregs ();
5257 /* When the varargs dummy argument is ``passed'' on a register,
5258 we don't want std_expand_builtin_va_start() to apply any
5259 correction for it, so set stdarg_p so as to pretend there's
5260 no such dummy argument. */
5261 if (current_function_args_info.arg_count[(int) SH_ARG_INT]
5262 < NPARM_REGS (SImode))
5263 stdarg_p = 1;
5264 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
5265 return;
5268 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
5270 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
5271 return;
5274 f_next_o = TYPE_FIELDS (va_list_type_node);
5275 f_next_o_limit = TREE_CHAIN (f_next_o);
5276 f_next_fp = TREE_CHAIN (f_next_o_limit);
5277 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5278 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5280 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5281 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5282 valist, f_next_o_limit);
5283 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5284 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5285 valist, f_next_fp_limit);
5286 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5287 valist, f_next_stack);
5289 /* Call __builtin_saveregs. */
5290 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
5291 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
5292 TREE_SIDE_EFFECTS (t) = 1;
5293 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5295 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
5296 if (nfp < 8)
5297 nfp = 8 - nfp;
5298 else
5299 nfp = 0;
5300 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5301 build_int_2 (UNITS_PER_WORD * nfp, 0)));
5302 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
5303 TREE_SIDE_EFFECTS (t) = 1;
5304 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5306 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
5307 TREE_SIDE_EFFECTS (t) = 1;
5308 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5310 nint = current_function_args_info.arg_count[SH_ARG_INT];
5311 if (nint < 4)
5312 nint = 4 - nint;
5313 else
5314 nint = 0;
5315 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5316 build_int_2 (UNITS_PER_WORD * nint, 0)));
5317 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
5318 TREE_SIDE_EFFECTS (t) = 1;
5319 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5321 u = make_tree (ptr_type_node, nextarg);
5322 if (! stdarg_p && (nint == 0 || nfp == 0))
5324 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5325 build_int_2 (-UNITS_PER_WORD, -1)));
5327 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
5328 TREE_SIDE_EFFECTS (t) = 1;
5329 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
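/* To summarize the stores above, after va_start the fields hold
   (a sketch):
     next_fp       = regbuf (the __builtin_saveregs block)
     next_fp_limit = regbuf + (8 - named FP args) * UNITS_PER_WORD
     next_o        = next_fp_limit
     next_o_limit  = next_o + (4 - named int args) * UNITS_PER_WORD
     next_stack    = nextarg, one word lower in the !stdarg_p case
		     handled just above.  */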
5332 /* Implement `va_arg'. */
5335 sh_va_arg (valist, type)
5336 tree valist, type;
5338 HOST_WIDE_INT size, rsize;
5339 tree tmp, pptr_type_node;
5340 rtx addr_rtx, r;
5341 rtx result;
5342 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
5344 size = int_size_in_bytes (type);
5345 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5346 pptr_type_node = build_pointer_type (ptr_type_node);
5348 if (pass_by_ref)
5349 type = build_pointer_type (type);
5351 if (! TARGET_SH5 && (TARGET_SH3E || TARGET_SH4) && ! TARGET_HITACHI)
5353 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5354 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5355 int pass_as_float;
5356 rtx lab_false, lab_over;
5358 f_next_o = TYPE_FIELDS (va_list_type_node);
5359 f_next_o_limit = TREE_CHAIN (f_next_o);
5360 f_next_fp = TREE_CHAIN (f_next_o_limit);
5361 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5362 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5364 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5365 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5366 valist, f_next_o_limit);
5367 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
5368 valist, f_next_fp);
5369 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5370 valist, f_next_fp_limit);
5371 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5372 valist, f_next_stack);
5374 if (TARGET_SH4)
5376 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
5377 || (TREE_CODE (type) == COMPLEX_TYPE
5378 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5379 && size <= 16));
5381 else
5383 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
5386 addr_rtx = gen_reg_rtx (Pmode);
5387 lab_false = gen_label_rtx ();
5388 lab_over = gen_label_rtx ();
5390 if (pass_as_float)
5392 int first_floatreg
5393 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5394 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5396 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
5397 EXPAND_NORMAL),
5398 expand_expr (next_fp_limit, NULL_RTX,
5399 Pmode, EXPAND_NORMAL),
5400 GE, const1_rtx, Pmode, 1, lab_false);
5402 if (TYPE_ALIGN (type) > BITS_PER_WORD
5403 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
5404 && (n_floatregs & 1)))
5406 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
5407 build_int_2 (UNITS_PER_WORD, 0));
5408 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
5409 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
5410 TREE_SIDE_EFFECTS (tmp) = 1;
5411 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5414 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
5415 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5416 if (r != addr_rtx)
5417 emit_move_insn (addr_rtx, r);
5419 emit_jump_insn (gen_jump (lab_over));
5420 emit_barrier ();
5421 emit_label (lab_false);
5423 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5424 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5425 if (r != addr_rtx)
5426 emit_move_insn (addr_rtx, r);
5428 else
5430 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
5431 build_int_2 (rsize, 0));
5433 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
5434 EXPAND_NORMAL),
5435 expand_expr (next_o_limit, NULL_RTX,
5436 Pmode, EXPAND_NORMAL),
5437 GT, const1_rtx, Pmode, 1, lab_false);
5439 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
5440 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5441 if (r != addr_rtx)
5442 emit_move_insn (addr_rtx, r);
5444 emit_jump_insn (gen_jump (lab_over));
5445 emit_barrier ();
5446 emit_label (lab_false);
5448 if (size > 4 && ! TARGET_SH4)
5450 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
5451 TREE_SIDE_EFFECTS (tmp) = 1;
5452 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5455 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5456 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5457 if (r != addr_rtx)
5458 emit_move_insn (addr_rtx, r);
5461 emit_label (lab_over);
5463 tmp = make_tree (pptr_type_node, addr_rtx);
5464 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
5467 /* ??? In va-sh.h, there had been code to make values larger than
5468 size 8 indirect. This does not match the FUNCTION_ARG macros. */
5470 result = std_expand_builtin_va_arg (valist, type);
5471 if (pass_by_ref)
5473 #ifdef POINTERS_EXTEND_UNSIGNED
5474 if (GET_MODE (result) != Pmode)
5475 result = convert_memory_address (Pmode, result);
5476 #endif
5477 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
5478 set_mem_alias_set (result, get_varargs_alias_set ());
5480 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
5481 argument to the varargs alias set. */
5482 return result;
5485 /* Define the offset between two registers, one to be eliminated, and
5486 the other its replacement, at the start of a routine. */
5489 initial_elimination_offset (from, to)
5490 int from;
5491 int to;
5493 int regs_saved;
5494 int regs_saved_rounding = 0;
5495 int total_saved_regs_space;
5496 int total_auto_space;
5497 int save_flags = target_flags;
5498 int copy_flags;
5500 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
5501 calc_live_regs (&regs_saved, live_regs_mask);
5502 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
5503 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
5504 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5505 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
5507 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
5508 copy_flags = target_flags;
5509 target_flags = save_flags;
5511 total_saved_regs_space = regs_saved + regs_saved_rounding;
5513 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
5514 return total_saved_regs_space + total_auto_space
5515 + current_function_args_info.byref_regs * 8;
5517 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5518 return total_saved_regs_space + total_auto_space
5519 + current_function_args_info.byref_regs * 8;
5521 /* Initial gap between fp and sp is 0. */
5522 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5523 return 0;
5525 if (from == RETURN_ADDRESS_POINTER_REGNUM
5526 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
5527 if (TARGET_SH5)
5529 int i, n = total_saved_regs_space;
5530 int align;
5531 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5533 n += total_auto_space;
5535 /* If it wasn't saved, there's not much we can do. */
5536 if ((live_regs_mask[pr_reg / 32] & (1 << (pr_reg % 32))) == 0)
5537 return n;
5539 target_flags = copy_flags;
5541 /* We loop twice: first we check the 8-byte-aligned registers,
5542 which are stored at the higher addresses and known to be
5543 aligned; then we check the 32-bit registers that don't
5544 need 8-byte alignment. */
5545 for (align = 1; align >= 0; align--)
5546 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5547 if (live_regs_mask[i/32] & (1 << (i % 32)))
5549 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5551 if (mode == SFmode && (i % 2) == 1
5552 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5553 && (live_regs_mask[(i ^ 1) / 32]
5554 & (1 << ((i ^ 1) % 32))))
5556 mode = DFmode;
5557 i--;
5560 /* If we're doing the aligned pass and this is not aligned,
5561 or we're doing the unaligned pass and this is aligned,
5562 skip it. */
5563 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5564 == 0) != align)
5565 continue;
5567 n -= GET_MODE_SIZE (mode);
5569 if (i == pr_reg)
5571 target_flags = save_flags;
5572 return n;
5576 abort ();
5578 else
5579 return total_auto_space;
5581 abort ();
5584 /* Handle machine-specific pragmas to be semi-compatible with the
5585 Hitachi compiler. */
5587 void
5588 sh_pr_interrupt (pfile)
5589 cpp_reader *pfile ATTRIBUTE_UNUSED;
5591 pragma_interrupt = 1;
5594 void
5595 sh_pr_trapa (pfile)
5596 cpp_reader *pfile ATTRIBUTE_UNUSED;
5598 pragma_interrupt = pragma_trapa = 1;
5601 void
5602 sh_pr_nosave_low_regs (pfile)
5603 cpp_reader *pfile ATTRIBUTE_UNUSED;
5605 pragma_nosave_low_regs = 1;
5608 /* Generate an 'interrupt_handler' attribute for decls. */
5610 static void
5611 sh_insert_attributes (node, attributes)
5612 tree node;
5613 tree * attributes;
5615 if (! pragma_interrupt
5616 || TREE_CODE (node) != FUNCTION_DECL)
5617 return;
5619 /* We are only interested in declarations. */
5620 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
5621 return;
5623 /* Add an 'interrupt_handler' attribute. */
5624 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
5626 return;
5629 /* Supported attributes:
5631 interrupt_handler -- specifies this function is an interrupt handler.
5633 sp_switch -- specifies an alternate stack for an interrupt handler
5634 to run on.
5636 trap_exit -- use a trapa to exit an interrupt function instead of
5637 an rte instruction. */
5639 const struct attribute_spec sh_attribute_table[] =
5641 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
5642 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
5643 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
5644 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
5645 { NULL, 0, 0, false, false, false, NULL }
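/* Hypothetical user code exercising these attributes (illustrative
   only; note that the sp_switch and trap_exit handlers below
   additionally require `#pragma interrupt' to be in effect):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((sp_switch ("alt_stack"),
				      trap_exit (11)));  */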
5648 /* Handle an "interrupt_handler" attribute; arguments as in
5649 struct attribute_spec.handler. */
5650 static tree
5651 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
5652 tree *node;
5653 tree name;
5654 tree args ATTRIBUTE_UNUSED;
5655 int flags ATTRIBUTE_UNUSED;
5656 bool *no_add_attrs;
5658 if (TREE_CODE (*node) != FUNCTION_DECL)
5660 warning ("`%s' attribute only applies to functions",
5661 IDENTIFIER_POINTER (name));
5662 *no_add_attrs = true;
5665 return NULL_TREE;
5668 /* Handle an "sp_switch" attribute; arguments as in
5669 struct attribute_spec.handler. */
5670 static tree
5671 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
5672 tree *node;
5673 tree name;
5674 tree args;
5675 int flags ATTRIBUTE_UNUSED;
5676 bool *no_add_attrs;
5678 if (TREE_CODE (*node) != FUNCTION_DECL)
5680 warning ("`%s' attribute only applies to functions",
5681 IDENTIFIER_POINTER (name));
5682 *no_add_attrs = true;
5684 else if (!pragma_interrupt)
5686 /* The sp_switch attribute only has meaning for interrupt functions. */
5687 warning ("`%s' attribute only applies to interrupt functions",
5688 IDENTIFIER_POINTER (name));
5689 *no_add_attrs = true;
5691 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
5693 /* The argument must be a constant string. */
5694 warning ("`%s' attribute argument not a string constant",
5695 IDENTIFIER_POINTER (name));
5696 *no_add_attrs = true;
5698 else
5700 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
5701 TREE_STRING_POINTER (TREE_VALUE (args)));
5704 return NULL_TREE;
5707 /* Handle a "trap_exit" attribute; arguments as in
5708 struct attribute_spec.handler. */
5709 static tree
5710 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
5711 tree *node;
5712 tree name;
5713 tree args;
5714 int flags ATTRIBUTE_UNUSED;
5715 bool *no_add_attrs;
5717 if (TREE_CODE (*node) != FUNCTION_DECL)
5719 warning ("`%s' attribute only applies to functions",
5720 IDENTIFIER_POINTER (name));
5721 *no_add_attrs = true;
5723 else if (!pragma_interrupt)
5725 /* The trap_exit attribute only has meaning for interrupt functions. */
5726 warning ("`%s' attribute only applies to interrupt functions",
5727 IDENTIFIER_POINTER (name));
5728 *no_add_attrs = true;
5730 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
5732 /* The argument must be a constant integer. */
5733 warning ("`%s' attribute argument not an integer constant",
5734 IDENTIFIER_POINTER (name));
5735 *no_add_attrs = true;
5737 else
5739 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
5742 return NULL_TREE;
5746 /* Predicates used by the templates. */
5748 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
5749 Used only in general_movsrc_operand. */
5752 system_reg_operand (op, mode)
5753 rtx op;
5754 enum machine_mode mode ATTRIBUTE_UNUSED;
5756 switch (REGNO (op))
5758 case PR_REG:
5759 case MACL_REG:
5760 case MACH_REG:
5761 return 1;
5763 return 0;
5766 /* Returns 1 if OP can be the source of a simple move operation.
5767 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
5768 invalid as are subregs of system registers. */
5771 general_movsrc_operand (op, mode)
5772 rtx op;
5773 enum machine_mode mode;
5775 if (GET_CODE (op) == MEM)
5777 rtx inside = XEXP (op, 0);
5778 if (GET_CODE (inside) == CONST)
5779 inside = XEXP (inside, 0);
5781 if (GET_CODE (inside) == LABEL_REF)
5782 return 1;
5784 if (GET_CODE (inside) == PLUS
5785 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
5786 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
5787 return 1;
5789 /* Only post inc allowed. */
5790 if (GET_CODE (inside) == PRE_DEC)
5791 return 0;
5794 if ((mode == QImode || mode == HImode)
5795 && (GET_CODE (op) == SUBREG
5796 && GET_CODE (XEXP (op, 0)) == REG
5797 && system_reg_operand (XEXP (op, 0), mode)))
5798 return 0;
5800 return general_operand (op, mode);
5803 /* Returns 1 if OP can be a destination of a move.
5804 Same as general_operand, but no post-inc allowed. */
5807 general_movdst_operand (op, mode)
5808 rtx op;
5809 enum machine_mode mode;
5811 /* Only pre dec allowed. */
5812 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
5813 return 0;
5815 return general_operand (op, mode);
5818 /* Accept a register, but not a subreg of any kind. This allows us to
5819 avoid pathological cases in reload wrt data movement common in
5820 int->fp conversion. */
5823 reg_no_subreg_operand (op, mode)
5824 register rtx op;
5825 enum machine_mode mode;
5827 if (GET_CODE (op) == SUBREG)
5828 return 0;
5829 return register_operand (op, mode);
5832 /* Returns 1 if OP is a normal arithmetic register. */
5835 arith_reg_operand (op, mode)
5836 rtx op;
5837 enum machine_mode mode;
5839 if (register_operand (op, mode))
5841 int regno;
5843 if (GET_CODE (op) == REG)
5844 regno = REGNO (op);
5845 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
5846 regno = REGNO (SUBREG_REG (op));
5847 else
5848 return 1;
5850 return (regno != T_REG && regno != PR_REG
5851 && ! TARGET_REGISTER_P (regno)
5852 && (regno != FPUL_REG || TARGET_SH4)
5853 && regno != MACH_REG && regno != MACL_REG);
5855 return 0;
5859 fp_arith_reg_operand (op, mode)
5860 rtx op;
5861 enum machine_mode mode;
5863 if (register_operand (op, mode))
5865 int regno;
5867 if (GET_CODE (op) == REG)
5868 regno = REGNO (op);
5869 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
5870 regno = REGNO (SUBREG_REG (op));
5871 else
5872 return 1;
5874 return (regno >= FIRST_PSEUDO_REGISTER
5875 || FP_REGISTER_P (regno));
5877 return 0;
5880 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
5883 arith_operand (op, mode)
5884 rtx op;
5885 enum machine_mode mode;
5887 if (arith_reg_operand (op, mode))
5888 return 1;
5890 if (TARGET_SHMEDIA)
5892 /* FIXME: We should be checking whether the CONST_INT fits in a
5893 CONST_OK_FOR_J here, but this causes reload_cse to crash when
5894 attempting to transform a sequence of two 64-bit sets of the
5895 same register from literal constants into a set and an add,
5896 when the difference is too wide for an add. */
5897 if (GET_CODE (op) == CONST_INT
5898 || EXTRA_CONSTRAINT_S (op))
5899 return 1;
5900 else
5901 return 0;
5903 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
5904 return 1;
5906 return 0;
5909 /* Returns 1 if OP is a valid source operand for a compare insn. */
5912 arith_reg_or_0_operand (op, mode)
5913 rtx op;
5914 enum machine_mode mode;
5916 if (arith_reg_operand (op, mode))
5917 return 1;
5919 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
5920 return 1;
5922 return 0;
5925 /* Return 1 if OP is a valid source operand for an SHmedia operation
5926 that takes either a register or a 6-bit immediate. */
5929 shmedia_6bit_operand (op, mode)
5930 rtx op;
5931 enum machine_mode mode;
5933 return (arith_reg_operand (op, mode)
5934 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_O (INTVAL (op))));
5937 /* Returns 1 if OP is a valid source operand for a logical operation. */
5940 logical_operand (op, mode)
5941 rtx op;
5942 enum machine_mode mode;
5944 if (arith_reg_operand (op, mode))
5945 return 1;
5947 if (TARGET_SHMEDIA)
5949 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_P (INTVAL (op)))
5950 return 1;
5951 else
5952 return 0;
5954 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
5955 return 1;
5957 return 0;
5960 /* Nonzero if OP is a floating point value with value 0.0. */
5963 fp_zero_operand (op)
5964 rtx op;
5966 REAL_VALUE_TYPE r;
5968 if (GET_MODE (op) != SFmode)
5969 return 0;
5971 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
5972 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
5975 /* Nonzero if OP is a floating point value with value 1.0. */
5978 fp_one_operand (op)
5979 rtx op;
5981 REAL_VALUE_TYPE r;
5983 if (GET_MODE (op) != SFmode)
5984 return 0;
5986 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
5987 return REAL_VALUES_EQUAL (r, dconst1);
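/* The two predicates above matter because SH3E/SH4 provide fldi0 and
   fldi1, which load exactly 0.0 and 1.0 into a float register without
   a constant-pool load; see fldi_ok below for when those alternatives
   may safely be chosen.  */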
5990 /* For -m4 and -m4-single-only, mode switching is used. If we are
5991 compiling without -mfmovd, movsf_ie isn't taken into account for
5992 mode switching. We could check in machine_dependent_reorg for
5993 cases where we know we are in single precision mode, but there is
5994 no interface to find that out during reload, so we must avoid
5995 choosing an fldi alternative during reload and thus failing to
5996 allocate a scratch register for the constant loading. */
5998 fldi_ok ()
6000 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
6004 tertiary_reload_operand (op, mode)
6005 rtx op;
6006 enum machine_mode mode ATTRIBUTE_UNUSED;
6008 enum rtx_code code = GET_CODE (op);
6009 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
6013 fpscr_operand (op, mode)
6014 rtx op;
6015 enum machine_mode mode ATTRIBUTE_UNUSED;
6017 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
6018 && GET_MODE (op) == PSImode);
6022 fpul_operand (op, mode)
6023 rtx op;
6024 enum machine_mode mode;
6026 if (TARGET_SHMEDIA)
6027 return fp_arith_reg_operand (op, mode);
6029 return (GET_CODE (op) == REG
6030 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
6031 && GET_MODE (op) == mode);
6035 symbol_ref_operand (op, mode)
6036 rtx op;
6037 enum machine_mode mode ATTRIBUTE_UNUSED;
6039 return (GET_CODE (op) == SYMBOL_REF);
6043 commutative_float_operator (op, mode)
6044 rtx op;
6045 enum machine_mode mode;
6047 if (GET_MODE (op) != mode)
6048 return 0;
6049 switch (GET_CODE (op))
6051 case PLUS:
6052 case MULT:
6053 return 1;
6054 default:
6055 break;
6057 return 0;
6061 noncommutative_float_operator (op, mode)
6062 rtx op;
6063 enum machine_mode mode;
6065 if (GET_MODE (op) != mode)
6066 return 0;
6067 switch (GET_CODE (op))
6069 case MINUS:
6070 case DIV:
6071 return 1;
6072 default:
6073 break;
6075 return 0;
6079 binary_float_operator (op, mode)
6080 rtx op;
6081 enum machine_mode mode;
6083 if (GET_MODE (op) != mode)
6084 return 0;
6085 switch (GET_CODE (op))
6087 case PLUS:
6088 case MINUS:
6089 case MULT:
6090 case DIV:
6091 return 1;
6092 default:
6093 break;
6095 return 0;
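/* The three operator predicates above classify the rtx code of an
   already-built arithmetic expression; the mode must match exactly.
   A sketch (illustrative only, with fresh pseudos):  */
#if 0
{
  rtx a = gen_reg_rtx (SFmode);
  rtx b = gen_reg_rtx (SFmode);
  rtx sum = gen_rtx_PLUS (SFmode, a, b);

  commutative_float_operator (sum, SFmode);     /* 1 */
  noncommutative_float_operator (sum, SFmode);  /* 0 */
  binary_float_operator (sum, SFmode);          /* 1 */
  binary_float_operator (sum, DFmode);          /* 0: mode mismatch.  */
}
#endif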
6098 /* Accept pseudos and branch target registers. */
6100 target_reg_operand (op, mode)
6101 rtx op;
6102 enum machine_mode mode;
6104 if (mode != DImode
6105 || GET_MODE (op) != DImode)
6106 return 0;
6108 if (GET_CODE (op) == SUBREG)
6109 op = XEXP (op, 0);
6111 if (GET_CODE (op) != REG)
6112 return 0;
6114 /* We must protect ourselves from matching pseudos that are virtual
6115 registers, because they will eventually be replaced with hardware
6116 registers that aren't branch-target registers. */
6117 if (REGNO (op) > LAST_VIRTUAL_REGISTER
6118 || TARGET_REGISTER_P (REGNO (op)))
6119 return 1;
6121 return 0;
6124 /* Same as target_reg_operand, except that label_refs and symbol_refs
6125 are accepted before reload. */
6127 target_operand (op, mode)
6128 rtx op;
6129 enum machine_mode mode;
6131 if (mode != DImode)
6132 return 0;
6134 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
6135 && EXTRA_CONSTRAINT_T (op))
6136 return ! reload_completed;
6138 return target_reg_operand (op, mode);
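/* Sketch of the before/after-reload split (illustrative only; LABEL is
   an assumed CODE_LABEL, and we assume the T constraint accepts label
   references, as the branch-target patterns use).  Before reload a
   label reference is still acceptable, since it can yet be loaded into
   a branch-target register; afterwards only target_reg_operand wins.  */
#if 0
{
  rtx lab = gen_rtx_LABEL_REF (VOIDmode, label);

  target_operand (lab, DImode);  /* 1 before reload, 0 after.  */
}
#endif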
6142 /* Return the destination address of a branch. */
6144 static int
6145 branch_dest (branch)
6146 rtx branch;
6148 rtx dest = SET_SRC (PATTERN (branch));
6149 int dest_uid;
6151 if (GET_CODE (dest) == IF_THEN_ELSE)
6152 dest = XEXP (dest, 1);
6153 dest = XEXP (dest, 0);
6154 dest_uid = INSN_UID (dest);
6155 return INSN_ADDRESSES (dest_uid);
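/* Usage sketch (illustrative only; JUMP is assumed to be a branch insn
   already laid out by shorten_branches, so INSN_ADDRESSES is valid):  */
#if 0
{
  int target = branch_dest (jump);
  int displacement = target - INSN_ADDRESSES (INSN_UID (jump));
}
#endif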
6158 /* Return non-zero if REG is not used after INSN.
6159 We assume REG is a reload reg, and therefore does
6160 not live past labels. It may live past calls or jumps though. */
6162 reg_unused_after (reg, insn)
6163 rtx reg;
6164 rtx insn;
6166 enum rtx_code code;
6167 rtx set;
6169 /* If the reg is set by this instruction, then it is safe for our
6170 case. Disregard the case where this is a store to memory, since
6171 we are checking a register used in the store address. */
6172 set = single_set (insn);
6173 if (set && GET_CODE (SET_DEST (set)) != MEM
6174 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6175 return 1;
6177 while ((insn = NEXT_INSN (insn)))
6179 code = GET_CODE (insn);
6181 #if 0
6182 /* If this is a label that existed before reload, then the register
6183 is dead here. However, if this is a label added by reorg, then
6184 the register may still be live here. We can't tell the difference,
6185 so we just ignore labels completely. */
6186 if (code == CODE_LABEL)
6187 return 1;
6188 /* else */
6189 #endif
6191 if (code == JUMP_INSN)
6192 return 0;
6194 /* If this is a sequence, we must handle them all at once.
6195 We could have for instance a call that sets the target register,
6196 and an insn in a delay slot that uses the register. In this case,
6197 we must return 0. */
6198 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
6200 int i;
6201 int retval = 0;
6203 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
6205 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
6206 rtx set = single_set (this_insn);
6208 if (GET_CODE (this_insn) == CALL_INSN)
6209 code = CALL_INSN;
6210 else if (GET_CODE (this_insn) == JUMP_INSN)
6212 if (INSN_ANNULLED_BRANCH_P (this_insn))
6213 return 0;
6214 code = JUMP_INSN;
6217 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6218 return 0;
6219 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6221 if (GET_CODE (SET_DEST (set)) != MEM)
6222 retval = 1;
6223 else
6224 return 0;
6226 if (set == 0
6227 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
6228 return 0;
6230 if (retval == 1)
6231 return 1;
6232 else if (code == JUMP_INSN)
6233 return 0;
6235 else if (GET_RTX_CLASS (code) == 'i')
6237 rtx set = single_set (insn);
6239 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6240 return 0;
6241 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6242 return GET_CODE (SET_DEST (set)) != MEM;
6243 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
6244 return 0;
6247 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
6248 return 1;
6250 return 1;
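/* Sketch of a typical query (illustrative only; INSN is assumed):
   may r0 be clobbered after INSN?  Only if no later insn in the
   delay-slot-aware stream still reads it.  */
#if 0
{
  rtx r0 = gen_rtx_REG (SImode, 0);
  rtx scratch = NULL_RTX;

  if (reg_unused_after (r0, insn))
    scratch = r0;  /* Safe: nothing downstream still reads r0.  */
}
#endif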
6253 #include "ggc.h"
6255 static GTY(()) rtx fpscr_rtx;
6257 get_fpscr_rtx ()
6259 if (! fpscr_rtx)
6261 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
6262 REG_USERVAR_P (fpscr_rtx) = 1;
6263 mark_user_reg (fpscr_rtx);
6265 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
6266 mark_user_reg (fpscr_rtx);
6267 return fpscr_rtx;
6270 void
6271 emit_sf_insn (pat)
6272 rtx pat;
6274 emit_insn (pat);
6277 void
6278 emit_df_insn (pat)
6279 rtx pat;
6281 emit_insn (pat);
6284 void
6285 expand_sf_unop (fun, operands)
6286 rtx (*fun) PARAMS ((rtx, rtx, rtx));
6287 rtx *operands;
6289 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
6292 void
6293 expand_sf_binop (fun, operands)
6294 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
6295 rtx *operands;
6297 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
6298 get_fpscr_rtx ()));
6301 void
6302 expand_df_unop (fun, operands)
6303 rtx (*fun) PARAMS ((rtx, rtx, rtx));
6304 rtx *operands;
6306 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
6309 void
6310 expand_df_binop (fun, operands)
6311 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
6312 rtx *operands;
6314 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
6315 get_fpscr_rtx ()));
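/* These wrappers keep every SH4 float insn carrying an explicit use of
   FPSCR, so precision-mode switching stays visible to the optimizers.
   A sketch of how an sh.md expander invokes them (assuming addsf3_i is
   the fpscr-carrying add pattern):  */
#if 0
expand_sf_binop (&gen_addsf3_i, operands);
#endif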
6318 /* ??? gcc does flow analysis strictly after common subexpression
6319 elimination. As a result, common subexpression elimination fails
6320 when there are some intervening statements setting the same register.
6321 If we did nothing about this, this would hurt the precision switching
6322 for SH4 badly. There is some cse after reload, but it is unable to
6323 undo the extra register pressure from the unused instructions, and
6324 it cannot remove auto-increment loads.
6326 A C code example that shows this flow/cse weakness for (at least) SH
6327 and sparc (as of gcc ss-970706) is this:
6329 double
6330 f(double a)
6332 double d;
6333 d = 0.1;
6334 a += d;
6335 d = 1.1;
6336 d = 0.1;
6337 a *= d;
6338 return a;
6341 So we add another pass before common subexpression elimination, to
6342 remove assignments that are dead due to a following assignment in the
6343 same basic block. */
6345 static void
6346 mark_use (x, reg_set_block)
6347 rtx x, *reg_set_block;
6349 enum rtx_code code;
6351 if (! x)
6352 return;
6353 code = GET_CODE (x);
6354 switch (code)
6356 case REG:
6358 int regno = REGNO (x);
6359 int nregs = (regno < FIRST_PSEUDO_REGISTER
6360 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
6361 : 1);
6364 do reg_set_block[regno + nregs - 1] = 0;
6366 while (--nregs);
6367 break;
6369 case SET:
6371 rtx dest = SET_DEST (x);
6373 if (GET_CODE (dest) == SUBREG)
6374 dest = SUBREG_REG (dest);
6375 if (GET_CODE (dest) != REG)
6376 mark_use (dest, reg_set_block);
6377 mark_use (SET_SRC (x), reg_set_block);
6378 break;
6380 case CLOBBER:
6381 break;
6382 default:
6384 const char *fmt = GET_RTX_FORMAT (code);
6385 int i, j;
6386 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6388 if (fmt[i] == 'e')
6389 mark_use (XEXP (x, i), reg_set_block);
6390 else if (fmt[i] == 'E')
6391 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6392 mark_use (XVECEXP (x, i, j), reg_set_block);
6394 break;
6399 static rtx get_free_reg PARAMS ((HARD_REG_SET));
6401 /* This function returns a register to use to load the address to load
6402 the fpscr from. Currently it always returns r1 or r7, but when we are
6403 able to use pseudo registers after combine, or have a better mechanism
6404 for choosing a register, it should be done here. */
6405 /* REGS_LIVE is the liveness information for the point for which we
6406 need this allocation. In some bare-bones exit blocks, r1 is live at the
6407 start. We can even have all of r0..r3 being live:
6408 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
6409 The new insns placed before INSN will clobber the register
6410 we return. If a basic block consists only of setting the return value
6411 register to a pseudo and using that register, the return value is not
6412 live before or after this block, yet we'll insert our insns right in
6413 the middle. */
6415 static rtx
6416 get_free_reg (regs_live)
6417 HARD_REG_SET regs_live;
6419 if (! TEST_HARD_REG_BIT (regs_live, 1))
6420 return gen_rtx_REG (Pmode, 1);
6422 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
6423 there shouldn't be anything but a jump before the function end. */
6424 if (! TEST_HARD_REG_BIT (regs_live, 7))
6425 return gen_rtx_REG (Pmode, 7);
6427 abort ();
6430 /* This function will set the fpscr from memory.
6431 MODE is the mode we are setting it to. */
6432 void
6433 fpscr_set_from_mem (mode, regs_live)
6434 int mode;
6435 HARD_REG_SET regs_live;
6437 enum attr_fp_mode fp_mode = mode;
6438 rtx addr_reg = get_free_reg (regs_live);
6440 if (fp_mode == (enum attr_fp_mode) NORMAL_MODE (FP_MODE))
6441 emit_insn (gen_fpu_switch1 (addr_reg));
6442 else
6443 emit_insn (gen_fpu_switch0 (addr_reg));
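/* Usage sketch (illustrative only): switch to single precision at a
   mode-switch point.  REGS_LIVE would come from the mode-switching
   pass; FP_MODE_SINGLE is the fp_mode attribute value from sh.md.  */
#if 0
fpscr_set_from_mem (FP_MODE_SINGLE, regs_live);
#endif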
6446 /* Is the given character a logical line separator for the assembler? */
6447 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
6448 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
6449 #endif
6452 sh_insn_length_adjustment (insn)
6453 rtx insn;
6455 /* Instructions with unfilled delay slots take up an extra two bytes for
6456 the nop in the delay slot. */
6457 if (((GET_CODE (insn) == INSN
6458 && GET_CODE (PATTERN (insn)) != USE
6459 && GET_CODE (PATTERN (insn)) != CLOBBER)
6460 || GET_CODE (insn) == CALL_INSN
6461 || (GET_CODE (insn) == JUMP_INSN
6462 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
6463 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
6464 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
6465 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
6466 return 2;
6468 /* sh-dsp parallel processing insns take four bytes instead of two. */
6470 if (GET_CODE (insn) == INSN)
6472 int sum = 0;
6473 rtx body = PATTERN (insn);
6474 const char *template;
6475 char c;
6476 int maybe_label = 1;
6478 if (GET_CODE (body) == ASM_INPUT)
6479 template = XSTR (body, 0);
6480 else if (asm_noperands (body) >= 0)
6481 template
6482 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
6483 else
6484 return 0;
6487 int ppi_adjust = 0;
6490 do c = *template++;
6491 while (c == ' ' || c == '\t');
6492 /* All sh-dsp parallel-processing insns start with p.
6493 The only non-ppi sh insn starting with p is pref.
6494 The only ppi starting with pr is prnd. */
6495 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
6496 ppi_adjust = 2;
6497 /* The repeat pseudo-insn expands to three insns, a total of
6498 six bytes in size. */
6499 else if ((c == 'r' || c == 'R')
6500 && ! strncasecmp ("epeat", template, 5))
6501 ppi_adjust = 4;
6502 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
6504 /* If this is a label, it is obviously not a ppi insn. */
6505 if (c == ':' && maybe_label)
6507 ppi_adjust = 0;
6508 break;
6510 else if (c == '\'' || c == '"')
6511 maybe_label = 0;
6512 c = *template++;
6514 sum += ppi_adjust;
6515 maybe_label = c != ':';
6517 while (c);
6518 return sum;
6520 return 0;
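/* Example inputs for the asm scanner above (illustrative; the operand
   syntax is schematic and is not checked here):  */
#if 0
asm ("padd   a0,m0,a0");  /* ppi insn: 4 bytes, adjustment +2.     */
asm ("pref   @r1");       /* the one non-ppi 'p' insn: no change.  */
asm ("repeat r0,r1,32");  /* pseudo-insn, three insns: +4.         */
#endif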
6523 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
6524 isn't protected by a PIC unspec. */
6526 nonpic_symbol_mentioned_p (x)
6527 rtx x;
6529 register const char *fmt;
6530 register int i;
6532 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
6533 || GET_CODE (x) == PC)
6534 return 1;
6536 /* We don't want to look into the possible MEM location of a
6537 CONST_DOUBLE, since we're not going to use it, in general. */
6538 if (GET_CODE (x) == CONST_DOUBLE)
6539 return 0;
6541 if (GET_CODE (x) == UNSPEC
6542 && (XINT (x, 1) == UNSPEC_PIC
6543 || XINT (x, 1) == UNSPEC_GOT
6544 || XINT (x, 1) == UNSPEC_GOTOFF
6545 || XINT (x, 1) == UNSPEC_GOTPLT
6546 || XINT (x, 1) == UNSPEC_PLT))
6547 return 0;
6549 fmt = GET_RTX_FORMAT (GET_CODE (x));
6550 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6552 if (fmt[i] == 'E')
6554 register int j;
6556 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6557 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
6558 return 1;
6560 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
6561 return 1;
6564 return 0;
6567 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
6568 @GOTOFF in `reg'. */
6570 legitimize_pic_address (orig, mode, reg)
6571 rtx orig;
6572 enum machine_mode mode ATTRIBUTE_UNUSED;
6573 rtx reg;
6575 if (GET_CODE (orig) == LABEL_REF
6576 || (GET_CODE (orig) == SYMBOL_REF
6577 && (CONSTANT_POOL_ADDRESS_P (orig)
6578 /* SYMBOL_REF_FLAG is set on static symbols. */
6579 || SYMBOL_REF_FLAG (orig))))
6581 if (reg == 0)
6582 reg = gen_reg_rtx (Pmode);
6584 emit_insn (gen_symGOTOFF2reg (reg, orig));
6585 return reg;
6587 else if (GET_CODE (orig) == SYMBOL_REF)
6589 if (reg == 0)
6590 reg = gen_reg_rtx (Pmode);
6592 emit_insn (gen_symGOT2reg (reg, orig));
6593 return reg;
6595 return orig;
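/* Usage sketch (illustrative only; SYM is assumed to be a SYMBOL_REF).
   Locally-bound symbols come back via @GOTOFF, others via a @GOT slot
   load; the result is a pseudo usable as a memory address.  */
#if 0
{
  rtx addr = legitimize_pic_address (sym, Pmode, NULL_RTX);
  rtx mem = gen_rtx_MEM (Pmode, addr);
}
#endif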
6598 /* Mark the use of a constant in the literal table. If the constant
6599 has multiple labels, make it unique. */
6600 static rtx mark_constant_pool_use (x)
6601 rtx x;
6603 rtx insn, lab, pattern;
6605 if (x == NULL)
6606 return x;
6608 switch (GET_CODE (x))
6610 case LABEL_REF:
6611 x = XEXP (x, 0);
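/* Fall through.  */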
6612 case CODE_LABEL:
6613 break;
6614 default:
6615 return x;
6618 /* Get the first label in the list of labels for the same constant
6619 and delete the other labels in the list. */
6620 lab = x;
6621 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
6623 if (GET_CODE (insn) != CODE_LABEL
6624 || LABEL_REFS (insn) != NEXT_INSN (insn))
6625 break;
6626 lab = insn;
6629 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
6630 INSN_DELETED_P (insn) = 1;
6632 /* Mark constants in a window. */
6633 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
6635 if (GET_CODE (insn) != INSN)
6636 continue;
6638 pattern = PATTERN (insn);
6639 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
6640 continue;
6642 switch (XINT (pattern, 1))
6644 case UNSPECV_CONST2:
6645 case UNSPECV_CONST4:
6646 case UNSPECV_CONST8:
6647 XVECEXP (pattern, 0, 1) = const1_rtx;
6648 break;
6649 case UNSPECV_WINDOW_END:
6650 if (XVECEXP (pattern, 0, 0) == x)
6651 return lab;
6652 break;
6653 case UNSPECV_CONST_END:
6654 return lab;
6655 default:
6656 break;
6660 return lab;
6663 /* Return true if it's possible to redirect BRANCH1 to the destination
6664 of an unconditional jump BRANCH2. We only want to do this if the
6665 resulting branch will have a short displacement. */
6666 int
6667 sh_can_redirect_branch (branch1, branch2)
6668 rtx branch1;
6669 rtx branch2;
6671 if (flag_expensive_optimizations && simplejump_p (branch2))
6673 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
6674 rtx insn;
6675 int distance;
6677 for (distance = 0, insn = NEXT_INSN (branch1);
6678 insn && distance < 256;
6679 insn = PREV_INSN (insn))
6681 if (insn == dest)
6682 return 1;
6683 else
6684 distance += get_attr_length (insn);
6686 for (distance = 0, insn = NEXT_INSN (branch1);
6687 insn && distance < 256;
6688 insn = NEXT_INSN (insn))
6690 if (insn == dest)
6691 return 1;
6692 else
6693 distance += get_attr_length (insn);
6696 return 0;
6699 /* A C statement (sans semicolon) to update the integer variable COST
6700 based on the relationship between INSN that is dependent on
6701 DEP_INSN through the dependence LINK. The default is to make no
6702 adjustment to COST. This can be used for example to specify to
6703 the scheduler that an output- or anti-dependence does not incur
6704 the same cost as a data-dependence. */
6705 static int
6706 sh_adjust_cost (insn, link, dep_insn, cost)
6707 rtx insn;
6708 rtx link ATTRIBUTE_UNUSED;
6709 rtx dep_insn;
6710 int cost;
6712 rtx reg;
6714 if (GET_CODE (insn) == CALL_INSN
6716 /* The only input for a call that is timing-critical is the
6717 function's address. */
6718 rtx call = PATTERN (insn);
6720 if (GET_CODE (call) == PARALLEL)
6721 call = XVECEXP (call, 0, 0);
6722 if (GET_CODE (call) == SET)
6723 call = SET_SRC (call);
6724 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
6725 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
6726 cost = 0;
6728 /* All sfunc calls are parallels with at least four components.
6729 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
6730 else if (GET_CODE (PATTERN (insn)) == PARALLEL
6731 && XVECLEN (PATTERN (insn), 0) >= 4
6732 && (reg = sfunc_uses_reg (insn)))
6734 /* Likewise, the most timing-critical input for an sfunc call
6735 is the function address. However, sfuncs typically start
6736 using their arguments pretty quickly.
6737 Assume a four cycle delay before they are needed. */
6738 if (! reg_set_p (reg, dep_insn))
6739 cost -= TARGET_SUPERSCALAR ? 40 : 4;
6741 /* Adjust the latency of load_si / pcload_si type insns. Use the known
6742 nominal latency and form of the insn to speed up the check. */
6743 else if (cost == 3
6744 && GET_CODE (PATTERN (dep_insn)) == SET
6745 /* Latency for dmpy type insns is also 3, so check that
6746 it's actually a move insn. */
6747 && general_movsrc_operand (SET_SRC (PATTERN (dep_insn)), SImode))
6748 cost = 2;
6749 else if (cost == 30
6750 && GET_CODE (PATTERN (dep_insn)) == SET
6751 && GET_MODE (SET_SRC (PATTERN (dep_insn))) == SImode)
6752 cost = 20;
6754 return cost;
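/* The hook above is wired into the target vector (via
   TARGET_SCHED_ADJUST_COST); the scheduler then consults it for each
   dependence, roughly as sketched here:  */
#if 0
cost = (*targetm.sched.adjust_cost) (insn, link, dep_insn, cost);
#endif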
6757 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
6758 'special function' patterns (type sfunc) that clobber pr, but that
6759 do not look like function calls to leaf_function_p. Hence we must
6760 do this extra check. */
6762 sh_pr_n_sets ()
6764 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
6767 /* This function returns nonzero if the DFA-based scheduler
6768 interface is to be used. At present this is supported only
6769 for SH4. */
6770 static int
6771 sh_use_dfa_interface ()
6773 if (TARGET_SH4)
6774 return 1;
6775 else
6776 return 0;
6779 /* This function returns 2, which signifies dual issue
6780 for the SH4 processor. It is used by the DFA pipeline description. */
6781 static int
6782 sh_issue_rate ()
6784 if (TARGET_SH4)
6785 return 2;
6786 else
6787 return 1;
6790 /* SHmedia requires registers for branches, so we can't generate new
6791 branches past reload. */
6792 static bool
6793 sh_cannot_modify_jumps_p ()
6795 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
6798 static bool
6799 sh_ms_bitfield_layout_p (record_type)
6800 tree record_type ATTRIBUTE_UNUSED;
6802 return TARGET_SH5;
6805 /* If using PIC, mark a SYMBOL_REF for a non-global symbol so that we
6806 may access it using GOTOFF instead of GOT. */
6808 static void
6809 sh_encode_section_info (decl, first)
6810 tree decl;
6811 int first;
6813 rtx rtl, symbol;
6815 if (DECL_P (decl))
6816 rtl = DECL_RTL (decl);
6817 else
6818 rtl = TREE_CST_RTL (decl);
6819 if (GET_CODE (rtl) != MEM)
6820 return;
6821 symbol = XEXP (rtl, 0);
6822 if (GET_CODE (symbol) != SYMBOL_REF)
6823 return;
6825 if (flag_pic)
6826 SYMBOL_REF_FLAG (symbol) = (*targetm.binds_local_p) (decl);
6828 if (TARGET_SH5 && first && TREE_CODE (decl) != FUNCTION_DECL)
6829 XEXP (rtl, 0) = gen_datalabel_ref (symbol);
6832 /* Undo the effects of the above. */
6834 static const char *
6835 sh_strip_name_encoding (str)
6836 const char *str;
6838 STRIP_DATALABEL_ENCODING (str, str);
6839 str += *str == '*';
6840 return str;
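/* Sketch (illustrative only): a leading '*' marks a verbatim assembler
   name, and any SH5 datalabel encoding is unwrapped first; both are
   stripped here.  */
#if 0
const char *plain = sh_strip_name_encoding ("*foo");  /* "foo" */
#endif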
6843 #include "gt-sh.h"