* sh.h (PREDICATE_CODES): Add general_extend_operand and inqhi_operand.
[official-gcc.git] / gcc / config / sh / sh.c
1 /* Output routines for GCC for Hitachi / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
3 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "insn-config.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "flags.h"
30 #include "expr.h"
31 #include "optabs.h"
32 #include "function.h"
33 #include "regs.h"
34 #include "hard-reg-set.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "toplev.h"
38 #include "recog.h"
39 #include "c-pragma.h"
40 #include "integrate.h"
41 #include "tm_p.h"
42 #include "target.h"
43 #include "target-def.h"
44 #include "real.h"
45 #include "langhooks.h"
47 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
49 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
50 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
52 /* These are some macros to abstract register modes. */
53 #define CONST_OK_FOR_ADD(size) \
54 (TARGET_SHMEDIA ? CONST_OK_FOR_P (size) : CONST_OK_FOR_I (size))
55 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
56 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
57 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
59 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
60 int current_function_interrupt;
62 /* ??? The pragma interrupt support will not work for SH3. */
63 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
64 output code for the next function appropriate for an interrupt handler. */
65 int pragma_interrupt;
67 /* This is set by the trap_exit attribute for functions. It specifies
68 a trap number to be used in a trapa instruction at function exit
69 (instead of an rte instruction). */
70 int trap_exit;
72 /* This is used by the sp_switch attribute for functions. It specifies
73 a variable holding the address of the stack the interrupt function
74 should switch to/from at entry/exit. */
75 rtx sp_switch;
77 /* This is set by #pragma trapa, and is similar to the above, except that
78 the compiler doesn't emit code to preserve all registers. */
79 static int pragma_trapa;
81 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
82 which has a separate set of low regs for User and Supervisor modes.
83 This should only be used for the lowest level of interrupts. Higher levels
84 of interrupts must save the registers in case they themselves are
85 interrupted. */
86 int pragma_nosave_low_regs;
88 /* This is used for communication between SETUP_INCOMING_VARARGS and
89 sh_expand_prologue. */
90 int current_function_anonymous_args;
92 /* Global variables for machine-dependent things. */
94 /* Which cpu are we scheduling for. */
95 enum processor_type sh_cpu;
97 /* Saved operands from the last compare to use when we generate an scc
98 or bcc insn. */
100 rtx sh_compare_op0;
101 rtx sh_compare_op1;
103 /* Provides the class number of the smallest class containing
104 reg number. */
106 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
108 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
144 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
145 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
146 GENERAL_REGS,
149 char sh_register_names[FIRST_PSEUDO_REGISTER] \
150 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
152 char sh_additional_register_names[ADDREGNAMES_SIZE] \
153 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
154 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
156 /* Provide reg_class from a letter such as appears in the machine
157 description. */
159 const enum reg_class reg_class_from_letter[] =
161 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
162 /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
163 /* i */ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
164 /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
165 /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS,
166 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
167 /* y */ FPUL_REGS, /* z */ R0_REGS
170 int assembler_dialect;
172 static void split_branches PARAMS ((rtx));
173 static int branch_dest PARAMS ((rtx));
174 static void force_into PARAMS ((rtx, rtx));
175 static void print_slot PARAMS ((rtx));
176 static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
177 static void dump_table PARAMS ((rtx));
178 static int hi_const PARAMS ((rtx));
179 static int broken_move PARAMS ((rtx));
180 static int mova_p PARAMS ((rtx));
181 static rtx find_barrier PARAMS ((int, rtx, rtx));
182 static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
183 static rtx gen_block_redirect PARAMS ((rtx, int, int));
184 static void output_stack_adjust PARAMS ((int, rtx, int, rtx (*) (rtx)));
185 static rtx frame_insn PARAMS ((rtx));
186 static rtx push PARAMS ((int));
187 static void pop PARAMS ((int));
188 static void push_regs PARAMS ((HOST_WIDE_INT *));
189 static void calc_live_regs PARAMS ((int *, HOST_WIDE_INT *));
190 static void mark_use PARAMS ((rtx, rtx *));
191 static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
192 static rtx mark_constant_pool_use PARAMS ((rtx));
193 const struct attribute_spec sh_attribute_table[];
194 static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
195 static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
196 static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
197 static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
198 static void sh_insert_attributes PARAMS ((tree, tree *));
199 static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
200 static int sh_use_dfa_interface PARAMS ((void));
201 static int sh_issue_rate PARAMS ((void));
203 static bool sh_cannot_modify_jumps_p PARAMS ((void));
204 static bool sh_ms_bitfield_layout_p PARAMS ((tree));
206 static void sh_encode_section_info PARAMS ((tree, int));
207 static const char *sh_strip_name_encoding PARAMS ((const char *));
208 static void sh_init_builtins (void);
209 static void sh_media_init_builtins (void);
210 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
213 /* Initialize the GCC target structure. */
214 #undef TARGET_ATTRIBUTE_TABLE
215 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
217 /* The next two are used for debug info when compiling with -gdwarf. */
218 #undef TARGET_ASM_UNALIGNED_HI_OP
219 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
220 #undef TARGET_ASM_UNALIGNED_SI_OP
221 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
223 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
224 #undef TARGET_ASM_UNALIGNED_DI_OP
225 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
226 #undef TARGET_ASM_ALIGNED_DI_OP
227 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
229 #undef TARGET_ASM_FUNCTION_EPILOGUE
230 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
232 #undef TARGET_INSERT_ATTRIBUTES
233 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
235 #undef TARGET_SCHED_ADJUST_COST
236 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
238 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
239 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
240 sh_use_dfa_interface
241 #undef TARGET_SCHED_ISSUE_RATE
242 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
244 #undef TARGET_CANNOT_MODIFY_JUMPS_P
245 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
247 #undef TARGET_MS_BITFIELD_LAYOUT_P
248 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
250 #undef TARGET_ENCODE_SECTION_INFO
251 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
252 #undef TARGET_STRIP_NAME_ENCODING
253 #define TARGET_STRIP_NAME_ENCODING sh_strip_name_encoding
255 #undef TARGET_INIT_BUILTINS
256 #define TARGET_INIT_BUILTINS sh_init_builtins
257 #undef TARGET_EXPAND_BUILTIN
258 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
260 struct gcc_target targetm = TARGET_INITIALIZER;
262 /* Print the operand address in x to the stream. */
264 void
265 print_operand_address (stream, x)
266 FILE *stream;
267 rtx x;
269 switch (GET_CODE (x))
271 case REG:
272 case SUBREG:
273 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
274 break;
276 case PLUS:
278 rtx base = XEXP (x, 0);
279 rtx index = XEXP (x, 1);
281 switch (GET_CODE (index))
283 case CONST_INT:
284 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
285 reg_names[true_regnum (base)]);
286 break;
288 case REG:
289 case SUBREG:
291 int base_num = true_regnum (base);
292 int index_num = true_regnum (index);
294 fprintf (stream, "@(r0,%s)",
295 reg_names[MAX (base_num, index_num)]);
296 break;
299 default:
300 debug_rtx (x);
301 abort ();
304 break;
306 case PRE_DEC:
307 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
308 break;
310 case POST_INC:
311 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
312 break;
314 default:
315 x = mark_constant_pool_use (x);
316 output_addr_const (stream, x);
317 break;
321 /* Print operand x (an rtx) in assembler syntax to file stream
322 according to modifier code.
324 '.' print a .s if insn needs delay slot
325 ',' print LOCAL_LABEL_PREFIX
326 '@' print trap, rte or rts depending upon pragma interruptness
327 '#' output a nop if there is nothing to put in the delay slot
328 ''' print likelihood suffix (/u for unlikely).
329 'O' print a constant without the #
330 'R' print the LSW of a dp value - changes if in little endian
331 'S' print the MSW of a dp value - changes if in little endian
332 'T' print the next word of a dp value - same as 'R' in big endian mode.
333 'M' print an `x' if `m' will print `base,index'.
334 'N' print 'r63' if the operand is (const_int 0).
335 'm' print a pair `base,offset' or `base,index', for LD and ST.
336 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
337 'o' output an operator. */
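/* For example, on a little-endian target (where MSW is 1 and LSW is 0,
   per the macros near the top of this file), a DImode value held in the
   general register pair r4/r5 keeps its most significant word in r5:
   '%S' prints "r5", '%R' prints "r4", and '%T' prints "r5", the word
   following the one the operand names.  On a big-endian target '%S'
   and '%R' swap, while '%T' still prints "r5".  */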
339 void
340 print_operand (stream, x, code)
341 FILE *stream;
342 rtx x;
343 int code;
345 switch (code)
347 case '.':
348 if (final_sequence
349 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
350 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
351 break;
352 case ',':
353 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
354 break;
355 case '@':
356 if (trap_exit)
357 fprintf (stream, "trapa #%d", trap_exit);
358 else if (sh_cfun_interrupt_handler_p ())
359 fprintf (stream, "rte");
360 else
361 fprintf (stream, "rts");
362 break;
363 case '#':
364 /* Output a nop if there's nothing in the delay slot. */
365 if (dbr_sequence_length () == 0)
366 fprintf (stream, "\n\tnop");
367 break;
368 case '\'':
370 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
372 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
373 fputs ("/u", stream);
374 break;
376 case 'O':
377 x = mark_constant_pool_use (x);
378 output_addr_const (stream, x);
379 break;
380 case 'R':
381 fputs (reg_names[REGNO (x) + LSW], (stream));
382 break;
383 case 'S':
384 fputs (reg_names[REGNO (x) + MSW], (stream));
385 break;
386 case 'T':
387 /* Next word of a double. */
388 switch (GET_CODE (x))
390 case REG:
391 fputs (reg_names[REGNO (x) + 1], (stream));
392 break;
393 case MEM:
394 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
395 && GET_CODE (XEXP (x, 0)) != POST_INC)
396 x = adjust_address (x, SImode, 4);
397 print_operand_address (stream, XEXP (x, 0));
398 break;
399 default:
400 break;
402 break;
403 case 'o':
404 switch (GET_CODE (x))
406 case PLUS: fputs ("add", stream); break;
407 case MINUS: fputs ("sub", stream); break;
408 case MULT: fputs ("mul", stream); break;
409 case DIV: fputs ("div", stream); break;
410 case EQ: fputs ("eq", stream); break;
411 case NE: fputs ("ne", stream); break;
412 case GT: case LT: fputs ("gt", stream); break;
413 case GE: case LE: fputs ("ge", stream); break;
414 case GTU: case LTU: fputs ("gtu", stream); break;
415 case GEU: case LEU: fputs ("geu", stream); break;
416 default:
417 break;
419 break;
420 case 'M':
421 if (GET_CODE (x) == MEM
422 && GET_CODE (XEXP (x, 0)) == PLUS
423 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
424 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
425 fputc ('x', stream);
426 break;
428 case 'm':
429 if (GET_CODE (x) != MEM)
430 abort ();
431 x = XEXP (x, 0);
432 switch (GET_CODE (x))
434 case REG:
435 case SUBREG:
436 print_operand (stream, x, 0);
437 fputs (", 0", stream);
438 break;
440 case PLUS:
441 print_operand (stream, XEXP (x, 0), 0);
442 fputs (", ", stream);
443 print_operand (stream, XEXP (x, 1), 0);
444 break;
446 default:
447 abort ();
449 break;
451 case 'N':
452 if (x == const0_rtx
453 || (GET_CODE (x) == CONST_VECTOR && zero_vec_operand (x, VOIDmode)))
455 fprintf ((stream), "r63");
456 break;
458 goto default_output;
459 case 'u':
460 if (GET_CODE (x) == CONST_INT)
462 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
463 break;
465 /* Fall through. */
467 default_output:
468 default:
469 switch (GET_CODE (x))
471 /* FIXME: We need this on SHmedia32 because reload generates
472 some sign-extended HI or QI loads into DImode registers
473 but, because Pmode is SImode, the address ends up with a
474 subreg:SI of the DImode register. Maybe reload should be
475 fixed so as to apply alter_subreg to such loads? */
476 case SUBREG:
477 if (SUBREG_BYTE (x) != 0
478 || GET_CODE (SUBREG_REG (x)) != REG)
479 abort ();
481 x = SUBREG_REG (x);
482 /* Fall through. */
484 case REG:
485 if (FP_REGISTER_P (REGNO (x))
486 && GET_MODE (x) == V16SFmode)
487 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
488 else if (FP_REGISTER_P (REGNO (x))
489 && GET_MODE (x) == V4SFmode)
490 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
491 else if (GET_CODE (x) == REG
492 && GET_MODE (x) == V2SFmode)
493 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
494 else if (FP_REGISTER_P (REGNO (x))
495 && GET_MODE_SIZE (GET_MODE (x)) > 4)
496 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
497 else
498 fputs (reg_names[REGNO (x)], (stream));
499 break;
501 case MEM:
502 output_address (XEXP (x, 0));
503 break;
505 case CONST:
506 if (TARGET_SHMEDIA
507 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
508 && GET_MODE (XEXP (x, 0)) == DImode
509 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
510 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
512 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
514 fputc ('(', stream);
515 if (GET_CODE (val) == ASHIFTRT)
517 fputc ('(', stream);
518 if (GET_CODE (XEXP (val, 0)) == CONST)
519 fputc ('(', stream);
520 output_addr_const (stream, XEXP (val, 0));
521 if (GET_CODE (XEXP (val, 0)) == CONST)
522 fputc (')', stream);
523 fputs (" >> ", stream);
524 output_addr_const (stream, XEXP (val, 1));
525 fputc (')', stream);
527 else
529 if (GET_CODE (val) == CONST)
530 fputc ('(', stream);
531 output_addr_const (stream, val);
532 if (GET_CODE (val) == CONST)
533 fputc (')', stream);
535 fputs (" & 65535)", stream);
536 break;
539 /* Fall through. */
540 default:
541 if (TARGET_SH1)
542 fputc ('#', stream);
543 output_addr_const (stream, x);
544 break;
546 break;
550 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
551 static void
552 force_into (value, target)
553 rtx value, target;
555 value = force_operand (value, target);
556 if (! rtx_equal_p (value, target))
557 emit_insn (gen_move_insn (target, value));
560 /* Emit code to perform a block move. Choose the best method.
562 OPERANDS[0] is the destination.
563 OPERANDS[1] is the source.
564 OPERANDS[2] is the size.
565 OPERANDS[3] is the alignment safe to use. */
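/* A sketch of one case (assuming a target where TARGET_HARD_SH4 is not
   set): for a constant 32 byte copy with 4 byte alignment, i.e.
   operands[2] == const_int 32 and operands[3] == const_int 4, the code
   below forces the destination address into r4 and the source address
   into r5 and emits a call to the library routine __movstrSI32.  */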
568 expand_block_move (operands)
569 rtx *operands;
571 int align = INTVAL (operands[3]);
572 int constp = (GET_CODE (operands[2]) == CONST_INT);
573 int bytes = (constp ? INTVAL (operands[2]) : 0);
575 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
576 alignment, or if it isn't a multiple of 4 bytes, then fail. */
577 if (! constp || align < 4 || (bytes % 4 != 0))
578 return 0;
580 if (TARGET_HARD_SH4)
582 if (bytes < 12)
583 return 0;
584 else if (bytes == 12)
586 tree entry_name;
587 rtx sym;
588 rtx func_addr_rtx;
589 rtx r4 = gen_rtx (REG, SImode, 4);
590 rtx r5 = gen_rtx (REG, SImode, 5);
592 entry_name = get_identifier ("__movstrSI12_i4");
594 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
595 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
596 force_into (XEXP (operands[0], 0), r4);
597 force_into (XEXP (operands[1], 0), r5);
598 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
599 return 1;
601 else if (! TARGET_SMALLCODE)
603 tree entry_name;
604 rtx sym;
605 rtx func_addr_rtx;
606 int dwords;
607 rtx r4 = gen_rtx (REG, SImode, 4);
608 rtx r5 = gen_rtx (REG, SImode, 5);
609 rtx r6 = gen_rtx (REG, SImode, 6);
611 entry_name = get_identifier (bytes & 4
612 ? "__movstr_i4_odd"
613 : "__movstr_i4_even");
614 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
615 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
616 force_into (XEXP (operands[0], 0), r4);
617 force_into (XEXP (operands[1], 0), r5);
619 dwords = bytes >> 3;
620 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
621 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
622 return 1;
624 else
625 return 0;
627 if (bytes < 64)
629 char entry[30];
630 tree entry_name;
631 rtx sym;
632 rtx func_addr_rtx;
633 rtx r4 = gen_rtx_REG (SImode, 4);
634 rtx r5 = gen_rtx_REG (SImode, 5);
636 sprintf (entry, "__movstrSI%d", bytes);
637 entry_name = get_identifier (entry);
638 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
639 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
640 force_into (XEXP (operands[0], 0), r4);
641 force_into (XEXP (operands[1], 0), r5);
642 emit_insn (gen_block_move_real (func_addr_rtx));
643 return 1;
646 /* This is the same number of bytes as a memcpy call, but to a different,
647 less common function name, so this will occasionally use more space. */
648 if (! TARGET_SMALLCODE)
650 tree entry_name;
651 rtx sym;
652 rtx func_addr_rtx;
653 int final_switch, while_loop;
654 rtx r4 = gen_rtx_REG (SImode, 4);
655 rtx r5 = gen_rtx_REG (SImode, 5);
656 rtx r6 = gen_rtx_REG (SImode, 6);
658 entry_name = get_identifier ("__movstr");
659 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
660 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
661 force_into (XEXP (operands[0], 0), r4);
662 force_into (XEXP (operands[1], 0), r5);
664 /* r6 controls the size of the move. 16 is decremented from it
665 for each 64 bytes moved. Then the negative bit left over is used
666 as an index into a list of move instructions. e.g., a 72 byte move
667 would be set up with size(r6) = 14, for one iteration through the
668 big while loop, and a switch of -2 for the last part. */
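/* Another worked example: for a 136 byte move, bytes / 4 is 34, so
   final_switch is 16 - (34 % 16) = 14 and while_loop is
   ((34 / 16) - 1) * 16 = 16, giving r6 = 30.  Two passes through the
   loop move 128 bytes and leave -2, which selects the switch entry
   that moves the remaining 8 bytes.  */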
670 final_switch = 16 - ((bytes / 4) % 16);
671 while_loop = ((bytes / 4) / 16 - 1) * 16;
672 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
673 emit_insn (gen_block_lump_real (func_addr_rtx));
674 return 1;
677 return 0;
680 /* Prepare operands for a move define_expand; specifically, one of the
681 operands must be in a register. */
684 prepare_move_operands (operands, mode)
685 rtx operands[];
686 enum machine_mode mode;
688 if ((mode == SImode || mode == DImode) && flag_pic)
690 rtx temp;
691 if (SYMBOLIC_CONST_P (operands[1]))
693 if (GET_CODE (operands[0]) == MEM)
694 operands[1] = force_reg (Pmode, operands[1]);
695 else if (GET_CODE (operands[1]) == LABEL_REF
696 && target_reg_operand (operands[0], mode))
697 /* It's ok. */;
698 else
700 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
701 operands[1] = legitimize_pic_address (operands[1], mode, temp);
704 else if (GET_CODE (operands[1]) == CONST
705 && GET_CODE (XEXP (operands[1], 0)) == PLUS
706 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
708 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
709 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
710 mode, temp);
711 operands[1] = expand_binop (mode, add_optab, temp,
712 XEXP (XEXP (operands[1], 0), 1),
713 no_new_pseudos ? temp
714 : gen_reg_rtx (Pmode),
715 0, OPTAB_LIB_WIDEN);
719 if (! reload_in_progress && ! reload_completed)
721 /* Copy the source to a register if both operands aren't registers. */
722 if (! register_operand (operands[0], mode)
723 && ! register_operand (operands[1], mode))
724 operands[1] = copy_to_mode_reg (mode, operands[1]);
726 /* This case can happen while generating code to move the result
727 of a library call to the target. Reject `st r0,@(rX,rY)' because
728 reload will fail to find a spill register for rX, since r0 is already
729 being used for the source. */
730 else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
731 && GET_CODE (operands[0]) == MEM
732 && GET_CODE (XEXP (operands[0], 0)) == PLUS
733 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
734 operands[1] = copy_to_mode_reg (mode, operands[1]);
737 return 0;
740 /* Prepare the operands for an scc instruction; make sure that the
741 compare has been done. */
743 prepare_scc_operands (code)
744 enum rtx_code code;
746 rtx t_reg = gen_rtx_REG (SImode, T_REG);
747 enum rtx_code oldcode = code;
748 enum machine_mode mode;
750 /* First need a compare insn. */
751 switch (code)
753 case NE:
754 /* It isn't possible to handle this case. */
755 abort ();
756 case LT:
757 code = GT;
758 break;
759 case LE:
760 code = GE;
761 break;
762 case LTU:
763 code = GTU;
764 break;
765 case LEU:
766 code = GEU;
767 break;
768 default:
769 break;
771 if (code != oldcode)
773 rtx tmp = sh_compare_op0;
774 sh_compare_op0 = sh_compare_op1;
775 sh_compare_op1 = tmp;
778 mode = GET_MODE (sh_compare_op0);
779 if (mode == VOIDmode)
780 mode = GET_MODE (sh_compare_op1);
782 sh_compare_op0 = force_reg (mode, sh_compare_op0);
783 if ((code != EQ && code != NE
784 && (sh_compare_op1 != const0_rtx
785 || code == GTU || code == GEU || code == LTU || code == LEU))
786 || (mode == DImode && sh_compare_op1 != const0_rtx)
787 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
788 sh_compare_op1 = force_reg (mode, sh_compare_op1);
790 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
791 (mode == SFmode ? emit_sf_insn : emit_df_insn)
792 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
793 gen_rtx (SET, VOIDmode, t_reg,
794 gen_rtx (code, SImode,
795 sh_compare_op0, sh_compare_op1)),
796 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
797 else
798 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
799 gen_rtx (code, SImode, sh_compare_op0,
800 sh_compare_op1)));
802 return t_reg;
805 /* Called from the md file, set up the operands of a compare instruction. */
807 void
808 from_compare (operands, code)
809 rtx *operands;
810 int code;
812 enum machine_mode mode = GET_MODE (sh_compare_op0);
813 rtx insn;
814 if (mode == VOIDmode)
815 mode = GET_MODE (sh_compare_op1);
816 if (code != EQ
817 || mode == DImode
818 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
820 /* Force args into regs, since we can't use constants here. */
821 sh_compare_op0 = force_reg (mode, sh_compare_op0);
822 if (sh_compare_op1 != const0_rtx
823 || code == GTU || code == GEU
824 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
825 sh_compare_op1 = force_reg (mode, sh_compare_op1);
827 if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
829 from_compare (operands, GT);
830 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
832 else
833 insn = gen_rtx_SET (VOIDmode,
834 gen_rtx_REG (SImode, T_REG),
835 gen_rtx (code, SImode, sh_compare_op0,
836 sh_compare_op1));
837 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
839 insn = gen_rtx (PARALLEL, VOIDmode,
840 gen_rtvec (2, insn,
841 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
842 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
844 else
845 emit_insn (insn);
848 /* Functions to output assembly code. */
850 /* Return a sequence of instructions to perform DI or DF move.
852 Since the SH cannot move a DI or DF in one instruction, we have
853 to take care when we see overlapping source and dest registers. */
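/* For instance, loading a DImode value from memory addressed by r1
   into the register pair r1/r2 must copy the second word (into r2)
   before the first word overwrites the pointer in r1; the
   dreg == ptrreg check below selects that ordering.  */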
855 const char *
856 output_movedouble (insn, operands, mode)
857 rtx insn ATTRIBUTE_UNUSED;
858 rtx operands[];
859 enum machine_mode mode;
861 rtx dst = operands[0];
862 rtx src = operands[1];
864 if (GET_CODE (dst) == MEM
865 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
866 return "mov.l %T1,%0\n\tmov.l %1,%0";
868 if (register_operand (dst, mode)
869 && register_operand (src, mode))
871 if (REGNO (src) == MACH_REG)
872 return "sts mach,%S0\n\tsts macl,%R0";
874 /* When mov.d r1,r2 do r2->r3 then r1->r2;
875 when mov.d r1,r0 do r1->r0 then r2->r1. */
877 if (REGNO (src) + 1 == REGNO (dst))
878 return "mov %T1,%T0\n\tmov %1,%0";
879 else
880 return "mov %1,%0\n\tmov %T1,%T0";
882 else if (GET_CODE (src) == CONST_INT)
884 if (INTVAL (src) < 0)
885 output_asm_insn ("mov #-1,%S0", operands);
886 else
887 output_asm_insn ("mov #0,%S0", operands);
889 return "mov %1,%R0";
891 else if (GET_CODE (src) == MEM)
893 int ptrreg = -1;
894 int dreg = REGNO (dst);
895 rtx inside = XEXP (src, 0);
897 if (GET_CODE (inside) == REG)
898 ptrreg = REGNO (inside);
899 else if (GET_CODE (inside) == SUBREG)
900 ptrreg = subreg_regno (inside);
901 else if (GET_CODE (inside) == PLUS)
903 ptrreg = REGNO (XEXP (inside, 0));
904 /* ??? A r0+REG address shouldn't be possible here, because it isn't
905 an offsettable address. Unfortunately, offsettable addresses use
906 QImode to check the offset, and a QImode offsettable address
907 requires r0 for the other operand, which is not currently
908 supported, so we can't use the 'o' constraint.
909 Thus we must check for and handle r0+REG addresses here.
910 We punt for now, since this is likely very rare. */
911 if (GET_CODE (XEXP (inside, 1)) == REG)
912 abort ();
914 else if (GET_CODE (inside) == LABEL_REF)
915 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
916 else if (GET_CODE (inside) == POST_INC)
917 return "mov.l %1,%0\n\tmov.l %1,%T0";
918 else
919 abort ();
921 /* Work out the safe way to copy. Copy into the second half first. */
922 if (dreg == ptrreg)
923 return "mov.l %T1,%T0\n\tmov.l %1,%0";
926 return "mov.l %1,%0\n\tmov.l %T1,%T0";
929 /* Print an instruction which would have gone into a delay slot after
930 another instruction, but couldn't because the other instruction expanded
931 into a sequence where putting the slot insn at the end wouldn't work. */
933 static void
934 print_slot (insn)
935 rtx insn;
937 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
939 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
942 const char *
943 output_far_jump (insn, op)
944 rtx insn;
945 rtx op;
947 struct { rtx lab, reg, op; } this;
948 rtx braf_base_lab = NULL_RTX;
949 const char *jump;
950 int far;
951 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
953 this.lab = gen_label_rtx ();
955 if (TARGET_SH2
956 && offset >= -32764
957 && offset - get_attr_length (insn) <= 32766)
959 far = 0;
960 jump = "mov.w %O0,%1; braf %1";
962 else
964 far = 1;
965 if (flag_pic)
967 if (TARGET_SH2)
968 jump = "mov.l %O0,%1; braf %1";
969 else
970 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
972 else
973 jump = "mov.l %O0,%1; jmp @%1";
975 /* If we have a scratch register available, use it. */
976 if (GET_CODE (PREV_INSN (insn)) == INSN
977 && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
979 this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
980 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
981 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
982 output_asm_insn (jump, &this.lab);
983 if (dbr_sequence_length ())
984 print_slot (final_sequence);
985 else
986 output_asm_insn ("nop", 0);
988 else
990 /* Output the delay slot insn first if any. */
991 if (dbr_sequence_length ())
992 print_slot (final_sequence);
994 this.reg = gen_rtx_REG (SImode, 13);
995 /* We must keep the stack aligned to 8-byte boundaries on SH5.
996 Fortunately, MACL is fixed and call-clobbered, and we never
997 need its value across jumps, so save r13 in it instead of in
998 the stack. */
999 if (TARGET_SH5)
1000 output_asm_insn ("lds r13, macl", 0);
1001 else
1002 output_asm_insn ("mov.l r13,@-r15", 0);
1003 output_asm_insn (jump, &this.lab);
1004 if (TARGET_SH5)
1005 output_asm_insn ("sts macl, r13", 0);
1006 else
1007 output_asm_insn ("mov.l @r15+,r13", 0);
1009 if (far && flag_pic && TARGET_SH2)
1011 braf_base_lab = gen_label_rtx ();
1012 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
1013 CODE_LABEL_NUMBER (braf_base_lab));
1015 if (far)
1016 output_asm_insn (".align 2", 0);
1017 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1018 this.op = op;
1019 if (far && flag_pic)
1021 if (TARGET_SH2)
1022 this.lab = braf_base_lab;
1023 output_asm_insn (".long %O2-%O0", &this.lab);
1025 else
1026 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1027 return "";
1030 /* Local label counter, used for constants in the pool and inside
1031 pattern branches. */
1033 static int lf = 100;
1035 /* Output code for ordinary branches. */
1037 const char *
1038 output_branch (logic, insn, operands)
1039 int logic;
1040 rtx insn;
1041 rtx *operands;
1043 switch (get_attr_length (insn))
1045 case 6:
1046 /* This can happen if filling the delay slot has caused a forward
1047 branch to exceed its range (we could reverse it, but only
1048 when we know we won't overextend other branches; this should
1049 best be handled by relaxation).
1050 It can also happen when other condbranches hoist delay slot insn
1051 from their destination, thus leading to code size increase.
1052 But the branch will still be in the range -4092..+4098 bytes. */
1054 if (! TARGET_RELAX)
1056 int label = lf++;
1057 /* The call to print_slot will clobber the operands. */
1058 rtx op0 = operands[0];
1060 /* If the instruction in the delay slot is annulled (true), then
1061 there is no delay slot where we can put it now. The only safe
1062 place for it is after the label. final will do that by default. */
1064 if (final_sequence
1065 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1067 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1068 ASSEMBLER_DIALECT ? "/" : ".", label);
1069 print_slot (final_sequence);
1071 else
1072 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1074 output_asm_insn ("bra\t%l0", &op0);
1075 fprintf (asm_out_file, "\tnop\n");
1076 ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label);
1078 return "";
1080 /* When relaxing, handle this like a short branch. The linker
1081 will fix it up if it still doesn't fit after relaxation. */
1082 case 2:
1083 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1084 default:
1085 /* There should be no longer branches now - that would
1086 indicate that something has destroyed the branches set
1087 up in machine_dependent_reorg. */
1088 abort ();
1092 const char *
1093 output_branchy_insn (code, template, insn, operands)
1094 enum rtx_code code;
1095 const char *template;
1096 rtx insn;
1097 rtx *operands;
1099 rtx next_insn = NEXT_INSN (insn);
1101 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1103 rtx src = SET_SRC (PATTERN (next_insn));
1104 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1106 /* Following branch not taken */
1107 operands[9] = gen_label_rtx ();
1108 emit_label_after (operands[9], next_insn);
1109 INSN_ADDRESSES_NEW (operands[9],
1110 INSN_ADDRESSES (INSN_UID (next_insn))
1111 + get_attr_length (next_insn));
1112 return template;
1114 else
1116 int offset = (branch_dest (next_insn)
1117 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1118 if (offset >= -252 && offset <= 258)
1120 if (GET_CODE (src) == IF_THEN_ELSE)
1121 /* branch_true */
1122 src = XEXP (src, 1);
1123 operands[9] = src;
1124 return template;
1128 operands[9] = gen_label_rtx ();
1129 emit_label_after (operands[9], insn);
1130 INSN_ADDRESSES_NEW (operands[9],
1131 INSN_ADDRESSES (INSN_UID (insn))
1132 + get_attr_length (insn));
1133 return template;
1136 const char *
1137 output_ieee_ccmpeq (insn, operands)
1138 rtx insn, *operands;
1140 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1143 /* Output to FILE the start of the assembler file. */
1145 void
1146 output_file_start (file)
1147 FILE *file;
1149 output_file_directive (file, main_input_filename);
1151 /* Switch to the data section so that the coffsem symbol
1152 isn't in the text section. */
1153 data_section ();
1155 if (TARGET_LITTLE_ENDIAN)
1156 fprintf (file, "\t.little\n");
1158 if (TARGET_SHCOMPACT)
1159 fprintf (file, "\t.mode\tSHcompact\n");
1160 else if (TARGET_SHMEDIA)
1161 fprintf (file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1162 TARGET_SHMEDIA64 ? 64 : 32);
1165 /* Actual number of instructions used to make a shift by N. */
1166 static const char ashiftrt_insns[] =
1167 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1169 /* Left shift and logical right shift are the same. */
1170 static const char shift_insns[] =
1171 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1173 /* Individual shift amounts needed to get the above length sequences.
1174 One bit right shifts clobber the T bit, so when possible, put one bit
1175 shifts in the middle of the sequence, so the ends are eligible for
1176 branch delay slots. */
1177 static const short shift_amounts[32][5] = {
1178 {0}, {1}, {2}, {2, 1},
1179 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1180 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1181 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1182 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1183 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1184 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1185 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
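/* As a worked example of how these tables fit together: a constant
   left shift by 13 costs shift_insns[13] == 4 instructions, and
   shift_amounts[13] == {8, 2, 1, 2} decomposes it into shifts by 8, 2,
   1 and 2 bits (shll8 / shll2 / shll / shll2), keeping the single bit
   shift, which clobbers the T bit, away from the ends of the sequence.
   A shift by 14 uses shift_amounts[14] == {8, -2, 8}; the negative
   entry makes gen_ashift emit a shift in the opposite direction
   (shlr2), since 8 - 2 + 8 == 14.  */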
1187 /* Likewise, but for shift amounts < 16, up to three highmost bits
1188 might be clobbered. This is typically used when combined with some
1189 kind of sign or zero extension. */
1191 static const char ext_shift_insns[] =
1192 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1194 static const short ext_shift_amounts[32][4] = {
1195 {0}, {1}, {2}, {2, 1},
1196 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1197 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1198 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1199 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1200 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1201 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1202 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1204 /* Assuming we have a value that has been sign-extended by at least one bit,
1205 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1206 to shift it by N without data loss, and quicker than by other means? */
1207 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
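/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, and
   ext_shift_amounts[7] == {8, -1} and ext_shift_amounts[15] == {16, -1}
   both end with a one bit right shift, which is the shift that gets
   turned into an arithmetic shift.  */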
1209 /* This is used in length attributes in sh.md to help compute the length
1210 of arbitrary constant shift instructions. */
1213 shift_insns_rtx (insn)
1214 rtx insn;
1216 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1217 int shift_count = INTVAL (XEXP (set_src, 1));
1218 enum rtx_code shift_code = GET_CODE (set_src);
1220 switch (shift_code)
1222 case ASHIFTRT:
1223 return ashiftrt_insns[shift_count];
1224 case LSHIFTRT:
1225 case ASHIFT:
1226 return shift_insns[shift_count];
1227 default:
1228 abort();
1232 /* Return the cost of a shift. */
1235 shiftcosts (x)
1236 rtx x;
1238 int value;
1240 if (TARGET_SHMEDIA)
1241 return 1;
1243 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1245 if (GET_MODE (x) == DImode
1246 && GET_CODE (XEXP (x, 1)) == CONST_INT
1247 && INTVAL (XEXP (x, 1)) == 1)
1248 return 2;
1250 /* Everything else is invalid, because there is no pattern for it. */
1251 return 10000;
1253 /* If shift by a non constant, then this will be expensive. */
1254 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1255 return SH_DYNAMIC_SHIFT_COST;
1257 value = INTVAL (XEXP (x, 1));
1259 /* Otherwise, return the true cost in instructions. */
1260 if (GET_CODE (x) == ASHIFTRT)
1262 int cost = ashiftrt_insns[value];
1263 /* If SH3, then we put the constant in a reg and use shad. */
1264 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1265 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1266 return cost;
1268 else
1269 return shift_insns[value];
1272 /* Return the cost of an AND operation. */
1275 andcosts (x)
1276 rtx x;
1278 int i;
1280 /* Anding with a register is a single-cycle `and' instruction. */
1281 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1282 return 1;
1284 i = INTVAL (XEXP (x, 1));
1286 if (TARGET_SHMEDIA)
1288 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1289 && CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1290 || EXTRA_CONSTRAINT_S (XEXP (x, 1)))
1291 return 1;
1292 else
1293 return 2;
1296 /* These constants are single cycle extu.[bw] instructions. */
1297 if (i == 0xff || i == 0xffff)
1298 return 1;
1299 /* Constants that can be used in an `and' immediate instruction take a single
1300 cycle, but they require r0, so make them a little more expensive. */
1301 if (CONST_OK_FOR_L (i))
1302 return 2;
1303 /* Constants that can be loaded with a mov immediate and an and.
1304 This case is probably unnecessary. */
1305 if (CONST_OK_FOR_I (i))
1306 return 2;
1307 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1308 This case is probably unnecessary. */
1309 return 3;
1312 /* Return the cost of an addition or a subtraction. */
1315 addsubcosts (x)
1316 rtx x;
1318 /* Adding a register is a single cycle insn. */
1319 if (GET_CODE (XEXP (x, 1)) == REG
1320 || GET_CODE (XEXP (x, 1)) == SUBREG)
1321 return 1;
1323 /* Likewise for small constants. */
1324 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1325 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1326 return 1;
1328 if (TARGET_SHMEDIA)
1329 switch (GET_CODE (XEXP (x, 1)))
1331 case CONST:
1332 case LABEL_REF:
1333 case SYMBOL_REF:
1334 return TARGET_SHMEDIA64 ? 5 : 3;
1336 case CONST_INT:
1337 if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1338 return 2;
1339 else if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1)) >> 16))
1340 return 3;
1341 else if (CONST_OK_FOR_J ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1342 return 4;
1344 /* Fall through. */
1345 default:
1346 return 5;
1349 /* Any other constant requires a 2 cycle pc-relative load plus an
1350 addition. */
1351 return 3;
1354 /* Return the cost of a multiply. */
1356 multcosts (x)
1357 rtx x ATTRIBUTE_UNUSED;
1359 if (TARGET_SHMEDIA)
1360 return 3;
1362 if (TARGET_SH2)
1364 /* We have a mul insn, so we can never take more than the mul and the
1365 read of the mac reg, but count more because of the latency and extra
1366 reg usage. */
1367 if (TARGET_SMALLCODE)
1368 return 2;
1369 return 3;
1372 /* If we're aiming at small code, then just count the number of
1373 insns in a multiply call sequence. */
1374 if (TARGET_SMALLCODE)
1375 return 5;
1377 /* Otherwise count all the insns in the routine we'd be calling too. */
1378 return 20;
1381 /* Code to expand a shift. */
1383 void
1384 gen_ashift (type, n, reg)
1385 int type;
1386 int n;
1387 rtx reg;
1389 /* Negative values here come from the shift_amounts array. */
1390 if (n < 0)
1392 if (type == ASHIFT)
1393 type = LSHIFTRT;
1394 else
1395 type = ASHIFT;
1396 n = -n;
1399 switch (type)
1401 case ASHIFTRT:
1402 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1403 break;
1404 case LSHIFTRT:
1405 if (n == 1)
1406 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1407 else
1408 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1409 break;
1410 case ASHIFT:
1411 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1412 break;
1416 /* Same for HImode */
1418 void
1419 gen_ashift_hi (type, n, reg)
1420 int type;
1421 int n;
1422 rtx reg;
1424 /* Negative values here come from the shift_amounts array. */
1425 if (n < 0)
1427 if (type == ASHIFT)
1428 type = LSHIFTRT;
1429 else
1430 type = ASHIFT;
1431 n = -n;
1434 switch (type)
1436 case ASHIFTRT:
1437 case LSHIFTRT:
1438 /* We don't have HImode right shift operations because using the
1439 ordinary 32 bit shift instructions for that doesn't generate proper
1440 zero/sign extension.
1441 gen_ashift_hi is only called in contexts where we know that the
1442 sign extension works out correctly. */
1444 int offset = 0;
1445 if (GET_CODE (reg) == SUBREG)
1447 offset = SUBREG_BYTE (reg);
1448 reg = SUBREG_REG (reg);
1450 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1451 break;
1453 case ASHIFT:
1454 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1455 break;
1459 /* Output RTL to split a constant shift into its component SH constant
1460 shift instructions. */
1462 void
1463 gen_shifty_op (code, operands)
1464 int code;
1465 rtx *operands;
1467 int value = INTVAL (operands[2]);
1468 int max, i;
1470 /* Truncate the shift count in case it is out of bounds. */
1471 value = value & 0x1f;
1473 if (value == 31)
1475 if (code == LSHIFTRT)
1477 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1478 emit_insn (gen_movt (operands[0]));
1479 return;
1481 else if (code == ASHIFT)
1483 /* There is a two instruction sequence for 31 bit left shifts,
1484 but it requires r0. */
1485 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1487 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1488 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1489 return;
1493 else if (value == 0)
1495 /* This can happen when not optimizing. We must output something here
1496 to prevent the compiler from aborting in final.c after the try_split
1497 call. */
1498 emit_insn (gen_nop ());
1499 return;
1502 max = shift_insns[value];
1503 for (i = 0; i < max; i++)
1504 gen_ashift (code, shift_amounts[value][i], operands[0]);
1507 /* Same as above, but optimized for values where the topmost bits don't
1508 matter. */
1510 void
1511 gen_shifty_hi_op (code, operands)
1512 int code;
1513 rtx *operands;
1515 int value = INTVAL (operands[2]);
1516 int max, i;
1517 void (*gen_fun) PARAMS ((int, int, rtx));
1519 /* This operation is used by and_shl for SImode values with a few
1520 high bits known to be cleared. */
1521 value &= 31;
1522 if (value == 0)
1524 emit_insn (gen_nop ());
1525 return;
1528 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1529 if (code == ASHIFT)
1531 max = ext_shift_insns[value];
1532 for (i = 0; i < max; i++)
1533 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1535 else
1536 /* When shifting right, emit the shifts in reverse order, so that
1537 solitary negative values come first. */
1538 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1539 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1542 /* Output RTL for an arithmetic right shift. */
1544 /* ??? Rewrite to use super-optimizer sequences. */
1547 expand_ashiftrt (operands)
1548 rtx *operands;
1550 rtx sym;
1551 rtx wrk;
1552 char func[18];
1553 tree func_name;
1554 int value;
1556 if (TARGET_SH3)
1558 if (GET_CODE (operands[2]) != CONST_INT)
1560 rtx count = copy_to_mode_reg (SImode, operands[2]);
1561 emit_insn (gen_negsi2 (count, count));
1562 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1563 return 1;
1565 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1566 > 1 + SH_DYNAMIC_SHIFT_COST)
1568 rtx count
1569 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1570 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1571 return 1;
1574 if (GET_CODE (operands[2]) != CONST_INT)
1575 return 0;
1577 value = INTVAL (operands[2]) & 31;
1579 if (value == 31)
1581 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1582 return 1;
1584 else if (value >= 16 && value <= 19)
1586 wrk = gen_reg_rtx (SImode);
1587 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1588 value -= 16;
1589 while (value--)
1590 gen_ashift (ASHIFTRT, 1, wrk);
1591 emit_move_insn (operands[0], wrk);
1592 return 1;
1594 /* Expand a short sequence inline; for longer ones, call a magic routine. */
1595 else if (value <= 5)
1597 wrk = gen_reg_rtx (SImode);
1598 emit_move_insn (wrk, operands[1]);
1599 while (value--)
1600 gen_ashift (ASHIFTRT, 1, wrk);
1601 emit_move_insn (operands[0], wrk);
1602 return 1;
1605 wrk = gen_reg_rtx (Pmode);
1607 /* Load the value into an arg reg and call a helper. */
1608 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
1609 sprintf (func, "__ashiftrt_r4_%d", value);
1610 func_name = get_identifier (func);
1611 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
1612 emit_move_insn (wrk, sym);
1613 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1614 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
1615 return 1;
1619 sh_dynamicalize_shift_p (count)
1620 rtx count;
1622 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1625 /* Try to find a good way to implement the combiner pattern
1626 [(set (match_operand:SI 0 "register_operand" "r")
1627 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1628 (match_operand:SI 2 "const_int_operand" "n"))
1629 (match_operand:SI 3 "const_int_operand" "n"))) .
1630 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1631 return 0 for simple right / left or left/right shift combination.
1632 return 1 for a combination of shifts with zero_extend.
1633 return 2 for a combination of shifts with an AND that needs r0.
1634 return 3 for a combination of shifts with an AND that needs an extra
1635 scratch register, when the three highmost bits of the AND mask are clear.
1636 return 4 for a combination of shifts with an AND that needs an extra
1637 scratch register, when any of the three highmost bits of the AND mask
1638 is set.
1639 If ATTRP is set, store an initial right shift width in ATTRP[0],
1640 and the instruction length in ATTRP[1] . These values are not valid
1641 when returning 0.
1642 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1643 shift_amounts for the last shift value that is to be used before the
1644 sign extend. */
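/* A worked example: with LEFT_RTX == 2 and MASK_RTX == 0x3fc,
   (x << 2) & 0x3fc equals (zero_extend:QI x) << 2, so the cheapest
   sequence found below is a QImode zero extend followed by a left
   shift by two, and the function returns 1 with no initial right
   shift (ATTRP[0] == 0).  */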
1646 shl_and_kind (left_rtx, mask_rtx, attrp)
1647 rtx left_rtx, mask_rtx;
1648 int *attrp;
1650 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1651 int left = INTVAL (left_rtx), right;
1652 int best = 0;
1653 int cost, best_cost = 10000;
1654 int best_right = 0, best_len = 0;
1655 int i;
1656 int can_ext;
1658 if (left < 0 || left > 31)
1659 return 0;
1660 if (GET_CODE (mask_rtx) == CONST_INT)
1661 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1662 else
1663 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1664 /* Can this be expressed as a right shift / left shift pair ? */
1665 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1666 right = exact_log2 (lsb);
1667 mask2 = ~(mask + lsb - 1);
1668 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
1669 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
1670 if (! mask2)
1671 best_cost = shift_insns[right] + shift_insns[right + left];
1672 /* mask has no trailing zeroes <==> ! right */
1673 else if (! right && mask2 == ~(lsb2 - 1))
1675 int late_right = exact_log2 (lsb2);
1676 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1678 /* Try to use zero extend */
1679 if (mask2 == ~(lsb2 - 1))
1681 int width, first;
1683 for (width = 8; width <= 16; width += 8)
1685 /* Can we zero-extend right away? */
1686 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
1688 cost
1689 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1690 if (cost < best_cost)
1692 best = 1;
1693 best_cost = cost;
1694 best_right = right;
1695 best_len = cost;
1696 if (attrp)
1697 attrp[2] = -1;
1699 continue;
1701 /* ??? Could try to put zero extend into initial right shift,
1702 or even shift a bit left before the right shift. */
1703 /* Determine value of first part of left shift, to get to the
1704 zero extend cut-off point. */
1705 first = width - exact_log2 (lsb2) + right;
1706 if (first >= 0 && right + left - first >= 0)
1708 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
1709 + ext_shift_insns[right + left - first];
1710 if (cost < best_cost)
1712 best = 1;
1713 best_cost = cost;
1714 best_right = right;
1715 best_len = cost;
1716 if (attrp)
1717 attrp[2] = first;
1722 /* Try to use r0 AND pattern */
1723 for (i = 0; i <= 2; i++)
1725 if (i > right)
1726 break;
1727 if (! CONST_OK_FOR_L (mask >> i))
1728 continue;
1729 cost = (i != 0) + 2 + ext_shift_insns[left + i];
1730 if (cost < best_cost)
1732 best = 2;
1733 best_cost = cost;
1734 best_right = i;
1735 best_len = cost - 1;
1738 /* Try to use a scratch register to hold the AND operand. */
1739 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
1740 for (i = 0; i <= 2; i++)
1742 if (i > right)
1743 break;
1744 cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
1745 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
1746 if (cost < best_cost)
1748 best = 4 - can_ext;
1749 best_cost = cost;
1750 best_right = i;
1751 best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
1755 if (attrp)
1757 attrp[0] = best_right;
1758 attrp[1] = best_len;
1760 return best;
1763 /* This is used in length attributes of the unnamed instructions
1764 corresponding to shl_and_kind return values of 1 and 2. */
1766 shl_and_length (insn)
1767 rtx insn;
1769 rtx set_src, left_rtx, mask_rtx;
1770 int attributes[3];
1772 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1773 left_rtx = XEXP (XEXP (set_src, 0), 1);
1774 mask_rtx = XEXP (set_src, 1);
1775 shl_and_kind (left_rtx, mask_rtx, attributes);
1776 return attributes[1];
1779 /* This is used in the length attribute of the and_shl_scratch instruction. */
1782 shl_and_scr_length (insn)
1783 rtx insn;
1785 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1786 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
1787 rtx op = XEXP (set_src, 0);
1788 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
1789 op = XEXP (XEXP (op, 0), 0);
1790 return len + shift_insns[INTVAL (XEXP (op, 1))];
1793 /* Generating rtl? */
1794 extern int rtx_equal_function_value_matters;
1796 /* Generate rtl for instructions for which shl_and_kind advised a particular
1797 method of generating them, i.e. returned zero. */
1800 gen_shl_and (dest, left_rtx, mask_rtx, source)
1801 rtx dest, left_rtx, mask_rtx, source;
1803 int attributes[3];
1804 unsigned HOST_WIDE_INT mask;
1805 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
1806 int right, total_shift;
1807 void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
1809 right = attributes[0];
1810 total_shift = INTVAL (left_rtx) + right;
1811 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
1812 switch (kind)
1814 default:
1815 return -1;
1816 case 1:
1818 int first = attributes[2];
1819 rtx operands[3];
1821 if (first < 0)
1823 emit_insn ((mask << right) <= 0xff
1824 ? gen_zero_extendqisi2(dest,
1825 gen_lowpart (QImode, source))
1826 : gen_zero_extendhisi2(dest,
1827 gen_lowpart (HImode, source)));
1828 source = dest;
1830 if (source != dest)
1831 emit_insn (gen_movsi (dest, source));
1832 operands[0] = dest;
1833 if (right)
1835 operands[2] = GEN_INT (right);
1836 gen_shifty_hi_op (LSHIFTRT, operands);
1838 if (first > 0)
1840 operands[2] = GEN_INT (first);
1841 gen_shifty_hi_op (ASHIFT, operands);
1842 total_shift -= first;
1843 mask <<= first;
1845 if (first >= 0)
1846 emit_insn (mask <= 0xff
1847 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
1848 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
1849 if (total_shift > 0)
1851 operands[2] = GEN_INT (total_shift);
1852 gen_shifty_hi_op (ASHIFT, operands);
1854 break;
1856 case 4:
1857 shift_gen_fun = gen_shifty_op;
1858 case 3:
1859 /* If the topmost bit that matters is set, set the topmost bits
1860 that don't matter. This way, we might be able to get a shorter
1861 signed constant. */
1862 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
1863 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
1864 case 2:
1865 /* Don't expand fine-grained when combining, because that will
1866 make the pattern fail. */
1867 if (rtx_equal_function_value_matters
1868 || reload_in_progress || reload_completed)
1870 rtx operands[3];
1872 /* Cases 3 and 4 should be handled by this split
1873 only while combining */
1874 if (kind > 2)
1875 abort ();
1876 if (right)
1878 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
1879 source = dest;
1881 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
1882 if (total_shift)
1884 operands[0] = dest;
1885 operands[1] = dest;
1886 operands[2] = GEN_INT (total_shift);
1887 shift_gen_fun (ASHIFT, operands);
1889 break;
1891 else
1893 int neg = 0;
1894 if (kind != 4 && total_shift < 16)
1896 neg = -ext_shift_amounts[total_shift][1];
1897 if (neg > 0)
1898 neg -= ext_shift_amounts[total_shift][2];
1899 else
1900 neg = 0;
1902 emit_insn (gen_and_shl_scratch (dest, source,
1903 GEN_INT (right),
1904 GEN_INT (mask),
1905 GEN_INT (total_shift + neg),
1906 GEN_INT (neg)));
1907 emit_insn (gen_movsi (dest, dest));
1908 break;
1911 return 0;
1914 /* Try to find a good way to implement the combiner pattern
1915 [(set (match_operand:SI 0 "register_operand" "=r")
1916 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1917 (match_operand:SI 2 "const_int_operand" "n")
1918 (match_operand:SI 3 "const_int_operand" "n")
1919 (const_int 0)))
1920 (clobber (reg:SI T_REG))]
1921 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
1922 return 0 for simple left / right shift combination.
1923 return 1 for left shift / 8 bit sign extend / left shift.
1924 return 2 for left shift / 16 bit sign extend / left shift.
1925 return 3 for left shift / 8 bit sign extend / shift / sign extend.
1926 return 4 for left shift / 16 bit sign extend / shift / sign extend.
1927 return 5 for left shift / 16 bit sign extend / right shift
1928 return 6 for < 8 bit sign extend / left shift.
1929 return 7 for < 8 bit sign extend / left shift / single right shift.
1930 If COSTP is nonzero, assign the calculated cost to *COSTP. */
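/* For example, with LEFT_RTX == 2 and SIZE_RTX == 10 only the low
   eight bits of the source matter (insize == 8); the cheapest sequence
   found below is an 8 bit sign extend followed by a left shift by two,
   so the function returns 1.  */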
1933 shl_sext_kind (left_rtx, size_rtx, costp)
1934 rtx left_rtx, size_rtx;
1935 int *costp;
1937 int left, size, insize, ext;
1938 int cost, best_cost;
1939 int kind;
1941 left = INTVAL (left_rtx);
1942 size = INTVAL (size_rtx);
1943 insize = size - left;
1944 if (insize <= 0)
1945 abort ();
1946 /* Default to left / right shift. */
1947 kind = 0;
1948 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
1949 if (size <= 16)
1951 /* 16 bit shift / sign extend / 16 bit shift */
1952 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
1953 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
1954 below, by alternative 3 or something even better. */
1955 if (cost < best_cost)
1957 kind = 5;
1958 best_cost = cost;
1961 /* Try a plain sign extend between two shifts. */
1962 for (ext = 16; ext >= insize; ext -= 8)
1964 if (ext <= size)
1966 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
1967 if (cost < best_cost)
1969 kind = ext / (unsigned) 8;
1970 best_cost = cost;
1973 /* Check if we can do a sloppy shift with a final signed shift
1974 restoring the sign. */
1975 if (EXT_SHIFT_SIGNED (size - ext))
1976 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
1977 /* If not, maybe it's still cheaper to do the second shift sloppy,
1978 and do a final sign extend? */
1979 else if (size <= 16)
1980 cost = ext_shift_insns[ext - insize] + 1
1981 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
1982 else
1983 continue;
1984 if (cost < best_cost)
1986 kind = ext / (unsigned) 8 + 2;
1987 best_cost = cost;
1990 /* Check if we can sign extend in r0 */
1991 if (insize < 8)
1993 cost = 3 + shift_insns[left];
1994 if (cost < best_cost)
1996 kind = 6;
1997 best_cost = cost;
1999 /* Try the same with a final signed shift. */
2000 if (left < 31)
2002 cost = 3 + ext_shift_insns[left + 1] + 1;
2003 if (cost < best_cost)
2005 kind = 7;
2006 best_cost = cost;
2010 if (TARGET_SH3)
2012 /* Try to use a dynamic shift. */
2013 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2014 if (cost < best_cost)
2016 kind = 0;
2017 best_cost = cost;
2020 if (costp)
2021 *costp = cost;
2022 return kind;
2025 /* Function to be used in the length attribute of the instructions
2026 implementing this pattern. */
2029 shl_sext_length (insn)
2030 rtx insn;
2032 rtx set_src, left_rtx, size_rtx;
2033 int cost;
2035 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2036 left_rtx = XEXP (XEXP (set_src, 0), 1);
2037 size_rtx = XEXP (set_src, 1);
2038 shl_sext_kind (left_rtx, size_rtx, &cost);
2039 return cost;
2042 /* Generate rtl for this pattern */
2045 gen_shl_sext (dest, left_rtx, size_rtx, source)
2046 rtx dest, left_rtx, size_rtx, source;
2048 int kind;
2049 int left, size, insize, cost;
2050 rtx operands[3];
2052 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2053 left = INTVAL (left_rtx);
2054 size = INTVAL (size_rtx);
2055 insize = size - left;
2056 switch (kind)
2058 case 1:
2059 case 2:
2060 case 3:
2061 case 4:
2063 int ext = kind & 1 ? 8 : 16;
2064 int shift2 = size - ext;
2066 /* Don't expand fine-grained when combining, because that will
2067 make the pattern fail. */
2068 if (! rtx_equal_function_value_matters
2069 && ! reload_in_progress && ! reload_completed)
2071 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2072 emit_insn (gen_movsi (dest, source));
2073 break;
2075 if (dest != source)
2076 emit_insn (gen_movsi (dest, source));
2077 operands[0] = dest;
2078 if (ext - insize)
2080 operands[2] = GEN_INT (ext - insize);
2081 gen_shifty_hi_op (ASHIFT, operands);
2083 emit_insn (kind & 1
2084 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
2085 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
2086 if (kind <= 2)
2088 if (shift2)
2090 operands[2] = GEN_INT (shift2);
2091 gen_shifty_op (ASHIFT, operands);
2094 else
2096 if (shift2 > 0)
2098 if (EXT_SHIFT_SIGNED (shift2))
2100 operands[2] = GEN_INT (shift2 + 1);
2101 gen_shifty_op (ASHIFT, operands);
2102 operands[2] = GEN_INT (1);
2103 gen_shifty_op (ASHIFTRT, operands);
2104 break;
2106 operands[2] = GEN_INT (shift2);
2107 gen_shifty_hi_op (ASHIFT, operands);
2109 else if (shift2)
2111 operands[2] = GEN_INT (-shift2);
2112 gen_shifty_hi_op (LSHIFTRT, operands);
2114 emit_insn (size <= 8
2115 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2116 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2118 break;
2120 case 5:
2122 int i = 16 - size;
2123 if (! rtx_equal_function_value_matters
2124 && ! reload_in_progress && ! reload_completed)
2125 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2126 else
2128 operands[0] = dest;
2129 operands[2] = GEN_INT (16 - insize);
2130 gen_shifty_hi_op (ASHIFT, operands);
2131 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2133 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2134 while (--i >= 0)
2135 gen_ashift (ASHIFTRT, 1, dest);
2136 break;
2138 case 6:
2139 case 7:
2140 /* Don't expand fine-grained when combining, because that will
2141 make the pattern fail. */
2142 if (! rtx_equal_function_value_matters
2143 && ! reload_in_progress && ! reload_completed)
2145 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2146 emit_insn (gen_movsi (dest, source));
2147 break;
2149 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2150 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2151 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2152 operands[0] = dest;
2153 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2154 gen_shifty_op (ASHIFT, operands);
2155 if (kind == 7)
2156 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2157 break;
2158 default:
2159 return -1;
2161 return 0;
2164 /* Prefix a symbol_ref name with "datalabel". */
2167 gen_datalabel_ref (sym)
2168 rtx sym;
2170 if (GET_CODE (sym) == LABEL_REF)
2171 return gen_rtx_CONST (GET_MODE (sym),
2172 gen_rtx_UNSPEC (GET_MODE (sym),
2173 gen_rtvec (1, sym),
2174 UNSPEC_DATALABEL));
2176 if (GET_CODE (sym) != SYMBOL_REF)
2177 abort ();
2179 XSTR (sym, 0) = concat (SH_DATALABEL_ENCODING, XSTR (sym, 0), NULL);
2181 return sym;
2185 /* The SH cannot load a large constant into a register; constants have to
2186 come from a pc relative load. The reference of a pc relative load
2187 instruction must be less than 1k in front of the instruction. This
2188 means that we often have to dump a constant inside a function, and
2189 generate code to branch around it.
2191 It is important to minimize this, since the branches will slow things
2192 down and make things bigger.
2194 Worst case code looks like:
2196 mov.l L1,rn
2197 bra L2
2198 nop
2199 align
2200 L1: .long value
2201 L2:
2202 ..
2204 mov.l L3,rn
2205 bra L4
2206 nop
2207 align
2208 L3: .long value
2209 L4:
2210 ..
2212 We fix this by performing a scan before scheduling, which notices which
2213 instructions need to have their operands fetched from the constant table
2214 and builds the table.
2216 The algorithm is:
2218 scan, find an instruction which needs a pcrel move. Look forward, find the
2219 last barrier which is within MAX_COUNT bytes of the requirement.
2220 If there isn't one, make one. Process all the instructions between
2221 the find and the barrier.
2223 In the above example, we can tell that L3 is within 1k of L1, so
2224 the first move can be shrunk from the 3 insn+constant sequence into
2225 just 1 insn, and the constant moved to L3 to make:
2227 mov.l L1,rn
2228 ..
2229 mov.l L3,rn
2230 bra L4
2231 nop
2232 align
2233 L3:.long value
2234 L4:.long value
2236 Then the second move becomes the target for the shortening process. */
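/* A rough sketch of that scan (an illustration only; the actual code is in
   machine_dependent_reorg and find_barrier below):

     for each insn INSN that broken_move accepts:
       BARRIER = find_barrier (num_mova, mova, INSN);
       for each broken move SCAN from INSN up to BARRIER:
         LAB = add_constant (SET_SRC (pat), mode, ...);
         rewrite SCAN to load from (mem (label_ref LAB));
       dump_table (BARRIER);

   dump_table emits the accumulated pool right after BARRIER.  */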
2238 typedef struct
2240 rtx value; /* Value in table. */
2241 rtx label; /* Label of value. */
2242 rtx wend; /* End of window. */
2243 enum machine_mode mode; /* Mode of value. */
2244 } pool_node;
2246 /* The maximum number of constants that can fit into one pool, since
2247 the pc relative range is 0...1020 bytes and constants are at least 4
2248 bytes long. */
2250 #define MAX_POOL_SIZE (1020/4)
2251 static pool_node pool_vector[MAX_POOL_SIZE];
2252 static int pool_size;
2253 static rtx pool_window_label;
2254 static int pool_window_last;
2256 /* ??? If we need a constant in HImode which is the truncated value of a
2257 constant we need in SImode, we could combine the two entries thus saving
2258 two bytes. Is this common enough to be worth the effort of implementing
2259 it? */
2261 /* ??? This stuff should be done at the same time that we shorten branches.
2262 As it is now, we must assume that all branches are the maximum size, and
2263 this causes us to almost always output constant pools sooner than
2264 necessary. */
2266 /* Add a constant to the pool and return its label. */
2268 static rtx
2269 add_constant (x, mode, last_value)
2270 rtx x;
2271 enum machine_mode mode;
2272 rtx last_value;
2274 int i;
2275 rtx lab, new, ref, newref;
2277 /* First see if we've already got it. */
2278 for (i = 0; i < pool_size; i++)
2280 if (x->code == pool_vector[i].value->code
2281 && mode == pool_vector[i].mode)
2283 if (x->code == CODE_LABEL)
2285 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2286 continue;
2288 if (rtx_equal_p (x, pool_vector[i].value))
2290 lab = new = 0;
2291 if (! last_value
2292 || ! i
2293 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2295 new = gen_label_rtx ();
2296 LABEL_REFS (new) = pool_vector[i].label;
2297 pool_vector[i].label = lab = new;
2299 if (lab && pool_window_label)
2301 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2302 ref = pool_vector[pool_window_last].wend;
2303 LABEL_NEXTREF (newref) = ref;
2304 pool_vector[pool_window_last].wend = newref;
2306 if (new)
2307 pool_window_label = new;
2308 pool_window_last = i;
2309 return lab;
2314 /* Need a new one. */
2315 pool_vector[pool_size].value = x;
2316 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2317 lab = 0;
2318 else
2319 lab = gen_label_rtx ();
2320 pool_vector[pool_size].mode = mode;
2321 pool_vector[pool_size].label = lab;
2322 pool_vector[pool_size].wend = NULL_RTX;
2323 if (lab && pool_window_label)
2325 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2326 ref = pool_vector[pool_window_last].wend;
2327 LABEL_NEXTREF (newref) = ref;
2328 pool_vector[pool_window_last].wend = newref;
2330 if (lab)
2331 pool_window_label = lab;
2332 pool_window_last = pool_size;
2333 pool_size++;
2334 return lab;
2337 /* Output the literal table. */
2339 static void
2340 dump_table (scan)
2341 rtx scan;
2343 int i;
2344 int need_align = 1;
2345 rtx lab, ref;
2346 int have_di = 0;
2348 /* Do two passes, first time dump out the HI sized constants. */
2350 for (i = 0; i < pool_size; i++)
2352 pool_node *p = &pool_vector[i];
2354 if (p->mode == HImode)
2356 if (need_align)
2358 scan = emit_insn_after (gen_align_2 (), scan);
2359 need_align = 0;
2361 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2362 scan = emit_label_after (lab, scan);
2363 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2364 scan);
2365 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2367 lab = XEXP (ref, 0);
2368 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2371 else if (p->mode == DImode || p->mode == DFmode)
2372 have_di = 1;
2375 need_align = 1;
2377 if (TARGET_SHCOMPACT && have_di)
2379 rtx align_insn = NULL_RTX;
2381 scan = emit_label_after (gen_label_rtx (), scan);
2382 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2383 need_align = 0;
2385 for (i = 0; i < pool_size; i++)
2387 pool_node *p = &pool_vector[i];
2389 switch (p->mode)
2391 case HImode:
2392 break;
2393 case SImode:
2394 case SFmode:
2395 if (align_insn)
2397 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2398 emit_label_before (lab, align_insn);
2399 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2400 align_insn);
2401 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2403 lab = XEXP (ref, 0);
2404 emit_insn_before (gen_consttable_window_end (lab),
2405 align_insn);
2407 delete_insn (align_insn);
2408 align_insn = NULL_RTX;
2409 continue;
2411 else
2413 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2414 scan = emit_label_after (lab, scan);
2415 scan = emit_insn_after (gen_consttable_4 (p->value,
2416 const0_rtx), scan);
2417 need_align = ! need_align;
2419 break;
2420 case DFmode:
2421 case DImode:
2422 if (need_align)
2424 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2425 align_insn = scan;
2426 need_align = 0;
2428 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2429 scan = emit_label_after (lab, scan);
2430 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2431 scan);
2432 break;
2433 default:
2434 abort ();
2435 break;
2438 if (p->mode != HImode)
2440 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2442 lab = XEXP (ref, 0);
2443 scan = emit_insn_after (gen_consttable_window_end (lab),
2444 scan);
2449 pool_size = 0;
2452 for (i = 0; i < pool_size; i++)
2454 pool_node *p = &pool_vector[i];
2456 switch (p->mode)
2458 case HImode:
2459 break;
2460 case SImode:
2461 case SFmode:
2462 if (need_align)
2464 need_align = 0;
2465 scan = emit_label_after (gen_label_rtx (), scan);
2466 scan = emit_insn_after (gen_align_4 (), scan);
2468 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2469 scan = emit_label_after (lab, scan);
2470 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2471 scan);
2472 break;
2473 case DFmode:
2474 case DImode:
2475 if (need_align)
2477 need_align = 0;
2478 scan = emit_label_after (gen_label_rtx (), scan);
2479 scan = emit_insn_after (gen_align_4 (), scan);
2481 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2482 scan = emit_label_after (lab, scan);
2483 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2484 scan);
2485 break;
2486 default:
2487 abort ();
2488 break;
2491 if (p->mode != HImode)
2493 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2495 lab = XEXP (ref, 0);
2496 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2501 scan = emit_insn_after (gen_consttable_end (), scan);
2502 scan = emit_barrier_after (scan);
2503 pool_size = 0;
2504 pool_window_label = NULL_RTX;
2505 pool_window_last = 0;
2508 /* Return non-zero if constant would be an ok source for a
2509 mov.w instead of a mov.l. */
2511 static int
2512 hi_const (src)
2513 rtx src;
2515 return (GET_CODE (src) == CONST_INT
2516 && INTVAL (src) >= -32768
2517 && INTVAL (src) <= 32767);
2520 /* Non-zero if the insn is a move instruction which needs to be fixed. */
2522 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2523 CONST_DOUBLE input value is CONST_OK_FOR_I. For an SFmode move, we don't
2524 need to fix it if the input value is CONST_OK_FOR_I. */
2526 static int
2527 broken_move (insn)
2528 rtx insn;
2530 if (GET_CODE (insn) == INSN)
2532 rtx pat = PATTERN (insn);
2533 if (GET_CODE (pat) == PARALLEL)
2534 pat = XVECEXP (pat, 0, 0);
2535 if (GET_CODE (pat) == SET
2536 /* We can load any 8 bit value if we don't care what the high
2537 order bits end up as. */
2538 && GET_MODE (SET_DEST (pat)) != QImode
2539 && (CONSTANT_P (SET_SRC (pat))
2540 /* Match mova_const. */
2541 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2542 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2543 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2544 && ! (TARGET_SH3E
2545 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2546 && (fp_zero_operand (SET_SRC (pat))
2547 || fp_one_operand (SET_SRC (pat)))
2548 /* ??? If this is a -m4 or -m4-single compilation, in general
2549 we don't know the current setting of fpscr, so disable fldi.
2550 There is an exception if this was a register-register move
2551 before reload - and hence it was ascertained that we have
2552 single precision setting - and in a post-reload optimization
2553 we changed this to do a constant load. In that case
2554 we don't have an r0 clobber, hence we must use fldi. */
2555 && (! TARGET_SH4 || TARGET_FMOVD
2556 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2557 == SCRATCH))
2558 && GET_CODE (SET_DEST (pat)) == REG
2559 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2560 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2561 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
2562 return 1;
2565 return 0;
2568 static int
2569 mova_p (insn)
2570 rtx insn;
2572 return (GET_CODE (insn) == INSN
2573 && GET_CODE (PATTERN (insn)) == SET
2574 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2575 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2576 /* Don't match mova_const. */
2577 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2580 /* Find the last barrier from insn FROM which is close enough to hold the
2581 constant pool. If we can't find one, then create one near the end of
2582 the range. */
2584 static rtx
2585 find_barrier (num_mova, mova, from)
2586 int num_mova;
2587 rtx mova, from;
2589 int count_si = 0;
2590 int count_hi = 0;
2591 int found_hi = 0;
2592 int found_si = 0;
2593 int found_di = 0;
2594 int hi_align = 2;
2595 int si_align = 2;
2596 int leading_mova = num_mova;
2597 rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
2598 int si_limit;
2599 int hi_limit;
2601 /* For HImode: range is 510, add 4 because pc counts from address of
2602 second instruction after this one, subtract 2 for the jump instruction
2603 that we may need to emit before the table, subtract 2 for the instruction
2604 that fills the jump delay slot (in very rare cases, reorg will take an
2605 instruction from after the constant pool or will leave the delay slot
2606 empty). This gives 510.
2607 For SImode: range is 1020, add 4 because pc counts from address of
2608 second instruction after this one, subtract 2 in case pc is 2 byte
2609 aligned, subtract 2 for the jump instruction that we may need to emit
2610 before the table, subtract 2 for the instruction that fills the jump
2611 delay slot. This gives 1018. */
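/* In numbers (an illustration): hi_limit = 510 + 4 - 2 - 2 = 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018, matching the assignments
   below.  */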
2613 /* The branch will always be shortened now that the reference address for
2614 forward branches is the successor address, thus we need no longer make
2615 adjustments to the [sh]i_limit for -O0. */
2617 si_limit = 1018;
2618 hi_limit = 510;
2620 while (from && count_si < si_limit && count_hi < hi_limit)
2622 int inc = get_attr_length (from);
2623 int new_align = 1;
2625 if (GET_CODE (from) == CODE_LABEL)
2627 if (optimize)
2628 new_align = 1 << label_to_alignment (from);
2629 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2630 new_align = 1 << barrier_align (from);
2631 else
2632 new_align = 1;
2633 inc = 0;
2636 if (GET_CODE (from) == BARRIER)
2639 found_barrier = from;
2641 /* If we are at the end of the function, or in front of an alignment
2642 instruction, we need not insert an extra alignment. We prefer
2643 this kind of barrier. */
2644 if (barrier_align (from) > 2)
2645 good_barrier = from;
2648 if (broken_move (from))
2650 rtx pat, src, dst;
2651 enum machine_mode mode;
2653 pat = PATTERN (from);
2654 if (GET_CODE (pat) == PARALLEL)
2655 pat = XVECEXP (pat, 0, 0);
2656 src = SET_SRC (pat);
2657 dst = SET_DEST (pat);
2658 mode = GET_MODE (dst);
2660 /* We must explicitly check the mode, because sometimes the
2661 front end will generate code to load unsigned constants into
2662 HImode targets without properly sign extending them. */
2663 if (mode == HImode
2664 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2666 found_hi += 2;
2667 /* We put the short constants before the long constants, so
2668 we must count the length of short constants in the range
2669 for the long constants. */
2670 /* ??? This isn't optimal, but is easy to do. */
2671 si_limit -= 2;
2673 else
2675 /* We dump DF/DI constants before SF/SI ones, because
2676 the limit is the same, but the alignment requirements
2677 are higher. We may waste up to 4 additional bytes
2678 for alignment, and the DF/DI constant may have
2679 another SF/SI constant placed before it. */
2680 if (TARGET_SHCOMPACT
2681 && ! found_di
2682 && (mode == DFmode || mode == DImode))
2684 found_di = 1;
2685 si_limit -= 8;
2687 while (si_align > 2 && found_si + si_align - 2 > count_si)
2688 si_align >>= 1;
2689 if (found_si > count_si)
2690 count_si = found_si;
2691 found_si += GET_MODE_SIZE (mode);
2692 if (num_mova)
2693 si_limit -= GET_MODE_SIZE (mode);
2696 /* See the code in machine_dependent_reorg, which has a similar if
2697 statement that generates a new mova insn in many cases. */
2698 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
2699 inc += 2;
2702 if (mova_p (from))
2704 if (! num_mova++)
2706 leading_mova = 0;
2707 mova = from;
2708 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
2710 if (found_si > count_si)
2711 count_si = found_si;
2713 else if (GET_CODE (from) == JUMP_INSN
2714 && (GET_CODE (PATTERN (from)) == ADDR_VEC
2715 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
2717 if (num_mova)
2718 num_mova--;
2719 if (barrier_align (next_real_insn (from)) == CACHE_LOG)
2721 /* We have just passed the barrier in front of the
2722 ADDR_DIFF_VEC, which is stored in found_barrier. Since
2723 the ADDR_DIFF_VEC is accessed as data, just like our pool
2724 constants, this is a good opportunity to accommodate what
2725 we have gathered so far.
2726 If we waited any longer, we could end up at a barrier in
2727 front of code, which gives worse cache usage for separated
2728 instruction / data caches. */
2729 good_barrier = found_barrier;
2730 break;
2732 else
2734 rtx body = PATTERN (from);
2735 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
2738 /* For the SH1, we generate alignments even after jumps-around-jumps. */
2739 else if (GET_CODE (from) == JUMP_INSN
2740 && ! TARGET_SH2
2741 && ! TARGET_SMALLCODE)
2742 new_align = 4;
2744 if (found_si)
2746 count_si += inc;
2747 if (new_align > si_align)
2749 si_limit -= (count_si - 1) & (new_align - si_align);
2750 si_align = new_align;
2752 count_si = (count_si + new_align - 1) & -new_align;
2754 if (found_hi)
2756 count_hi += inc;
2757 if (new_align > hi_align)
2759 hi_limit -= (count_hi - 1) & (new_align - hi_align);
2760 hi_align = new_align;
2762 count_hi = (count_hi + new_align - 1) & -new_align;
2764 from = NEXT_INSN (from);
2767 if (num_mova)
2769 if (leading_mova)
2771 /* Try as we might, the leading mova is out of range. Change
2772 it into a load (which will become a pcload) and retry. */
2773 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2774 INSN_CODE (mova) = -1;
2775 return find_barrier (0, 0, mova);
2777 else
2779 /* Insert the constant pool table before the mova instruction,
2780 to prevent the mova label reference from going out of range. */
2781 from = mova;
2782 good_barrier = found_barrier = barrier_before_mova;
2786 if (found_barrier)
2788 if (good_barrier && next_real_insn (found_barrier))
2789 found_barrier = good_barrier;
2791 else
2793 /* We didn't find a barrier in time to dump our stuff,
2794 so we'll make one. */
2795 rtx label = gen_label_rtx ();
2797 /* If we exceeded the range, then we must back up over the last
2798 instruction we looked at. Otherwise, we just need to undo the
2799 NEXT_INSN at the end of the loop. */
2800 if (count_hi > hi_limit || count_si > si_limit)
2801 from = PREV_INSN (PREV_INSN (from));
2802 else
2803 from = PREV_INSN (from);
2805 /* Walk back to be just before any jump or label.
2806 Putting it before a label reduces the number of times the branch
2807 around the constant pool table will be hit. Putting it before
2808 a jump makes it more likely that the bra delay slot will be
2809 filled. */
2810 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
2811 || GET_CODE (from) == CODE_LABEL)
2812 from = PREV_INSN (from);
2814 from = emit_jump_insn_after (gen_jump (label), from);
2815 JUMP_LABEL (from) = label;
2816 LABEL_NUSES (label) = 1;
2817 found_barrier = emit_barrier_after (from);
2818 emit_label_after (label, found_barrier);
2821 return found_barrier;
2824 /* If the instruction INSN is implemented by a special function, and we can
2825 positively find the register that is used to call the sfunc, and this
2826 register is not used anywhere else in this instruction - except as the
2827 destination of a set, return this register; else, return 0. */
2829 sfunc_uses_reg (insn)
2830 rtx insn;
2832 int i;
2833 rtx pattern, part, reg_part, reg;
2835 if (GET_CODE (insn) != INSN)
2836 return 0;
2837 pattern = PATTERN (insn);
2838 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
2839 return 0;
2841 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2843 part = XVECEXP (pattern, 0, i);
2844 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
2845 reg_part = part;
2847 if (! reg_part)
2848 return 0;
2849 reg = XEXP (reg_part, 0);
2850 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
2852 part = XVECEXP (pattern, 0, i);
2853 if (part == reg_part || GET_CODE (part) == CLOBBER)
2854 continue;
2855 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
2856 && GET_CODE (SET_DEST (part)) == REG)
2857 ? SET_SRC (part) : part)))
2858 return 0;
2860 return reg;
2863 /* See if the only way in which INSN uses REG is by calling it, or by
2864 setting it while calling it. Set *SET to a SET rtx if the register
2865 is set by INSN. */
2867 static int
2868 noncall_uses_reg (reg, insn, set)
2869 rtx reg;
2870 rtx insn;
2871 rtx *set;
2873 rtx pattern, reg2;
2875 *set = NULL_RTX;
2877 reg2 = sfunc_uses_reg (insn);
2878 if (reg2 && REGNO (reg2) == REGNO (reg))
2880 pattern = single_set (insn);
2881 if (pattern
2882 && GET_CODE (SET_DEST (pattern)) == REG
2883 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2884 *set = pattern;
2885 return 0;
2887 if (GET_CODE (insn) != CALL_INSN)
2889 /* We don't use rtx_equal_p because we don't care if the mode is
2890 different. */
2891 pattern = single_set (insn);
2892 if (pattern
2893 && GET_CODE (SET_DEST (pattern)) == REG
2894 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2896 rtx par, part;
2897 int i;
2899 *set = pattern;
2900 par = PATTERN (insn);
2901 if (GET_CODE (par) == PARALLEL)
2902 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
2904 part = XVECEXP (par, 0, i);
2905 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
2906 return 1;
2908 return reg_mentioned_p (reg, SET_SRC (pattern));
2911 return 1;
2914 pattern = PATTERN (insn);
2916 if (GET_CODE (pattern) == PARALLEL)
2918 int i;
2920 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2921 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
2922 return 1;
2923 pattern = XVECEXP (pattern, 0, 0);
2926 if (GET_CODE (pattern) == SET)
2928 if (reg_mentioned_p (reg, SET_DEST (pattern)))
2930 /* We don't use rtx_equal_p, because we don't care if the
2931 mode is different. */
2932 if (GET_CODE (SET_DEST (pattern)) != REG
2933 || REGNO (reg) != REGNO (SET_DEST (pattern)))
2934 return 1;
2936 *set = pattern;
2939 pattern = SET_SRC (pattern);
2942 if (GET_CODE (pattern) != CALL
2943 || GET_CODE (XEXP (pattern, 0)) != MEM
2944 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
2945 return 1;
2947 return 0;
2950 /* Given X, a pattern of an insn or a part of it, return a mask of used
2951 general registers. Bits 0..15 mean that the respective registers
2952 are used as inputs in the instruction. Bits 16..31 mean that the
2953 registers 0..15, respectively, are used as outputs, or are clobbered.
2954 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
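/* Example (not from the original source): for
   (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   the SET case below yields (1 << 2) | (1 << 3) for the inputs and
   1 << (1 + 16) for the output, i.e. 0x2000c, assuming each register
   occupies a single hard reg in SImode.  */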
2956 regs_used (x, is_dest)
2957 rtx x; int is_dest;
2959 enum rtx_code code;
2960 const char *fmt;
2961 int i, used = 0;
2963 if (! x)
2964 return used;
2965 code = GET_CODE (x);
2966 switch (code)
2968 case REG:
2969 if (REGNO (x) < 16)
2970 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2971 << (REGNO (x) + is_dest));
2972 return 0;
2973 case SUBREG:
2975 rtx y = SUBREG_REG (x);
2977 if (GET_CODE (y) != REG)
2978 break;
2979 if (REGNO (y) < 16)
2980 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2981 << (REGNO (y) +
2982 subreg_regno_offset (REGNO (y),
2983 GET_MODE (y),
2984 SUBREG_BYTE (x),
2985 GET_MODE (x)) + is_dest));
2986 return 0;
2988 case SET:
2989 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
2990 case RETURN:
2991 /* If there was a return value, it must have been indicated with USE. */
2992 return 0x00ffff00;
2993 case CLOBBER:
2994 is_dest = 1;
2995 break;
2996 case MEM:
2997 is_dest = 0;
2998 break;
2999 case CALL:
3000 used |= 0x00ff00f0;
3001 break;
3002 default:
3003 break;
3006 fmt = GET_RTX_FORMAT (code);
3008 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3010 if (fmt[i] == 'E')
3012 register int j;
3013 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3014 used |= regs_used (XVECEXP (x, i, j), is_dest);
3016 else if (fmt[i] == 'e')
3017 used |= regs_used (XEXP (x, i), is_dest);
3019 return used;
3022 /* Create an instruction that prevents redirection of a conditional branch
3023 to the destination of the JUMP with address ADDR.
3024 If the branch needs to be implemented as an indirect jump, try to find
3025 a scratch register for it.
3026 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3027 If any preceding insn that doesn't fit into a delay slot is good enough,
3028 pass 1. Pass 2 if a definite blocking insn is needed.
3029 -1 is used internally to avoid deep recursion.
3030 If a blocking instruction is made or recognized, return it. */
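/* For illustration, the callers below use this as follows: gen_far_branch
   passes NEED_BLOCK == 2 to force a definite blocking insn after its new
   far jump, split_branches passes 1 when any preceding non-delay-slot insn
   is good enough, and the recursive call inside this function passes -1 so
   that the jump-threading check is not repeated.  */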
3032 static rtx
3033 gen_block_redirect (jump, addr, need_block)
3034 rtx jump;
3035 int addr, need_block;
3037 int dead = 0;
3038 rtx prev = prev_nonnote_insn (jump);
3039 rtx dest;
3041 /* First, check if we already have an instruction that satisfies our need. */
3042 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3044 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3045 return prev;
3046 if (GET_CODE (PATTERN (prev)) == USE
3047 || GET_CODE (PATTERN (prev)) == CLOBBER
3048 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3049 prev = jump;
3050 else if ((need_block &= ~1) < 0)
3051 return prev;
3052 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3053 need_block = 0;
3055 /* We can't use JUMP_LABEL here because it might be undefined
3056 when not optimizing. */
3057 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3058 /* If the branch is out of range, try to find a scratch register for it. */
3059 if (optimize
3060 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3061 > 4092 + 4098))
3063 rtx scan;
3064 /* Don't look for the stack pointer as a scratch register;
3065 it would cause trouble if an interrupt occurred. */
3066 unsigned try = 0x7fff, used;
3067 int jump_left = flag_expensive_optimizations + 1;
3069 /* It is likely that the most recent eligible instruction is wanted for
3070 the delay slot. Therefore, find out which registers it uses, and
3071 try to avoid using them. */
3073 for (scan = jump; (scan = PREV_INSN (scan)); )
3075 enum rtx_code code;
3077 if (INSN_DELETED_P (scan))
3078 continue;
3079 code = GET_CODE (scan);
3080 if (code == CODE_LABEL || code == JUMP_INSN)
3081 break;
3082 if (code == INSN
3083 && GET_CODE (PATTERN (scan)) != USE
3084 && GET_CODE (PATTERN (scan)) != CLOBBER
3085 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3087 try &= ~regs_used (PATTERN (scan), 0);
3088 break;
3091 for (used = dead = 0, scan = JUMP_LABEL (jump);
3092 (scan = NEXT_INSN (scan)); )
3094 enum rtx_code code;
3096 if (INSN_DELETED_P (scan))
3097 continue;
3098 code = GET_CODE (scan);
3099 if (GET_RTX_CLASS (code) == 'i')
3101 used |= regs_used (PATTERN (scan), 0);
3102 if (code == CALL_INSN)
3103 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3104 dead |= (used >> 16) & ~used;
3105 if (dead & try)
3107 dead &= try;
3108 break;
3110 if (code == JUMP_INSN)
3112 if (jump_left-- && simplejump_p (scan))
3113 scan = JUMP_LABEL (scan);
3114 else
3115 break;
3119 /* Mask out the stack pointer again, in case it was
3120 the only 'free' register we have found. */
3121 dead &= 0x7fff;
3123 /* If the immediate destination is still in range, check for possible
3124 threading with a jump beyond the delay slot insn.
3125 Don't check if we are called recursively; the jump has been or will be
3126 checked in a different invocation then. */
3128 else if (optimize && need_block >= 0)
3130 rtx next = next_active_insn (next_active_insn (dest));
3131 if (next && GET_CODE (next) == JUMP_INSN
3132 && GET_CODE (PATTERN (next)) == SET
3133 && recog_memoized (next) == CODE_FOR_jump)
3135 dest = JUMP_LABEL (next);
3136 if (dest
3137 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3138 > 4092 + 4098))
3139 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3143 if (dead)
3145 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3147 /* It would be nice if we could convert the jump into an indirect
3148 jump / far branch right now, thus exposing all constituent
3149 instructions to further optimization. However, reorg uses
3150 simplejump_p to determine if there is an unconditional jump where
3151 it should try to schedule instructions from the target of the
3152 branch; simplejump_p fails for indirect jumps even if they have
3153 a JUMP_LABEL. */
3154 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3155 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3156 , jump);
3157 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3158 return insn;
3160 else if (need_block)
3161 /* We can't use JUMP_LABEL here because it might be undefined
3162 when not optimizing. */
3163 return emit_insn_before (gen_block_branch_redirect
3164 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3165 , jump);
3166 return prev;
3169 #define CONDJUMP_MIN -252
3170 #define CONDJUMP_MAX 262
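/* A possible reading of these bounds (an assumption, not from the original
   source): the SH conditional branches bt/bf take an 8-bit signed
   displacement counted in 2-byte units from PC + 4, i.e. byte offsets
   4 + 2 * d for d in [-128, 127], which gives [-252, 258] relative to the
   branch itself; CONDJUMP_MAX appears to allow a little extra slack beyond
   258.  */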
3171 struct far_branch
3173 /* A label (to be placed) in front of the jump
3174 that jumps to our ultimate destination. */
3175 rtx near_label;
3176 /* Where we are going to insert it if we cannot move the jump any farther,
3177 or the jump itself if we have picked up an existing jump. */
3178 rtx insert_place;
3179 /* The ultimate destination. */
3180 rtx far_label;
3181 struct far_branch *prev;
3182 /* If the branch has already been created, its address;
3183 else the address of its first prospective user. */
3184 int address;
3187 static void gen_far_branch PARAMS ((struct far_branch *));
3188 enum mdep_reorg_phase_e mdep_reorg_phase;
3189 static void
3190 gen_far_branch (bp)
3191 struct far_branch *bp;
3193 rtx insn = bp->insert_place;
3194 rtx jump;
3195 rtx label = gen_label_rtx ();
3197 emit_label_after (label, insn);
3198 if (bp->far_label)
3200 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3201 LABEL_NUSES (bp->far_label)++;
3203 else
3204 jump = emit_jump_insn_after (gen_return (), insn);
3205 /* Emit a barrier so that reorg knows that any following instructions
3206 are not reachable via a fall-through path.
3207 But don't do this when not optimizing, since we wouldn't suppress the
3208 alignment for the barrier then, and could end up with out-of-range
3209 pc-relative loads. */
3210 if (optimize)
3211 emit_barrier_after (jump);
3212 emit_label_after (bp->near_label, insn);
3213 JUMP_LABEL (jump) = bp->far_label;
3214 if (! invert_jump (insn, label, 1))
3215 abort ();
3216 /* Prevent reorg from undoing our splits. */
3217 gen_block_redirect (jump, bp->address += 2, 2);
3220 /* Fix up ADDR_DIFF_VECs. */
3221 void
3222 fixup_addr_diff_vecs (first)
3223 rtx first;
3225 rtx insn;
3227 for (insn = first; insn; insn = NEXT_INSN (insn))
3229 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3231 if (GET_CODE (insn) != JUMP_INSN
3232 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3233 continue;
3234 pat = PATTERN (insn);
3235 vec_lab = XEXP (XEXP (pat, 0), 0);
3237 /* Search the matching casesi_jump_2. */
3238 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3240 if (GET_CODE (prev) != JUMP_INSN)
3241 continue;
3242 prevpat = PATTERN (prev);
3243 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3244 continue;
3245 x = XVECEXP (prevpat, 0, 1);
3246 if (GET_CODE (x) != USE)
3247 continue;
3248 x = XEXP (x, 0);
3249 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3250 break;
3253 /* Emit the reference label of the braf where it belongs, right after
3254 the casesi_jump_2 (i.e. braf). */
3255 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3256 emit_label_after (braf_label, prev);
3258 /* Fix up the ADDR_DIFF_VEC to be relative
3259 to the reference address of the braf. */
3260 XEXP (XEXP (pat, 0), 0) = braf_label;
3264 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3265 a barrier. Return the base 2 logarithm of the desired alignment. */
3267 barrier_align (barrier_or_label)
3268 rtx barrier_or_label;
3270 rtx next = next_real_insn (barrier_or_label), pat, prev;
3271 int slot, credit, jump_to_next;
3273 if (! next)
3274 return 0;
3276 pat = PATTERN (next);
3278 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3279 return 2;
3281 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3282 /* This is a barrier in front of a constant table. */
3283 return 0;
3285 prev = prev_real_insn (barrier_or_label);
3286 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3288 pat = PATTERN (prev);
3289 /* If this is a very small table, we want to keep the alignment after
3290 the table to the minimum for proper code alignment. */
3291 return ((TARGET_SMALLCODE
3292 || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3293 <= (unsigned)1 << (CACHE_LOG - 2)))
3294 ? 1 << TARGET_SHMEDIA : CACHE_LOG);
3297 if (TARGET_SMALLCODE)
3298 return 0;
3300 if (! TARGET_SH2 || ! optimize)
3301 return CACHE_LOG;
3303 /* When fixing up pcloads, a constant table might be inserted just before
3304 the basic block that ends with the barrier. Thus, we can't trust the
3305 instruction lengths before that. */
3306 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3308 /* Check if there is an immediately preceding branch to the insn beyond
3309 the barrier. We must weigh the cost of discarding useful information
3310 from the current cache line when executing this branch and there is
3311 an alignment, against that of fetching unneeded insns in front of the
3312 branch target when there is no alignment. */
3314 /* There are two delay_slot cases to consider. One is the simple case
3315 where the preceding branch is to the insn beyond the barrier (simple
3316 delay slot filling), and the other is where the preceding branch has
3317 a delay slot that is a duplicate of the insn after the barrier
3318 (fill_eager_delay_slots) and the branch is to the insn after the insn
3319 after the barrier. */
3321 /* PREV is presumed to be the JUMP_INSN for the barrier under
3322 investigation. Skip to the insn before it. */
3323 prev = prev_real_insn (prev);
3325 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3326 credit >= 0 && prev && GET_CODE (prev) == INSN;
3327 prev = prev_real_insn (prev))
3329 jump_to_next = 0;
3330 if (GET_CODE (PATTERN (prev)) == USE
3331 || GET_CODE (PATTERN (prev)) == CLOBBER)
3332 continue;
3333 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3335 prev = XVECEXP (PATTERN (prev), 0, 1);
3336 if (INSN_UID (prev) == INSN_UID (next))
3338 /* Delay slot was filled with insn at jump target. */
3339 jump_to_next = 1;
3340 continue;
3344 if (slot &&
3345 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3346 slot = 0;
3347 credit -= get_attr_length (prev);
3349 if (prev
3350 && GET_CODE (prev) == JUMP_INSN
3351 && JUMP_LABEL (prev))
3353 rtx x;
3354 if (jump_to_next
3355 || next_real_insn (JUMP_LABEL (prev)) == next
3356 /* If relax_delay_slots() decides NEXT was redundant
3357 with some previous instruction, it will have
3358 redirected PREV's jump to the following insn. */
3359 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3360 /* There is no upper bound on redundant instructions
3361 that might have been skipped, but we must not put an
3362 alignment where none had been before. */
3363 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3364 (INSN_P (x)
3365 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3366 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch))))
3368 rtx pat = PATTERN (prev);
3369 if (GET_CODE (pat) == PARALLEL)
3370 pat = XVECEXP (pat, 0, 0);
3371 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3372 return 0;
3377 return CACHE_LOG;
3380 /* If we are inside a phony loop, almost any kind of label can turn up as the
3381 first one in the loop. Aligning a braf label causes incorrect switch
3382 destination addresses; we can detect braf labels because they are
3383 followed by a BARRIER.
3384 Applying loop alignment to small constant or switch tables is a waste
3385 of space, so we suppress this too. */
3387 sh_loop_align (label)
3388 rtx label;
3390 rtx next = label;
3393 next = next_nonnote_insn (next);
3394 while (next && GET_CODE (next) == CODE_LABEL);
3396 if (! next
3397 || ! INSN_P (next)
3398 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3399 || recog_memoized (next) == CODE_FOR_consttable_2)
3400 return 0;
3402 if (TARGET_SH5)
3403 return 3;
3405 return 2;
3408 /* Exported to toplev.c.
3410 Do a final pass over the function, just before delayed branch
3411 scheduling. */
3413 void
3414 machine_dependent_reorg (first)
3415 rtx first;
3417 rtx insn, mova;
3418 int num_mova;
3419 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3420 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3422 /* We must split call insns before introducing `mova's. If we're
3423 optimizing, they'll have already been split. Otherwise, make
3424 sure we don't split them too late. */
3425 if (! optimize)
3426 split_all_insns_noflow ();
3428 if (TARGET_SHMEDIA)
3429 return;
3431 /* If relaxing, generate pseudo-ops to associate function calls with
3432 the symbols they call. It does no harm to not generate these
3433 pseudo-ops. However, when we can generate them, it enables the
3434 linker to potentially relax the jsr to a bsr, and eliminate the
3435 register load and, possibly, the constant pool entry. */
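/* Roughly (an illustration, not from the original source), the unrelaxed
   sequence
        mov.l  Lc,r1     ! Lc: .long _func
        jsr    @r1
   can, once the call is associated with _func, be turned by the linker into
        bsr    _func
   dropping the register load and, if nothing else references it, the
   constant pool entry.  */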
3437 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3438 if (TARGET_RELAX)
3440 /* Remove all REG_LABEL notes. We want to use them for our own
3441 purposes. This works because none of the remaining passes
3442 need to look at them.
3444 ??? But it may break in the future. We should use a machine
3445 dependent REG_NOTE, or some other approach entirely. */
3446 for (insn = first; insn; insn = NEXT_INSN (insn))
3448 if (INSN_P (insn))
3450 rtx note;
3452 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3453 remove_note (insn, note);
3457 for (insn = first; insn; insn = NEXT_INSN (insn))
3459 rtx pattern, reg, link, set, scan, dies, label;
3460 int rescan = 0, foundinsn = 0;
3462 if (GET_CODE (insn) == CALL_INSN)
3464 pattern = PATTERN (insn);
3466 if (GET_CODE (pattern) == PARALLEL)
3467 pattern = XVECEXP (pattern, 0, 0);
3468 if (GET_CODE (pattern) == SET)
3469 pattern = SET_SRC (pattern);
3471 if (GET_CODE (pattern) != CALL
3472 || GET_CODE (XEXP (pattern, 0)) != MEM)
3473 continue;
3475 reg = XEXP (XEXP (pattern, 0), 0);
3477 else
3479 reg = sfunc_uses_reg (insn);
3480 if (! reg)
3481 continue;
3484 if (GET_CODE (reg) != REG)
3485 continue;
3487 /* This is a function call via REG. If the only uses of REG
3488 between the time that it is set and the time that it dies
3489 are in function calls, then we can associate all the
3490 function calls with the setting of REG. */
3492 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3494 if (REG_NOTE_KIND (link) != 0)
3495 continue;
3496 set = single_set (XEXP (link, 0));
3497 if (set && rtx_equal_p (reg, SET_DEST (set)))
3499 link = XEXP (link, 0);
3500 break;
3504 if (! link)
3506 /* ??? Sometimes global register allocation will have
3507 deleted the insn pointed to by LOG_LINKS. Try
3508 scanning backward to find where the register is set. */
3509 for (scan = PREV_INSN (insn);
3510 scan && GET_CODE (scan) != CODE_LABEL;
3511 scan = PREV_INSN (scan))
3513 if (! INSN_P (scan))
3514 continue;
3516 if (! reg_mentioned_p (reg, scan))
3517 continue;
3519 if (noncall_uses_reg (reg, scan, &set))
3520 break;
3522 if (set)
3524 link = scan;
3525 break;
3530 if (! link)
3531 continue;
3533 /* The register is set at LINK. */
3535 /* We can only optimize the function call if the register is
3536 being set to a symbol. In theory, we could sometimes
3537 optimize calls to a constant location, but the assembler
3538 and linker do not support that at present. */
3539 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3540 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3541 continue;
3543 /* Scan forward from LINK to the place where REG dies, and
3544 make sure that the only insns which use REG are
3545 themselves function calls. */
3547 /* ??? This doesn't work for call targets that were allocated
3548 by reload, since there may not be a REG_DEAD note for the
3549 register. */
3551 dies = NULL_RTX;
3552 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3554 rtx scanset;
3556 /* Don't try to trace forward past a CODE_LABEL if we haven't
3557 seen INSN yet. Ordinarily, we will only find the setting insn
3558 in LOG_LINKS if it is in the same basic block. However,
3559 cross-jumping can insert code labels in between the load and
3560 the call, and can result in situations where a single call
3561 insn may have two targets depending on where we came from. */
3563 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3564 break;
3566 if (! INSN_P (scan))
3567 continue;
3569 /* Don't try to trace forward past a JUMP. To optimize
3570 safely, we would have to check that all the
3571 instructions at the jump destination did not use REG. */
3573 if (GET_CODE (scan) == JUMP_INSN)
3574 break;
3576 if (! reg_mentioned_p (reg, scan))
3577 continue;
3579 if (noncall_uses_reg (reg, scan, &scanset))
3580 break;
3582 if (scan == insn)
3583 foundinsn = 1;
3585 if (scan != insn
3586 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3588 /* There is a function call to this register other
3589 than the one we are checking. If we optimize
3590 this call, we need to rescan again below. */
3591 rescan = 1;
3594 /* ??? We shouldn't have to worry about SCANSET here.
3595 We should just be able to check for a REG_DEAD note
3596 on a function call. However, the REG_DEAD notes are
3597 apparently not dependable around libcalls; c-torture
3598 execute/920501-2 is a test case. If SCANSET is set,
3599 then this insn sets the register, so it must have
3600 died earlier. Unfortunately, this will only handle
3601 the cases in which the register is, in fact, set in a
3602 later insn. */
3604 /* ??? We shouldn't have to use FOUNDINSN here.
3605 However, the LOG_LINKS fields are apparently not
3606 entirely reliable around libcalls;
3607 newlib/libm/math/e_pow.c is a test case. Sometimes
3608 an insn will appear in LOG_LINKS even though it is
3609 not the most recent insn which sets the register. */
3611 if (foundinsn
3612 && (scanset
3613 || find_reg_note (scan, REG_DEAD, reg)))
3615 dies = scan;
3616 break;
3620 if (! dies)
3622 /* Either there was a branch, or some insn used REG
3623 other than as a function call address. */
3624 continue;
3627 /* Create a code label, and put it in a REG_LABEL note on
3628 the insn which sets the register, and on each call insn
3629 which uses the register. In final_prescan_insn we look
3630 for the REG_LABEL notes, and output the appropriate label
3631 or pseudo-op. */
3633 label = gen_label_rtx ();
3634 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3635 REG_NOTES (link));
3636 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3637 REG_NOTES (insn));
3638 if (rescan)
3640 scan = link;
3643 rtx reg2;
3645 scan = NEXT_INSN (scan);
3646 if (scan != insn
3647 && ((GET_CODE (scan) == CALL_INSN
3648 && reg_mentioned_p (reg, scan))
3649 || ((reg2 = sfunc_uses_reg (scan))
3650 && REGNO (reg2) == REGNO (reg))))
3651 REG_NOTES (scan)
3652 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3654 while (scan != dies);
3659 if (TARGET_SH2)
3660 fixup_addr_diff_vecs (first);
3662 if (optimize)
3664 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3665 shorten_branches (first);
3667 /* Scan the function looking for move instructions which have to be
3668 changed to pc-relative loads and insert the literal tables. */
3670 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3671 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3673 if (mova_p (insn))
3675 if (! num_mova++)
3676 mova = insn;
3678 else if (GET_CODE (insn) == JUMP_INSN
3679 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3680 && num_mova)
3682 rtx scan;
3683 int total;
3685 num_mova--;
3687 /* Some code might have been inserted between the mova and
3688 its ADDR_DIFF_VEC. Check if the mova is still in range. */
3689 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
3690 total += get_attr_length (scan);
3692 /* range of mova is 1020, add 4 because pc counts from address of
3693 second instruction after this one, subtract 2 in case pc is 2
3694 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
3695 cancels out with alignment effects of the mova itself. */
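/* In numbers (an illustration): 1020 + 4 - 2 = 1022, hence the comparison
   against 1022 just below.  */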
3696 if (total > 1022)
3698 /* Change the mova into a load, and restart scanning
3699 there. broken_move will then return true for mova. */
3700 SET_SRC (PATTERN (mova))
3701 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3702 INSN_CODE (mova) = -1;
3703 insn = mova;
3706 if (broken_move (insn))
3708 rtx scan;
3709 /* Scan ahead looking for a barrier to stick the constant table
3710 behind. */
3711 rtx barrier = find_barrier (num_mova, mova, insn);
3712 rtx last_float_move, last_float = 0, *last_float_addr;
3713 int may_need_align = 1;
3715 if (num_mova && ! mova_p (mova))
3717 /* find_barrier had to change the first mova into a
3718 pcload; thus, we have to start with this new pcload. */
3719 insn = mova;
3720 num_mova = 0;
3722 /* Now find all the moves between the points and modify them. */
3723 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
3725 if (GET_CODE (scan) == CODE_LABEL)
3726 last_float = 0;
3727 if (broken_move (scan))
3729 rtx *patp = &PATTERN (scan), pat = *patp;
3730 rtx src, dst;
3731 rtx lab;
3732 rtx newsrc;
3733 enum machine_mode mode;
3735 if (GET_CODE (pat) == PARALLEL)
3736 patp = &XVECEXP (pat, 0, 0), pat = *patp;
3737 src = SET_SRC (pat);
3738 dst = SET_DEST (pat);
3739 mode = GET_MODE (dst);
3741 if (mode == SImode && hi_const (src)
3742 && REGNO (dst) != FPUL_REG)
3744 int offset = 0;
3746 mode = HImode;
3747 while (GET_CODE (dst) == SUBREG)
3749 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
3750 GET_MODE (SUBREG_REG (dst)),
3751 SUBREG_BYTE (dst),
3752 GET_MODE (dst));
3753 dst = SUBREG_REG (dst);
3755 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
3758 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3760 /* This must be an insn that clobbers r0. */
3761 rtx clobber = XVECEXP (PATTERN (scan), 0,
3762 XVECLEN (PATTERN (scan), 0) - 1);
3764 if (GET_CODE (clobber) != CLOBBER
3765 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
3766 abort ();
3768 if (last_float
3769 && reg_set_between_p (r0_rtx, last_float_move, scan))
3770 last_float = 0;
3771 if (TARGET_SHCOMPACT)
3773 /* The first SFmode constant after a DFmode
3774 constant may be pulled before a sequence
3775 of DFmode constants, so the second SFmode
3776 needs a label, just in case. */
3777 if (GET_MODE_SIZE (mode) == 4)
3779 if (last_float && may_need_align)
3780 last_float = 0;
3781 may_need_align = 0;
3783 if (last_float
3784 && (GET_MODE_SIZE (GET_MODE (last_float))
3785 != GET_MODE_SIZE (mode)))
3787 last_float = 0;
3788 if (GET_MODE_SIZE (mode) == 4)
3789 may_need_align = 1;
3792 lab = add_constant (src, mode, last_float);
3793 if (lab)
3794 emit_insn_before (gen_mova (lab), scan);
3795 else
3797 /* There will be a REG_UNUSED note for r0 on
3798 LAST_FLOAT_MOVE; we have to change it to REG_INC,
3799 since otherwise reorg's mark_target_live_regs would not
3800 consider r0 to be used, and we could end up with a delay
3801 slot insn in front of SCAN that clobbers r0. */
3802 rtx note
3803 = find_regno_note (last_float_move, REG_UNUSED, 0);
3805 /* If we are not optimizing, then there may not be
3806 a note. */
3807 if (note)
3808 PUT_MODE (note, REG_INC);
3810 *last_float_addr = r0_inc_rtx;
3812 last_float_move = scan;
3813 last_float = src;
3814 newsrc = gen_rtx (MEM, mode,
3815 (((TARGET_SH4 && ! TARGET_FMOVD)
3816 || REGNO (dst) == FPUL_REG)
3817 ? r0_inc_rtx
3818 : r0_rtx));
3819 last_float_addr = &XEXP (newsrc, 0);
3821 /* Remove the clobber of r0. */
3822 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
3823 RTX_UNCHANGING_P (newsrc) = 1;
3825 /* This is a mova needing a label. Create it. */
3826 else if (GET_CODE (src) == UNSPEC
3827 && XINT (src, 1) == UNSPEC_MOVA
3828 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
3830 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
3831 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
3832 newsrc = gen_rtx_UNSPEC (VOIDmode,
3833 gen_rtvec (1, newsrc),
3834 UNSPEC_MOVA);
3836 else
3838 lab = add_constant (src, mode, 0);
3839 newsrc = gen_rtx_MEM (mode,
3840 gen_rtx_LABEL_REF (VOIDmode, lab));
3841 RTX_UNCHANGING_P (newsrc) = 1;
3843 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
3844 INSN_CODE (scan) = -1;
3847 dump_table (barrier);
3848 insn = barrier;
3852 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
3853 INSN_ADDRESSES_FREE ();
3854 split_branches (first);
3856 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
3857 also has an effect on the register that holds the address of the sfunc.
3858 Insert an extra dummy insn in front of each sfunc that pretends to
3859 use this register. */
3860 if (flag_delayed_branch)
3862 for (insn = first; insn; insn = NEXT_INSN (insn))
3864 rtx reg = sfunc_uses_reg (insn);
3866 if (! reg)
3867 continue;
3868 emit_insn_before (gen_use_sfunc_addr (reg), insn);
3871 #if 0
3872 /* fpscr is not actually a user variable, but we pretend it is for the
3873 sake of the previous optimization passes, since we want it handled like
3874 one. However, we don't have any debugging information for it, so turn
3875 it into a non-user variable now. */
3876 if (TARGET_SH4)
3877 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
3878 #endif
3879 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
3883 get_dest_uid (label, max_uid)
3884 rtx label;
3885 int max_uid;
3887 rtx dest = next_real_insn (label);
3888 int dest_uid;
3889 if (! dest)
3890 /* This can happen for an undefined label. */
3891 return 0;
3892 dest_uid = INSN_UID (dest);
3893 /* If this is a newly created branch redirection blocking instruction,
3894 we cannot index the branch_uid or insn_addresses arrays with its
3895 uid. But then, we won't need to, because the actual destination is
3896 the following branch. */
3897 while (dest_uid >= max_uid)
3899 dest = NEXT_INSN (dest);
3900 dest_uid = INSN_UID (dest);
3902 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
3903 return 0;
3904 return dest_uid;
3907 /* Split condbranches that are out of range. Also add clobbers for
3908 scratch registers that are needed in far jumps.
3909 We do this before delay slot scheduling, so that it can take our
3910 newly created instructions into account. It also allows us to
3911 find branches with common targets more easily. */
3913 static void
3914 split_branches (first)
3915 rtx first;
3917 rtx insn;
3918 struct far_branch **uid_branch, *far_branch_list = 0;
3919 int max_uid = get_max_uid ();
3921 /* Find out which branches are out of range. */
3922 shorten_branches (first);
3924 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
3925 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
3927 for (insn = first; insn; insn = NEXT_INSN (insn))
3928 if (! INSN_P (insn))
3929 continue;
3930 else if (INSN_DELETED_P (insn))
3932 /* Shorten_branches would split this instruction again,
3933 so transform it into a note. */
3934 PUT_CODE (insn, NOTE);
3935 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3936 NOTE_SOURCE_FILE (insn) = 0;
3938 else if (GET_CODE (insn) == JUMP_INSN
3939 /* Don't mess with ADDR_DIFF_VEC */
3940 && (GET_CODE (PATTERN (insn)) == SET
3941 || GET_CODE (PATTERN (insn)) == RETURN))
3943 enum attr_type type = get_attr_type (insn);
3944 if (type == TYPE_CBRANCH)
3946 rtx next, beyond;
3948 if (get_attr_length (insn) > 4)
3950 rtx src = SET_SRC (PATTERN (insn));
3951 rtx olabel = XEXP (XEXP (src, 1), 0);
3952 int addr = INSN_ADDRESSES (INSN_UID (insn));
3953 rtx label = 0;
3954 int dest_uid = get_dest_uid (olabel, max_uid);
3955 struct far_branch *bp = uid_branch[dest_uid];
3957 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
3958 the label if the LABEL_NUSES count drops to zero. There is
3959 always a jump_optimize pass that sets these values, but it
3960 proceeds to delete unreferenced code, and then if not
3961 optimizing, to un-delete the deleted instructions, thus
3962 leaving labels with too low uses counts. */
3963 if (! optimize)
3965 JUMP_LABEL (insn) = olabel;
3966 LABEL_NUSES (olabel)++;
3968 if (! bp)
3970 bp = (struct far_branch *) alloca (sizeof *bp);
3971 uid_branch[dest_uid] = bp;
3972 bp->prev = far_branch_list;
3973 far_branch_list = bp;
3974 bp->far_label
3975 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
3976 LABEL_NUSES (bp->far_label)++;
3978 else
3980 label = bp->near_label;
3981 if (! label && bp->address - addr >= CONDJUMP_MIN)
3983 rtx block = bp->insert_place;
3985 if (GET_CODE (PATTERN (block)) == RETURN)
3986 block = PREV_INSN (block);
3987 else
3988 block = gen_block_redirect (block,
3989 bp->address, 2);
3990 label = emit_label_after (gen_label_rtx (),
3991 PREV_INSN (block));
3992 bp->near_label = label;
3994 else if (label && ! NEXT_INSN (label))
3996 if (addr + 2 - bp->address <= CONDJUMP_MAX)
3997 bp->insert_place = insn;
3998 else
3999 gen_far_branch (bp);
4002 if (! label
4003 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4005 bp->near_label = label = gen_label_rtx ();
4006 bp->insert_place = insn;
4007 bp->address = addr;
4009 if (! redirect_jump (insn, label, 1))
4010 abort ();
4012 else
4014 /* get_attr_length (insn) == 2 */
4015 /* Check if we have a pattern where reorg wants to redirect
4016 the branch to a label from an unconditional branch that
4017 is too far away. */
4018 /* We can't use JUMP_LABEL here because it might be undefined
4019 when not optimizing. */
4020 /* A syntax error might cause beyond to be NULL_RTX. */
4021 beyond
4022 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4023 0));
4025 if (beyond
4026 && (GET_CODE (beyond) == JUMP_INSN
4027 || ((beyond = next_active_insn (beyond))
4028 && GET_CODE (beyond) == JUMP_INSN))
4029 && GET_CODE (PATTERN (beyond)) == SET
4030 && recog_memoized (beyond) == CODE_FOR_jump
4031 && ((INSN_ADDRESSES
4032 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4033 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4034 > 252 + 258 + 2))
4035 gen_block_redirect (beyond,
4036 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4039 next = next_active_insn (insn);
4041 if ((GET_CODE (next) == JUMP_INSN
4042 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4043 && GET_CODE (PATTERN (next)) == SET
4044 && recog_memoized (next) == CODE_FOR_jump
4045 && ((INSN_ADDRESSES
4046 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4047 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4048 > 252 + 258 + 2))
4049 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4051 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4053 int addr = INSN_ADDRESSES (INSN_UID (insn));
4054 rtx far_label = 0;
4055 int dest_uid = 0;
4056 struct far_branch *bp;
4058 if (type == TYPE_JUMP)
4060 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4061 dest_uid = get_dest_uid (far_label, max_uid);
4062 if (! dest_uid)
4064 /* Parse errors can lead to labels outside
4065 the insn stream. */
4066 if (! NEXT_INSN (far_label))
4067 continue;
4069 if (! optimize)
4071 JUMP_LABEL (insn) = far_label;
4072 LABEL_NUSES (far_label)++;
4074 redirect_jump (insn, NULL_RTX, 1);
4075 far_label = 0;
4078 bp = uid_branch[dest_uid];
4079 if (! bp)
4081 bp = (struct far_branch *) alloca (sizeof *bp);
4082 uid_branch[dest_uid] = bp;
4083 bp->prev = far_branch_list;
4084 far_branch_list = bp;
4085 bp->near_label = 0;
4086 bp->far_label = far_label;
4087 if (far_label)
4088 LABEL_NUSES (far_label)++;
4090 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4091 if (addr - bp->address <= CONDJUMP_MAX)
4092 emit_label_after (bp->near_label, PREV_INSN (insn));
4093 else
4095 gen_far_branch (bp);
4096 bp->near_label = 0;
4098 else
4099 bp->near_label = 0;
4100 bp->address = addr;
4101 bp->insert_place = insn;
4102 if (! far_label)
4103 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4104 else
4105 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4108 /* Generate all pending far branches,
4109 and free our references to the far labels. */
4110 while (far_branch_list)
4112 if (far_branch_list->near_label
4113 && ! NEXT_INSN (far_branch_list->near_label))
4114 gen_far_branch (far_branch_list);
4115 if (optimize
4116 && far_branch_list->far_label
4117 && ! --LABEL_NUSES (far_branch_list->far_label))
4118 delete_insn (far_branch_list->far_label);
4119 far_branch_list = far_branch_list->prev;
4122 /* Instruction length information is no longer valid due to the new
4123 instructions that have been generated. */
4124 init_insn_lengths ();
4127 /* Dump out instruction addresses, which is useful for debugging the
4128 constant pool table stuff.
4130 If relaxing, output the label and pseudo-ops used to link together
4131 calls and the instruction which set the registers. */
4133 /* ??? This is unnecessary, and probably should be deleted. This makes
4134 the insn_addresses declaration above unnecessary. */
4136 /* ??? The addresses printed by this routine for insns are nonsense for
4137 insns which are inside of a sequence where none of the inner insns have
4138 variable length. This is because the second pass of shorten_branches
4139 does not bother to update them. */
4141 void
4142 final_prescan_insn (insn, opvec, noperands)
4143 rtx insn;
4144 rtx *opvec ATTRIBUTE_UNUSED;
4145 int noperands ATTRIBUTE_UNUSED;
4147 if (TARGET_DUMPISIZE)
4148 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4150 if (TARGET_RELAX)
4152 rtx note;
4154 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4155 if (note)
4157 rtx pattern;
4159 pattern = PATTERN (insn);
4160 if (GET_CODE (pattern) == PARALLEL)
4161 pattern = XVECEXP (pattern, 0, 0);
4162 if (GET_CODE (pattern) == CALL
4163 || (GET_CODE (pattern) == SET
4164 && (GET_CODE (SET_SRC (pattern)) == CALL
4165 || get_attr_type (insn) == TYPE_SFUNC)))
4166 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4167 CODE_LABEL_NUMBER (XEXP (note, 0)));
4168 else if (GET_CODE (pattern) == SET)
4169 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4170 CODE_LABEL_NUMBER (XEXP (note, 0)));
4171 else
4172 abort ();
4177 /* Dump out any constants accumulated in the final pass. These will
4178 only be labels. */
4180 const char *
4181 output_jump_label_table ()
4183 int i;
4185 if (pool_size)
4187 fprintf (asm_out_file, "\t.align 2\n");
4188 for (i = 0; i < pool_size; i++)
4190 pool_node *p = &pool_vector[i];
4192 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4193 CODE_LABEL_NUMBER (p->label));
4194 output_asm_insn (".long %O0", &p->value);
4196 pool_size = 0;
4199 return "";
4202 /* A full frame looks like:
4204 arg-5
4205 arg-4
4206 [ if current_function_anonymous_args
4207 arg-3
4208 arg-2
4209 arg-1
4210 arg-0 ]
4211 saved-fp
4212 saved-r10
4213 saved-r11
4214 saved-r12
4215 saved-pr
4216 local-n
4218 local-1
4219 local-0 <- fp points here. */
4221 /* Number of bytes pushed for anonymous args, used to pass information
4222 between expand_prologue and expand_epilogue. */
4224 static int extra_push;
4226 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
4227 to be adjusted, and TEMP, if nonnegative, holds the register number
4228 of a general register that we may clobber. */
4230 static void
4231 output_stack_adjust (size, reg, temp, emit_fn)
4232 int size;
4233 rtx reg;
4234 int temp;
4235 rtx (*emit_fn) PARAMS ((rtx));
4237 if (size)
4239 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4241 if (size % align)
4242 abort ();
4244 if (CONST_OK_FOR_ADD (size))
4245 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4246 /* Try to do it with two partial adjustments; however, we must make
4247 sure that the stack is properly aligned at all times, in case
4248 an interrupt occurs between the two partial adjustments. */
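      /* size / 2 & -align is half the adjustment rounded to a multiple of
	 the alignment; since SIZE itself is a multiple of the alignment,
	 both partial steps are too.  For example, with signed 8-bit add
	 immediates an adjustment of 192 is emitted as 96 + 96.  */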
4249 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4250 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4252 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4253 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4255 else
4257 rtx const_reg;
4258 rtx insn;
4260 /* If TEMP is invalid, we could temporarily save a general
4261 register to MACL. However, there is currently no need
4262 to handle this case, so just abort when we see it. */
4263 if (temp < 0)
4264 abort ();
4265 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4267 /* If SIZE is negative, subtract the positive value.
4268 This sometimes allows a constant pool entry to be shared
4269 between prologue and epilogue code. */
4270 if (size < 0)
4272 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4273 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4275 else
4277 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4278 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
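	  /* When called via frame_insn, describe the net adjustment as
	     reg := reg + size in a REG_FRAME_RELATED_EXPR note, so the
	     DWARF unwind info records the simple constant adjustment
	     rather than the intermediate move through CONST_REG.  */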
4280 if (emit_fn == frame_insn)
4281 REG_NOTES (insn)
4282 = (gen_rtx_EXPR_LIST
4283 (REG_FRAME_RELATED_EXPR,
4284 gen_rtx_SET (VOIDmode, reg,
4285 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4286 REG_NOTES (insn)));
4291 static rtx
4292 frame_insn (x)
4293 rtx x;
4295 x = emit_insn (x);
4296 RTX_FRAME_RELATED_P (x) = 1;
4297 return x;
4300 /* Output RTL to push register RN onto the stack. */
4302 static rtx
4303 push (rn)
4304 int rn;
4306 rtx x;
4307 if (rn == FPUL_REG)
4308 x = gen_push_fpul ();
4309 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4310 && FP_OR_XD_REGISTER_P (rn))
4312 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4313 return NULL_RTX;
4314 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4316 else if (TARGET_SH3E && FP_REGISTER_P (rn))
4317 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4318 else
4319 x = gen_push (gen_rtx_REG (SImode, rn));
4321 x = frame_insn (x);
4322 REG_NOTES (x)
4323 = gen_rtx_EXPR_LIST (REG_INC,
4324 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4325 return x;
4328 /* Output RTL to pop register RN from the stack. */
4330 static void
4331 pop (rn)
4332 int rn;
4334 rtx x;
4335 if (rn == FPUL_REG)
4336 x = gen_pop_fpul ();
4337 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4338 && FP_OR_XD_REGISTER_P (rn))
4340 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4341 return;
4342 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4344 else if (TARGET_SH3E && FP_REGISTER_P (rn))
4345 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4346 else
4347 x = gen_pop (gen_rtx_REG (SImode, rn));
4349 x = emit_insn (x);
4350 REG_NOTES (x)
4351 = gen_rtx_EXPR_LIST (REG_INC,
4352 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4355 /* Generate code to push the regs specified in the mask. */
4357 static void
4358 push_regs (mask)
4359 HOST_WIDE_INT *mask;
4361 int i;
4363 /* Push PR last; this gives better latencies after the prologue, and
4364 candidates for the return delay slot when there are no general
4365 registers pushed. */
4366 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4367 if (i != PR_REG && mask[i / 32] & (1 << (i % 32)))
4368 push (i);
4369 if (mask[PR_REG / 32] & (1 << (PR_REG % 32)))
4370 push (PR_REG);
4373 /* Work out the registers which need to be saved, both as a mask and a
4374 count of saved words.
4376 If doing a pragma interrupt function, then push all regs used by the
4377 function, and if we call another function (we can tell by looking at PR),
4378 make sure that all the regs it clobbers are safe too. */
4380 static void
4381 calc_live_regs (count_ptr, live_regs_mask)
4382 int *count_ptr;
4383 HOST_WIDE_INT *live_regs_mask;
4385 int reg;
4386 int count;
4387 int interrupt_handler;
4388 int pr_live;
4390 interrupt_handler = sh_cfun_interrupt_handler_p ();
4392 for (count = 0; 32 * count < FIRST_PSEUDO_REGISTER; count++)
4393 live_regs_mask[count] = 0;
4394 /* If switching to double mode lets us combine several pairs of FP register
4394 saves into single double-mode saves, do that.  */
4395 if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4396 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4397 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4398 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4399 && ++count > 2)
4401 target_flags &= ~FPU_SINGLE_BIT;
4402 break;
4404 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4405 knows how to use it. That means the pseudo originally allocated for
4406 the initial value can become the PR_MEDIA_REG hard register, as seen for
4407 execute/20010122-1.c:test9. */
4408 if (TARGET_SHMEDIA)
4409 pr_live = regs_ever_live[PR_MEDIA_REG];
4410 else
4412 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4413 pr_live = (pr_initial
4414 ? REGNO (pr_initial) != (PR_REG) : regs_ever_live[PR_REG]);
4416 /* Force PR to be live if the prologue has to call the SHmedia
4417 argument decoder or register saver. */
4418 if (TARGET_SHCOMPACT
4419 && ((current_function_args_info.call_cookie
4420 & ~ CALL_COOKIE_RET_TRAMP (1))
4421 || current_function_has_nonlocal_label))
4422 pr_live = 1;
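  /* Decide for each hard register whether it has to be saved: PR is saved
     according to pr_live; in an interrupt handler (without #pragma trapa)
     we save every register that is live, plus the call-used registers if
     the handler itself makes calls, except for a few special registers;
     otherwise only call-saved registers that are actually used.  */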
4423 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4425 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4426 ? pr_live
4427 : (interrupt_handler && ! pragma_trapa)
4428 ? (/* Need to save all the regs ever live. */
4429 (regs_ever_live[reg]
4430 || (call_used_regs[reg]
4431 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4432 && pr_live))
4433 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4434 && reg != RETURN_ADDRESS_POINTER_REGNUM
4435 && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG)
4436 : (/* Only push those regs which are used and need to be saved. */
4437 regs_ever_live[reg] && ! call_used_regs[reg]))
4439 live_regs_mask[reg / 32] |= 1 << (reg % 32);
4440 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4442 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4443 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4445 if (FP_REGISTER_P (reg))
4447 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4449 live_regs_mask[(reg ^ 1) / 32] |= 1 << ((reg ^ 1) % 32);
4450 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4453 else if (XD_REGISTER_P (reg))
4455 /* Must switch to double mode to access these registers. */
4456 target_flags &= ~FPU_SINGLE_BIT;
4462 *count_ptr = count;
4465 /* Code to generate prologue and epilogue sequences */
4467 /* PUSHED is the number of bytes that are being pushed on the
4468 stack for register saves. Return the frame size, padded
4469 appropriately so that the stack stays properly aligned. */
4470 static HOST_WIDE_INT
4471 rounded_frame_size (pushed)
4472 int pushed;
4474 HOST_WIDE_INT size = get_frame_size ();
4475 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
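  /* Round the sum of locals and pushed registers up to the stack alignment,
     then remove the pushed bytes again: the result is the padded size of
     the local frame alone.  */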
4477 return ((size + pushed + align - 1) & -align) - pushed;
4480 /* Choose a call-clobbered target-branch register that remains
4481 unchanged along the whole function. We set it up as the return
4482 value in the prologue. */
4484 sh_media_register_for_return ()
4486 int regno;
4487 int tr0_used;
4489 if (! current_function_is_leaf)
4490 return -1;
4492 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4494 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4495 if (call_used_regs[regno] && ! regs_ever_live[regno])
4496 return regno;
4498 return -1;
4501 void
4502 sh_expand_prologue ()
4504 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
4505 int d, i;
4506 int d_rounding = 0;
4507 int save_flags = target_flags;
4509 current_function_interrupt = sh_cfun_interrupt_handler_p ();
4511 /* We have pretend args if we had an object sent partially in registers
4512 and partially on the stack, e.g. a large structure. */
4513 output_stack_adjust (-current_function_pretend_args_size
4514 - current_function_args_info.stack_regs * 8,
4515 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4517 extra_push = 0;
4519 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
4520 /* We're going to use the PIC register to load the address of the
4521 incoming-argument decoder and/or of the return trampoline from
4522 the GOT, so make sure the PIC register is preserved and
4523 initialized. */
4524 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
4526 if (TARGET_SHCOMPACT
4527 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4529 int reg;
4531 /* First, make all registers with incoming arguments that will
4532 be pushed onto the stack live, so that register renaming
4533 doesn't overwrite them. */
4534 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
4535 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
4536 >= NPARM_REGS (SImode) - reg)
4537 for (; reg < NPARM_REGS (SImode); reg++)
4538 emit_insn (gen_shcompact_preserve_incoming_args
4539 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4540 else if (CALL_COOKIE_INT_REG_GET
4541 (current_function_args_info.call_cookie, reg) == 1)
4542 emit_insn (gen_shcompact_preserve_incoming_args
4543 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
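      /* Pass the original stack pointer in MACL and the call cookie in
	 MACH for the SHcompact incoming-argument decoder that is invoked
	 at the end of this prologue.  */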
4545 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
4546 stack_pointer_rtx);
4547 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
4548 GEN_INT (current_function_args_info.call_cookie));
4549 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
4550 gen_rtx_REG (SImode, R0_REG));
4552 else if (TARGET_SHMEDIA)
4554 int tr = sh_media_register_for_return ();
4556 if (tr >= 0)
4558 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
4559 gen_rtx_REG (DImode, PR_MEDIA_REG));
4561 /* If this function only exits with sibcalls, this copy
4562 will be flagged as dead. */
4563 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4564 const0_rtx,
4565 REG_NOTES (insn));
4569 /* Emit the code for SETUP_VARARGS. */
4570 if (current_function_varargs || current_function_stdarg)
4572 /* This is not used by the SH3E calling convention */
4573 if (TARGET_SH1 && ! TARGET_SH3E && ! TARGET_SH5 && ! TARGET_HITACHI)
4575 /* Push arg regs as if they'd been provided by caller in stack. */
4576 for (i = 0; i < NPARM_REGS(SImode); i++)
4578 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4579 rtx insn;
4581 if (i >= (NPARM_REGS(SImode)
4582 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4584 break;
4585 insn = push (rn);
4586 RTX_FRAME_RELATED_P (insn) = 0;
4587 extra_push += 4;
4592 /* If we're supposed to switch stacks at function entry, do so now. */
4593 if (sp_switch)
4594 emit_insn (gen_sp_switch_1 ());
4596 calc_live_regs (&d, live_regs_mask);
4597 /* ??? Maybe we could save some switching if we can move a mode switch
4598 that already happens to be at the function start into the prologue. */
4599 if (target_flags != save_flags)
4600 emit_insn (gen_toggle_sz ());
4602 if (TARGET_SH5)
4604 int i;
4605 int offset;
4606 int align;
4607 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4608 int offset_in_r0 = -1;
4609 int sp_in_r0 = 0;
4611 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
4612 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4613 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4615 offset = d + d_rounding;
4616 output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
4618 /* We loop twice: first, we save 8-byte aligned registers in the
4619 higher addresses, that are known to be aligned. Then, we
4620 proceed to saving 32-bit registers that don't need 8-byte
4621 alignment. */
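      /* For each register we first try a plain sp+offset address; if that
	 is not legitimate for the mode, we fall back to addressing through
	 r0, using pre-decrement when available and the offsets allow it.  */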
4622 for (align = 1; align >= 0; align--)
4623 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
4624 if (live_regs_mask[i/32] & (1 << (i % 32)))
4626 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4627 int reg = i;
4628 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
4630 if (mode == SFmode && (i % 2) == 1
4631 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4632 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
4634 mode = DFmode;
4635 i--;
4636 reg--;
4639 /* If we're doing the aligned pass and this is not aligned,
4640 or we're doing the unaligned pass and this is aligned,
4641 skip it. */
4642 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4643 == 0) != align)
4644 continue;
4646 offset -= GET_MODE_SIZE (mode);
4648 reg_rtx = gen_rtx_REG (mode, reg);
4650 mem_rtx = gen_rtx_MEM (mode,
4651 gen_rtx_PLUS (Pmode,
4652 stack_pointer_rtx,
4653 GEN_INT (offset)));
4655 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
4657 mem_rtx = NULL_RTX;
4659 try_pre_dec:
4661 if (HAVE_PRE_DECREMENT
4662 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
4663 || mem_rtx == NULL_RTX
4664 || i == PR_REG || SPECIAL_REGISTER_P (i)))
4666 pre_dec = gen_rtx_MEM (mode,
4667 gen_rtx_PRE_DEC (Pmode, r0));
4669 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
4670 pre_dec_ok);
4672 pre_dec = NULL_RTX;
4674 break;
4676 pre_dec_ok:
4677 mem_rtx = NULL_RTX;
4678 offset += GET_MODE_SIZE (mode);
4680 while (0);
4682 if (mem_rtx != NULL_RTX)
4683 goto addr_ok;
4685 if (offset_in_r0 == -1)
4687 emit_move_insn (r0, GEN_INT (offset));
4688 offset_in_r0 = offset;
4690 else if (offset != offset_in_r0)
4692 emit_move_insn (r0,
4693 gen_rtx_PLUS
4694 (Pmode, r0,
4695 GEN_INT (offset - offset_in_r0)));
4696 offset_in_r0 += offset - offset_in_r0;
4699 if (pre_dec != NULL_RTX)
4701 if (! sp_in_r0)
4703 emit_move_insn (r0,
4704 gen_rtx_PLUS
4705 (Pmode, r0, stack_pointer_rtx));
4706 sp_in_r0 = 1;
4709 offset -= GET_MODE_SIZE (mode);
4710 offset_in_r0 -= GET_MODE_SIZE (mode);
4712 mem_rtx = pre_dec;
4714 else if (sp_in_r0)
4715 mem_rtx = gen_rtx_MEM (mode, r0);
4716 else
4717 mem_rtx = gen_rtx_MEM (mode,
4718 gen_rtx_PLUS (Pmode,
4719 stack_pointer_rtx,
4720 r0));
4722 /* We must not use an r0-based address for target-branch
4723 registers or for special registers without pre-dec
4724 memory addresses, since we store their values in r0
4725 first. */
4726 if (TARGET_REGISTER_P (i)
4727 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
4728 && mem_rtx != pre_dec))
4729 abort ();
4731 addr_ok:
4732 if (TARGET_REGISTER_P (i)
4733 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
4734 && mem_rtx != pre_dec))
4736 rtx r0mode = gen_rtx_REG (GET_MODE (reg_rtx), R0_REG);
4738 emit_move_insn (r0mode, reg_rtx);
4740 offset_in_r0 = -1;
4741 sp_in_r0 = 0;
4743 reg_rtx = r0mode;
4746 emit_move_insn (mem_rtx, reg_rtx);
4749 if (offset != d_rounding)
4750 abort ();
4752 else
4753 push_regs (live_regs_mask);
4755 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
4757 rtx insn = get_last_insn ();
4758 rtx last = emit_insn (gen_GOTaddr2picreg ());
4760 /* Mark these insns as possibly dead. Sometimes, flow2 may
4761 delete all uses of the PIC register. In this case, let it
4762 delete the initialization too. */
4765 insn = NEXT_INSN (insn);
4767 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4768 const0_rtx,
4769 REG_NOTES (insn));
4771 while (insn != last);
4774 if (SHMEDIA_REGS_STACK_ADJUST ())
4776 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4777 gen_rtx_SYMBOL_REF (Pmode,
4778 TARGET_FPU_ANY
4779 ? "__GCC_push_shmedia_regs"
4780 : "__GCC_push_shmedia_regs_nofpu"));
4781 /* This must NOT go through the PLT, otherwise mach and macl
4782 may be clobbered. */
4783 emit_insn (gen_shmedia_save_restore_regs_compact
4784 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
4787 if (target_flags != save_flags)
4789 rtx insn = emit_insn (gen_toggle_sz ());
4791 /* If we're lucky, a mode switch in the function body will
4792 overwrite fpscr, turning this insn dead. Tell flow this
4793 insn is ok to delete. */
4794 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4795 const0_rtx,
4796 REG_NOTES (insn));
4799 target_flags = save_flags;
4801 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
4802 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4804 if (frame_pointer_needed)
4805 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
4807 if (TARGET_SHCOMPACT
4808 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4810 /* This must NOT go through the PLT, otherwise mach and macl
4811 may be clobbered. */
4812 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4813 gen_rtx_SYMBOL_REF (Pmode,
4814 "__GCC_shcompact_incoming_args"));
4815 emit_insn (gen_shcompact_incoming_args ());
4819 void
4820 sh_expand_epilogue ()
4822 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
4823 int d, i;
4824 int d_rounding = 0;
4826 int save_flags = target_flags;
4827 int frame_size;
4829 calc_live_regs (&d, live_regs_mask);
4831 if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
4832 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4833 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4835 frame_size = rounded_frame_size (d) - d_rounding;
4837 if (frame_pointer_needed)
4839 output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
4841 /* We must avoid moving the stack pointer adjustment past code
4842 which reads from the local frame, else an interrupt could
4843 occur after the SP adjustment and clobber data in the local
4844 frame. */
4845 emit_insn (gen_blockage ());
4846 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
4848 else if (frame_size)
4850 /* We must avoid moving the stack pointer adjustment past code
4851 which reads from the local frame, else an interrupt could
4852 occur after the SP adjustment and clobber data in the local
4853 frame. */
4854 emit_insn (gen_blockage ());
4855 output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
4858 if (SHMEDIA_REGS_STACK_ADJUST ())
4860 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4861 gen_rtx_SYMBOL_REF (Pmode,
4862 TARGET_FPU_ANY
4863 ? "__GCC_pop_shmedia_regs"
4864 : "__GCC_pop_shmedia_regs_nofpu"));
4865 /* This must NOT go through the PLT, otherwise mach and macl
4866 may be clobbered. */
4867 emit_insn (gen_shmedia_save_restore_regs_compact
4868 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
4871 /* Pop all the registers. */
4873 if (target_flags != save_flags)
4874 emit_insn (gen_toggle_sz ());
4875 if (TARGET_SH5)
4877 int offset = d_rounding;
4878 int offset_in_r0 = -1;
4879 int sp_in_r0 = 0;
4880 int align;
4881 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4883 /* We loop twice: first, we restore the 32-bit registers that
4884 don't need 8-byte alignment, from the lower addresses where
4885 they were saved last; then we restore the 8-byte aligned
4886 registers from the higher addresses.  */
4887 for (align = 0; align <= 1; align++)
4888 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4889 if (live_regs_mask[i/32] & (1 << (i % 32)))
4891 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4892 int reg = i;
4893 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
4895 if (mode == SFmode && (i % 2) == 0
4896 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4897 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
4899 mode = DFmode;
4900 i++;
4903 /* If we're doing the aligned pass and this is not aligned,
4904 or we're doing the unaligned pass and this is aligned,
4905 skip it. */
4906 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4907 == 0) != align)
4908 continue;
4910 reg_rtx = gen_rtx_REG (mode, reg);
4912 mem_rtx = gen_rtx_MEM (mode,
4913 gen_rtx_PLUS (Pmode,
4914 stack_pointer_rtx,
4915 GEN_INT (offset)));
4917 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
4919 mem_rtx = NULL_RTX;
4921 try_post_inc:
4923 if (HAVE_POST_INCREMENT
4924 && (offset == offset_in_r0
4925 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
4926 && mem_rtx == NULL_RTX)
4927 || i == PR_REG || SPECIAL_REGISTER_P (i)))
4929 post_inc = gen_rtx_MEM (mode,
4930 gen_rtx_POST_INC (Pmode, r0));
4932 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
4933 post_inc_ok);
4935 post_inc = NULL_RTX;
4937 break;
4939 post_inc_ok:
4940 mem_rtx = NULL_RTX;
4942 while (0);
4944 if (mem_rtx != NULL_RTX)
4945 goto addr_ok;
4947 if (offset_in_r0 == -1)
4949 emit_move_insn (r0, GEN_INT (offset));
4950 offset_in_r0 = offset;
4952 else if (offset != offset_in_r0)
4954 emit_move_insn (r0,
4955 gen_rtx_PLUS
4956 (Pmode, r0,
4957 GEN_INT (offset - offset_in_r0)));
4958 offset_in_r0 += offset - offset_in_r0;
4961 if (post_inc != NULL_RTX)
4963 if (! sp_in_r0)
4965 emit_move_insn (r0,
4966 gen_rtx_PLUS
4967 (Pmode, r0, stack_pointer_rtx));
4968 sp_in_r0 = 1;
4971 mem_rtx = post_inc;
4973 offset_in_r0 += GET_MODE_SIZE (mode);
4975 else if (sp_in_r0)
4976 mem_rtx = gen_rtx_MEM (mode, r0);
4977 else
4978 mem_rtx = gen_rtx_MEM (mode,
4979 gen_rtx_PLUS (Pmode,
4980 stack_pointer_rtx,
4981 r0));
4983 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
4984 && mem_rtx != post_inc)
4985 abort ();
4987 addr_ok:
4988 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
4989 && mem_rtx != post_inc)
4991 insn = emit_move_insn (r0, mem_rtx);
4992 mem_rtx = r0;
4994 else if (TARGET_REGISTER_P (i))
4996 rtx r1 = gen_rtx_REG (mode, R1_REG);
4998 insn = emit_move_insn (r1, mem_rtx);
4999 mem_rtx = r1;
5002 insn = emit_move_insn (reg_rtx, mem_rtx);
5004 offset += GET_MODE_SIZE (mode);
5007 if (offset != d + d_rounding)
5008 abort ();
5010 goto finish;
5012 else
5013 d = 0;
5014 if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
5015 pop (PR_REG);
5016 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5018 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5020 if (j != PR_REG && live_regs_mask[j / 32] & (1 << (j % 32)))
5021 pop (j);
5023 finish:
5024 if (target_flags != save_flags)
5025 emit_insn (gen_toggle_sz ());
5026 target_flags = save_flags;
5028 output_stack_adjust (extra_push + current_function_pretend_args_size
5029 + d + d_rounding
5030 + current_function_args_info.stack_regs * 8,
5031 stack_pointer_rtx, 7, emit_insn);
5033 /* Switch back to the normal stack if necessary. */
5034 if (sp_switch)
5035 emit_insn (gen_sp_switch_2 ());
5037 /* Tell flow the insn that pops PR isn't dead. */
5038 /* PR_REG will never be live in SHmedia mode, and we don't need to
5039 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5040 by the return pattern. */
5041 if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
5042 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5045 static int sh_need_epilogue_known = 0;
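/* Return nonzero if the current function needs an epilogue.  The answer is
   found by expanding the epilogue into a scratch sequence and checking
   whether it is empty; the result is cached in sh_need_epilogue_known.  */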
5048 sh_need_epilogue ()
5050 if (! sh_need_epilogue_known)
5052 rtx epilogue;
5054 start_sequence ();
5055 sh_expand_epilogue ();
5056 epilogue = get_insns ();
5057 end_sequence ();
5058 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5060 return sh_need_epilogue_known > 0;
5063 /* Clear variables at function end. */
5065 static void
5066 sh_output_function_epilogue (file, size)
5067 FILE *file ATTRIBUTE_UNUSED;
5068 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5070 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5071 sh_need_epilogue_known = 0;
5072 sp_switch = NULL_RTX;
5076 sh_builtin_saveregs ()
5078 /* First unnamed integer register. */
5079 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5080 /* Number of integer registers we need to save. */
5081 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5082 /* First unnamed SFmode float reg */
5083 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5084 /* Number of SFmode float regs to save. */
5085 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5086 rtx regbuf, fpregs;
5087 int bufsize, regno;
5088 HOST_WIDE_INT alias_set;
5090 if (TARGET_SH5)
5092 if (n_intregs)
5094 int pushregs = n_intregs;
5096 while (pushregs < NPARM_REGS (SImode) - 1
5097 && (CALL_COOKIE_INT_REG_GET
5098 (current_function_args_info.call_cookie,
5099 NPARM_REGS (SImode) - pushregs)
5100 == 1))
5102 current_function_args_info.call_cookie
5103 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5104 - pushregs, 1);
5105 pushregs++;
5108 if (pushregs == NPARM_REGS (SImode))
5109 current_function_args_info.call_cookie
5110 |= (CALL_COOKIE_INT_REG (0, 1)
5111 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5112 else
5113 current_function_args_info.call_cookie
5114 |= CALL_COOKIE_STACKSEQ (pushregs);
5116 current_function_pretend_args_size += 8 * n_intregs;
5118 if (TARGET_SHCOMPACT)
5119 return const0_rtx;
5122 if (! TARGET_SH3E && ! TARGET_SH4 && ! TARGET_SH5)
5124 error ("__builtin_saveregs not supported by this subtarget");
5125 return const0_rtx;
5128 if (TARGET_SHMEDIA)
5129 n_floatregs = 0;
5131 /* Allocate block of memory for the regs. */
5132 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5133 Or can assign_stack_local accept a 0 SIZE argument? */
5134 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5136 if (TARGET_SHMEDIA)
5137 regbuf = gen_rtx_MEM (BLKmode,
5138 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5139 else if (n_floatregs & 1)
5141 rtx addr;
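      /* With an odd number of float registers to save, allocate one extra
	 word and force the buffer address to an odd word boundary, so that
	 the double-precision stores emitted below stay 8-byte aligned.  */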
5143 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5144 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5145 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5146 regbuf = change_address (regbuf, BLKmode, addr);
5148 else
5149 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5150 alias_set = get_varargs_alias_set ();
5151 set_mem_alias_set (regbuf, alias_set);
5153 /* Save int args.
5154 This is optimized to only save the regs that are necessary. Explicitly
5155 named args need not be saved. */
5156 if (n_intregs > 0)
5157 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5158 adjust_address (regbuf, BLKmode,
5159 n_floatregs * UNITS_PER_WORD),
5160 n_intregs, n_intregs * UNITS_PER_WORD);
5162 if (TARGET_SHMEDIA)
5163 /* Return the address of the regbuf. */
5164 return XEXP (regbuf, 0);
5166 /* Save float args.
5167 This is optimized to only save the regs that are necessary. Explicitly
5168 named args need not be saved.
5169 We explicitly build a pointer to the buffer because it halves the insn
5170 count when not optimizing (otherwise the pointer is built for each reg
5171 saved).
5172 We emit the moves in reverse order so that we can use predecrement. */
5174 fpregs = gen_reg_rtx (Pmode);
5175 emit_move_insn (fpregs, XEXP (regbuf, 0));
5176 emit_insn (gen_addsi3 (fpregs, fpregs,
5177 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5178 if (TARGET_SH4)
5180 rtx mem;
5181 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5183 emit_insn (gen_addsi3 (fpregs, fpregs,
5184 GEN_INT (-2 * UNITS_PER_WORD)));
5185 mem = gen_rtx_MEM (DFmode, fpregs);
5186 set_mem_alias_set (mem, alias_set);
5187 emit_move_insn (mem,
5188 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5190 regno = first_floatreg;
5191 if (regno & 1)
5193 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5194 mem = gen_rtx_MEM (SFmode, fpregs);
5195 set_mem_alias_set (mem, alias_set);
5196 emit_move_insn (mem,
5197 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5198 - (TARGET_LITTLE_ENDIAN != 0)));
5201 else
5202 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5204 rtx mem;
5206 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5207 mem = gen_rtx_MEM (SFmode, fpregs);
5208 set_mem_alias_set (mem, alias_set);
5209 emit_move_insn (mem,
5210 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5213 /* Return the address of the regbuf. */
5214 return XEXP (regbuf, 0);
5217 /* Define the `__builtin_va_list' type for the ABI. */
5219 tree
5220 sh_build_va_list ()
5222 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5223 tree record;
5225 if (TARGET_SH5 || (! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
5226 return ptr_type_node;
5228 record = make_node (RECORD_TYPE);
5230 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5231 ptr_type_node);
5232 f_next_o_limit = build_decl (FIELD_DECL,
5233 get_identifier ("__va_next_o_limit"),
5234 ptr_type_node);
5235 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5236 ptr_type_node);
5237 f_next_fp_limit = build_decl (FIELD_DECL,
5238 get_identifier ("__va_next_fp_limit"),
5239 ptr_type_node);
5240 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5241 ptr_type_node);
5243 DECL_FIELD_CONTEXT (f_next_o) = record;
5244 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5245 DECL_FIELD_CONTEXT (f_next_fp) = record;
5246 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5247 DECL_FIELD_CONTEXT (f_next_stack) = record;
5249 TYPE_FIELDS (record) = f_next_o;
5250 TREE_CHAIN (f_next_o) = f_next_o_limit;
5251 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5252 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5253 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5255 layout_type (record);
5257 return record;
5260 /* Implement `va_start' for varargs and stdarg. */
5262 void
5263 sh_va_start (stdarg_p, valist, nextarg)
5264 int stdarg_p;
5265 tree valist;
5266 rtx nextarg;
5268 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5269 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5270 tree t, u;
5271 int nfp, nint;
5273 if (TARGET_SH5)
5275 expand_builtin_saveregs ();
5276 /* When the varargs dummy argument is ``passed'' on a register,
5277 we don't want std_expand_builtin_va_start() to apply any
5278 correction for it, so set stdarg_p so as to pretend there's
5279 no such dummy argument. */
5280 if (current_function_args_info.arg_count[(int) SH_ARG_INT]
5281 < NPARM_REGS (SImode))
5282 stdarg_p = 1;
5283 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
5284 return;
5287 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
5289 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
5290 return;
5293 f_next_o = TYPE_FIELDS (va_list_type_node);
5294 f_next_o_limit = TREE_CHAIN (f_next_o);
5295 f_next_fp = TREE_CHAIN (f_next_o_limit);
5296 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5297 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5299 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5300 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5301 valist, f_next_o_limit);
5302 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5303 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5304 valist, f_next_fp_limit);
5305 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5306 valist, f_next_stack);
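  /* The register save area built by __builtin_saveregs places the unnamed
     float registers first, followed by the unnamed integer registers, so
     next_fp starts at the buffer and next_o starts where the float area
     ends.  */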
5308 /* Call __builtin_saveregs. */
5309 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
5310 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
5311 TREE_SIDE_EFFECTS (t) = 1;
5312 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5314 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
5315 if (nfp < 8)
5316 nfp = 8 - nfp;
5317 else
5318 nfp = 0;
5319 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5320 build_int_2 (UNITS_PER_WORD * nfp, 0)));
5321 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
5322 TREE_SIDE_EFFECTS (t) = 1;
5323 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5325 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
5326 TREE_SIDE_EFFECTS (t) = 1;
5327 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5329 nint = current_function_args_info.arg_count[SH_ARG_INT];
5330 if (nint < 4)
5331 nint = 4 - nint;
5332 else
5333 nint = 0;
5334 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5335 build_int_2 (UNITS_PER_WORD * nint, 0)));
5336 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
5337 TREE_SIDE_EFFECTS (t) = 1;
5338 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5340 u = make_tree (ptr_type_node, nextarg);
5341 if (! stdarg_p && (nint == 0 || nfp == 0))
5343 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5344 build_int_2 (-UNITS_PER_WORD, -1)));
5346 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
5347 TREE_SIDE_EFFECTS (t) = 1;
5348 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5351 /* Implement `va_arg'. */
5354 sh_va_arg (valist, type)
5355 tree valist, type;
5357 HOST_WIDE_INT size, rsize;
5358 tree tmp, pptr_type_node;
5359 rtx addr_rtx, r;
5360 rtx result;
5361 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
5363 size = int_size_in_bytes (type);
5364 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5365 pptr_type_node = build_pointer_type (ptr_type_node);
5367 if (pass_by_ref)
5368 type = build_pointer_type (type);
5370 if (! TARGET_SH5 && (TARGET_SH3E || TARGET_SH4) && ! TARGET_HITACHI)
5372 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5373 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5374 int pass_as_float;
5375 rtx lab_false, lab_over;
5377 f_next_o = TYPE_FIELDS (va_list_type_node);
5378 f_next_o_limit = TREE_CHAIN (f_next_o);
5379 f_next_fp = TREE_CHAIN (f_next_o_limit);
5380 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5381 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5383 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5384 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5385 valist, f_next_o_limit);
5386 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
5387 valist, f_next_fp);
5388 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5389 valist, f_next_fp_limit);
5390 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5391 valist, f_next_stack);
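      /* Floating-point arguments are fetched from the next_fp region and
	 everything else from the next_o region; when the chosen region is
	 exhausted, the argument is taken from next_stack instead.  */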
5393 if (TARGET_SH4)
5395 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
5396 || (TREE_CODE (type) == COMPLEX_TYPE
5397 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5398 && size <= 16));
5400 else
5402 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
5405 addr_rtx = gen_reg_rtx (Pmode);
5406 lab_false = gen_label_rtx ();
5407 lab_over = gen_label_rtx ();
5409 if (pass_as_float)
5411 int first_floatreg
5412 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5413 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5415 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
5416 EXPAND_NORMAL),
5417 expand_expr (next_fp_limit, NULL_RTX,
5418 Pmode, EXPAND_NORMAL),
5419 GE, const1_rtx, Pmode, 1, lab_false);
5421 if (TYPE_ALIGN (type) > BITS_PER_WORD
5422 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
5423 && (n_floatregs & 1)))
5425 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
5426 build_int_2 (UNITS_PER_WORD, 0));
5427 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
5428 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
5429 TREE_SIDE_EFFECTS (tmp) = 1;
5430 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5433 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
5434 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5435 if (r != addr_rtx)
5436 emit_move_insn (addr_rtx, r);
5438 emit_jump_insn (gen_jump (lab_over));
5439 emit_barrier ();
5440 emit_label (lab_false);
5442 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5443 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5444 if (r != addr_rtx)
5445 emit_move_insn (addr_rtx, r);
5447 else
5449 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
5450 build_int_2 (rsize, 0));
5452 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
5453 EXPAND_NORMAL),
5454 expand_expr (next_o_limit, NULL_RTX,
5455 Pmode, EXPAND_NORMAL),
5456 GT, const1_rtx, Pmode, 1, lab_false);
5458 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
5459 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5460 if (r != addr_rtx)
5461 emit_move_insn (addr_rtx, r);
5463 emit_jump_insn (gen_jump (lab_over));
5464 emit_barrier ();
5465 emit_label (lab_false);
5467 if (size > 4 && ! TARGET_SH4)
5469 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
5470 TREE_SIDE_EFFECTS (tmp) = 1;
5471 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5474 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5475 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5476 if (r != addr_rtx)
5477 emit_move_insn (addr_rtx, r);
5480 emit_label (lab_over);
5482 tmp = make_tree (pptr_type_node, addr_rtx);
5483 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
5486 /* ??? In va-sh.h, there had been code to make values larger than
5487 size 8 indirect. This does not match the FUNCTION_ARG macros. */
5489 result = std_expand_builtin_va_arg (valist, type);
5490 if (pass_by_ref)
5492 #ifdef POINTERS_EXTEND_UNSIGNED
5493 if (GET_MODE (result) != Pmode)
5494 result = convert_memory_address (Pmode, result);
5495 #endif
5496 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
5497 set_mem_alias_set (result, get_varargs_alias_set ());
5499 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
5500 argument to the varargs alias set. */
5501 return result;
5504 /* Define the offset between two registers, one to be eliminated, and
5505 the other its replacement, at the start of a routine. */
5508 initial_elimination_offset (from, to)
5509 int from;
5510 int to;
5512 int regs_saved;
5513 int regs_saved_rounding = 0;
5514 int total_saved_regs_space;
5515 int total_auto_space;
5516 int save_flags = target_flags;
5517 int copy_flags;
5519 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
5520 calc_live_regs (&regs_saved, live_regs_mask);
5521 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
5522 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
5523 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5524 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
5526 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
5527 copy_flags = target_flags;
5528 target_flags = save_flags;
5530 total_saved_regs_space = regs_saved + regs_saved_rounding;
5532 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
5533 return total_saved_regs_space + total_auto_space
5534 + current_function_args_info.byref_regs * 8;
5536 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5537 return total_saved_regs_space + total_auto_space
5538 + current_function_args_info.byref_regs * 8;
5540 /* Initial gap between fp and sp is 0. */
5541 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5542 return 0;
5544 if (from == RETURN_ADDRESS_POINTER_REGNUM
5545 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
5546 if (TARGET_SH5)
5548 int i, n = total_saved_regs_space;
5549 int align;
5550 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5552 n += total_auto_space;
5554 /* If it wasn't saved, there's not much we can do. */
5555 if ((live_regs_mask[pr_reg / 32] & (1 << (pr_reg % 32))) == 0)
5556 return n;
5558 target_flags = copy_flags;
5560 /* We loop twice: first, check 8-byte aligned registers,
5561 that are stored in the higher addresses, that are known
5562 to be aligned. Then, check 32-bit registers that don't
5563 need 8-byte alignment. */
5564 for (align = 1; align >= 0; align--)
5565 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5566 if (live_regs_mask[i/32] & (1 << (i % 32)))
5568 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5570 if (mode == SFmode && (i % 2) == 1
5571 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5572 && (live_regs_mask[(i ^ 1) / 32]
5573 & (1 << ((i ^ 1) % 32))))
5575 mode = DFmode;
5576 i--;
5579 /* If we're doing the aligned pass and this is not aligned,
5580 or we're doing the unaligned pass and this is aligned,
5581 skip it. */
5582 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5583 == 0) != align)
5584 continue;
5586 n -= GET_MODE_SIZE (mode);
5588 if (i == pr_reg)
5590 target_flags = save_flags;
5591 return n;
5595 abort ();
5597 else
5598 return total_auto_space;
5600 abort ();
5603 /* Handle machine specific pragmas to be semi-compatible with Hitachi
5604 compiler. */
5606 void
5607 sh_pr_interrupt (pfile)
5608 cpp_reader *pfile ATTRIBUTE_UNUSED;
5610 pragma_interrupt = 1;
5613 void
5614 sh_pr_trapa (pfile)
5615 cpp_reader *pfile ATTRIBUTE_UNUSED;
5617 pragma_interrupt = pragma_trapa = 1;
5620 void
5621 sh_pr_nosave_low_regs (pfile)
5622 cpp_reader *pfile ATTRIBUTE_UNUSED;
5624 pragma_nosave_low_regs = 1;
5627 /* Generate an 'interrupt_handler' attribute for decls.  */
5629 static void
5630 sh_insert_attributes (node, attributes)
5631 tree node;
5632 tree * attributes;
5634 if (! pragma_interrupt
5635 || TREE_CODE (node) != FUNCTION_DECL)
5636 return;
5638 /* We are only interested in fields. */
5639 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
5640 return;
5642 /* Add an 'interrupt_handler' attribute.  */
5643 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
5645 return;
5648 /* Supported attributes:
5650 interrupt_handler -- specifies this function is an interrupt handler.
5652 sp_switch -- specifies an alternate stack for an interrupt handler
5653 to run on.
5655 trap_exit -- use a trapa to exit an interrupt function instead of
5656 an rte instruction. */
5658 const struct attribute_spec sh_attribute_table[] =
5660 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
5661 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
5662 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
5663 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
5664 { NULL, 0, 0, false, false, false, NULL }
5667 /* Handle an "interrupt_handler" attribute; arguments as in
5668 struct attribute_spec.handler. */
5669 static tree
5670 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
5671 tree *node;
5672 tree name;
5673 tree args ATTRIBUTE_UNUSED;
5674 int flags ATTRIBUTE_UNUSED;
5675 bool *no_add_attrs;
5677 if (TREE_CODE (*node) != FUNCTION_DECL)
5679 warning ("`%s' attribute only applies to functions",
5680 IDENTIFIER_POINTER (name));
5681 *no_add_attrs = true;
5684 return NULL_TREE;
5687 /* Handle an "sp_switch" attribute; arguments as in
5688 struct attribute_spec.handler. */
5689 static tree
5690 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
5691 tree *node;
5692 tree name;
5693 tree args;
5694 int flags ATTRIBUTE_UNUSED;
5695 bool *no_add_attrs;
5697 if (TREE_CODE (*node) != FUNCTION_DECL)
5699 warning ("`%s' attribute only applies to functions",
5700 IDENTIFIER_POINTER (name));
5701 *no_add_attrs = true;
5703 else if (!pragma_interrupt)
5705 /* The sp_switch attribute only has meaning for interrupt functions. */
5706 warning ("`%s' attribute only applies to interrupt functions",
5707 IDENTIFIER_POINTER (name));
5708 *no_add_attrs = true;
5710 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
5712 /* The argument must be a constant string. */
5713 warning ("`%s' attribute argument not a string constant",
5714 IDENTIFIER_POINTER (name));
5715 *no_add_attrs = true;
5717 else
5719 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
5720 TREE_STRING_POINTER (TREE_VALUE (args)));
5723 return NULL_TREE;
5726 /* Handle a "trap_exit" attribute; arguments as in
5727 struct attribute_spec.handler. */
5728 static tree
5729 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
5730 tree *node;
5731 tree name;
5732 tree args;
5733 int flags ATTRIBUTE_UNUSED;
5734 bool *no_add_attrs;
5736 if (TREE_CODE (*node) != FUNCTION_DECL)
5738 warning ("`%s' attribute only applies to functions",
5739 IDENTIFIER_POINTER (name));
5740 *no_add_attrs = true;
5742 else if (!pragma_interrupt)
5744 /* The trap_exit attribute only has meaning for interrupt functions. */
5745 warning ("`%s' attribute only applies to interrupt functions",
5746 IDENTIFIER_POINTER (name));
5747 *no_add_attrs = true;
5749 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
5751 /* The argument must be a constant integer. */
5752 warning ("`%s' attribute argument not an integer constant",
5753 IDENTIFIER_POINTER (name));
5754 *no_add_attrs = true;
5756 else
5758 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
5761 return NULL_TREE;
5765 sh_cfun_interrupt_handler_p (void)
5767 return (lookup_attribute ("interrupt_handler",
5768 DECL_ATTRIBUTES (current_function_decl))
5769 != NULL_TREE);
5772 /* Predicates used by the templates. */
5774 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
5775 Used only in general_movsrc_operand. */
5778 system_reg_operand (op, mode)
5779 rtx op;
5780 enum machine_mode mode ATTRIBUTE_UNUSED;
5782 switch (REGNO (op))
5784 case PR_REG:
5785 case MACL_REG:
5786 case MACH_REG:
5787 return 1;
5789 return 0;
5792 /* Returns 1 if OP can be source of a simple move operation.
5793 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
5794 invalid as are subregs of system registers. */
5797 general_movsrc_operand (op, mode)
5798 rtx op;
5799 enum machine_mode mode;
5801 if (GET_CODE (op) == MEM)
5803 rtx inside = XEXP (op, 0);
5804 if (GET_CODE (inside) == CONST)
5805 inside = XEXP (inside, 0);
5807 if (GET_CODE (inside) == LABEL_REF)
5808 return 1;
5810 if (GET_CODE (inside) == PLUS
5811 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
5812 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
5813 return 1;
5815 /* Only post inc allowed. */
5816 if (GET_CODE (inside) == PRE_DEC)
5817 return 0;
5820 if ((mode == QImode || mode == HImode)
5821 && (GET_CODE (op) == SUBREG
5822 && GET_CODE (XEXP (op, 0)) == REG
5823 && system_reg_operand (XEXP (op, 0), mode)))
5824 return 0;
5826 return general_operand (op, mode);
5829 /* Returns 1 if OP can be a destination of a move.
5830 Same as general_operand, but no preinc allowed. */
5833 general_movdst_operand (op, mode)
5834 rtx op;
5835 enum machine_mode mode;
5837 /* Only pre dec allowed. */
5838 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
5839 return 0;
5841 return general_operand (op, mode);
5844 /* Accept a register, but not a subreg of any kind. This allows us to
5845 avoid pathological cases in reload wrt data movement common in
5846 int->fp conversion. */
5849 reg_no_subreg_operand (op, mode)
5850 register rtx op;
5851 enum machine_mode mode;
5853 if (GET_CODE (op) == SUBREG)
5854 return 0;
5855 return register_operand (op, mode);
5858 /* Returns 1 if OP is a normal arithmetic register. */
5861 arith_reg_operand (op, mode)
5862 rtx op;
5863 enum machine_mode mode;
5865 if (register_operand (op, mode))
5867 int regno;
5869 if (GET_CODE (op) == REG)
5870 regno = REGNO (op);
5871 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
5872 regno = REGNO (SUBREG_REG (op));
5873 else
5874 return 1;
5876 return (regno != T_REG && regno != PR_REG
5877 && ! TARGET_REGISTER_P (regno)
5878 && (regno != FPUL_REG || TARGET_SH4)
5879 && regno != MACH_REG && regno != MACL_REG);
5881 return 0;
5884 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
5885 because this would lead to missing sign extensions when truncating from
5886 DImode to SImode. */
5888 arith_reg_dest (op, mode)
5889 rtx op;
5890 enum machine_mode mode;
5892 if (mode == DImode && GET_CODE (op) == SUBREG
5893 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
5894 return 0;
5895 return arith_reg_operand (op, mode);
5899 fp_arith_reg_operand (op, mode)
5900 rtx op;
5901 enum machine_mode mode;
5903 if (register_operand (op, mode))
5905 int regno;
5907 if (GET_CODE (op) == REG)
5908 regno = REGNO (op);
5909 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
5910 regno = REGNO (SUBREG_REG (op));
5911 else
5912 return 1;
5914 return (regno >= FIRST_PSEUDO_REGISTER
5915 || FP_REGISTER_P (regno));
5917 return 0;
5920 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
5923 arith_operand (op, mode)
5924 rtx op;
5925 enum machine_mode mode;
5927 if (arith_reg_operand (op, mode))
5928 return 1;
5930 if (TARGET_SHMEDIA)
5932 /* FIXME: We should be checking whether the CONST_INT fits in a
5933 CONST_OK_FOR_J here, but this causes reload_cse to crash when
5934 attempting to transform a sequence of two 64-bit sets of the
5935 same register from literal constants into a set and an add,
5936 when the difference is too wide for an add. */
5937 if (GET_CODE (op) == CONST_INT
5938 || EXTRA_CONSTRAINT_S (op))
5939 return 1;
5940 else
5941 return 0;
5943 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
5944 return 1;
5946 return 0;
5949 /* Returns 1 if OP is a valid source operand for a compare insn. */
5952 arith_reg_or_0_operand (op, mode)
5953 rtx op;
5954 enum machine_mode mode;
5956 if (arith_reg_operand (op, mode))
5957 return 1;
5959 if (EXTRA_CONSTRAINT_U (op))
5960 return 1;
5962 return 0;
5965 /* Return 1 if OP is a valid source operand for an SHmedia operation
5966 that takes either a register or a 6-bit immediate. */
5969 shmedia_6bit_operand (op, mode)
5970 rtx op;
5971 enum machine_mode mode;
5973 return (arith_reg_operand (op, mode)
5974 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_O (INTVAL (op))));
5977 /* Returns 1 if OP is a valid source operand for a logical operation. */
5980 logical_operand (op, mode)
5981 rtx op;
5982 enum machine_mode mode;
5984 if (arith_reg_operand (op, mode))
5985 return 1;
5987 if (TARGET_SHMEDIA)
5989 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_P (INTVAL (op)))
5990 return 1;
5991 else
5992 return 0;
5994 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
5995 return 1;
5997 return 0;
6001 and_operand (op, mode)
6002 rtx op;
6003 enum machine_mode mode;
6005 if (logical_operand (op, mode))
6006 return 1;
6008 /* Check mshflo.l / mshflhi.l opportunities. */
6009 if (TARGET_SHMEDIA
6010 && mode == DImode
6011 && GET_CODE (op) == CONST_INT
6012 && (INTVAL (op) == (unsigned) 0xffffffff
6013 || INTVAL (op) == (HOST_WIDE_INT) -1 << 32))
6014 return 1;
6016 return 0;
6019 /* Nonzero if OP is a floating point value with value 0.0. */
6022 fp_zero_operand (op)
6023 rtx op;
6025 REAL_VALUE_TYPE r;
6027 if (GET_MODE (op) != SFmode)
6028 return 0;
6030 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6031 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
6034 /* Nonzero if OP is a floating point value with value 1.0. */
6037 fp_one_operand (op)
6038 rtx op;
6040 REAL_VALUE_TYPE r;
6042 if (GET_MODE (op) != SFmode)
6043 return 0;
6045 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6046 return REAL_VALUES_EQUAL (r, dconst1);
6049 /* For -m4 and -m4-single-only, mode switching is used. If we are
6050 compiling without -mfmovd, movsf_ie isn't taken into account for
6051 mode switching. We could check in machine_dependent_reorg for
6052 cases where we know we are in single precision mode, but there is
6053 no interface to find that out during reload, so we must avoid
6054 choosing an fldi alternative during reload and thus failing to
6055 allocate a scratch register for the constant loading. */
6057 fldi_ok ()
6059 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
6063 tertiary_reload_operand (op, mode)
6064 rtx op;
6065 enum machine_mode mode ATTRIBUTE_UNUSED;
6067 enum rtx_code code = GET_CODE (op);
6068 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
6072 fpscr_operand (op, mode)
6073 rtx op;
6074 enum machine_mode mode ATTRIBUTE_UNUSED;
6076 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
6077 && GET_MODE (op) == PSImode);
6081 fpul_operand (op, mode)
6082 rtx op;
6083 enum machine_mode mode;
6085 if (TARGET_SHMEDIA)
6086 return fp_arith_reg_operand (op, mode);
6088 return (GET_CODE (op) == REG
6089 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
6090 && GET_MODE (op) == mode);
6094 symbol_ref_operand (op, mode)
6095 rtx op;
6096 enum machine_mode mode ATTRIBUTE_UNUSED;
6098 return (GET_CODE (op) == SYMBOL_REF);
6102 commutative_float_operator (op, mode)
6103 rtx op;
6104 enum machine_mode mode;
6106 if (GET_MODE (op) != mode)
6107 return 0;
6108 switch (GET_CODE (op))
6110 case PLUS:
6111 case MULT:
6112 return 1;
6113 default:
6114 break;
6116 return 0;
6120 noncommutative_float_operator (op, mode)
6121 rtx op;
6122 enum machine_mode mode;
6124 if (GET_MODE (op) != mode)
6125 return 0;
6126 switch (GET_CODE (op))
6128 case MINUS:
6129 case DIV:
6130 return 1;
6131 default:
6132 break;
6134 return 0;
6138 binary_float_operator (op, mode)
6139 rtx op;
6140 enum machine_mode mode;
6142 if (GET_MODE (op) != mode)
6143 return 0;
6144 switch (GET_CODE (op))
6146 case PLUS:
6147 case MINUS:
6148 case MULT:
6149 case DIV:
6150 return 1;
6151 default:
6152 break;
6154 return 0;
6158 equality_comparison_operator (op, mode)
6159 rtx op;
6160 enum machine_mode mode;
6162 return ((mode == VOIDmode || GET_MODE (op) == mode)
6163 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
6166 int greater_comparison_operator (op, mode)
6167 rtx op;
6168 enum machine_mode mode;
6170 if (mode != VOIDmode && GET_MODE (op) != mode)
6171 return 0;
6172 switch (GET_CODE (op))
6174 case GT:
6175 case GE:
6176 case GTU:
6177 case GEU:
6178 return 1;
6179 default:
6180 return 0;
6184 int less_comparison_operator (op, mode)
6185 rtx op;
6186 enum machine_mode mode;
6188 if (mode != VOIDmode && GET_MODE (op) != mode)
6189 return 0;
6190 switch (GET_CODE (op))
6192 case LT:
6193 case LE:
6194 case LTU:
6195 case LEU:
6196 return 1;
6197 default:
6198 return 0;
6202 /* Accept pseudos and branch target registers. */
6204 target_reg_operand (op, mode)
6205 rtx op;
6206 enum machine_mode mode;
6208 if (mode != DImode
6209 || GET_MODE (op) != DImode)
6210 return 0;
6212 if (GET_CODE (op) == SUBREG)
6213 op = XEXP (op, 0);
6215 if (GET_CODE (op) != REG)
6216 return 0;
6218 /* We must protect ourselves from matching pseudos that are virtual
6219 registers, because they will eventually be replaced with hardware
6220 registers that aren't branch-target registers. */
6221 if (REGNO (op) > LAST_VIRTUAL_REGISTER
6222 || TARGET_REGISTER_P (REGNO (op)))
6223 return 1;
6225 return 0;
6228 /* Same as target_reg_operand, except that label_refs and symbol_refs
6229 are accepted before reload. */
6231 target_operand (op, mode)
6232 rtx op;
6233 enum machine_mode mode;
6235 if (mode != DImode)
6236 return 0;
6238 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
6239 && EXTRA_CONSTRAINT_T (op))
6240 return ! reload_completed;
6242 return target_reg_operand (op, mode);
6246 mextr_bit_offset (op, mode)
6247 rtx op;
6248 enum machine_mode mode ATTRIBUTE_UNUSED;
6250 HOST_WIDE_INT i;
6252 if (GET_CODE (op) != CONST_INT)
6253 return 0;
6254 i = INTVAL (op);
6255 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
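/* For illustration: only the byte-aligned bit offsets 8, 16, ..., 56 that
   the mextr1 .. mextr7 instructions can encode are accepted; e.g.
   GEN_INT (16) passes, while GEN_INT (12) or GEN_INT (64) is rejected.  */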
6259 extend_reg_operand (op, mode)
6260 rtx op;
6261 enum machine_mode mode;
6263 return (GET_CODE (op) == TRUNCATE
6264 ? arith_operand
6265 : arith_reg_operand) (op, mode);
6269 extend_reg_or_0_operand (op, mode)
6270 rtx op;
6271 enum machine_mode mode;
6273 return (GET_CODE (op) == TRUNCATE
6274 ? arith_operand
6275 : arith_reg_or_0_operand) (op, mode);
6279 general_extend_operand (op, mode)
6280 rtx op;
6281 enum machine_mode mode;
6283 return (GET_CODE (op) == TRUNCATE
6284 ? arith_operand
6285 : nonimmediate_operand) (op, mode);
6289 inqhi_operand (op, mode)
6290 rtx op;
6291 enum machine_mode mode;
6293 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
6294 return 0;
6295 op = XEXP (op, 0);
6296 /* Can't use true_regnum here because copy_cost wants to know about
6297 SECONDARY_INPUT_RELOAD_CLASS. */
6298 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
6301 /* Return nonzero if V is a zero vector matching MODE. */
6303 zero_vec_operand (v, mode)
6304 rtx v;
6305 enum machine_mode mode;
6307 int i;
6309 if (GET_CODE (v) != CONST_VECTOR
6310 || (GET_MODE (v) != mode && mode != VOIDmode))
6311 return 0;
6312 for (i = XVECLEN (v, 0) - 1; i >= 0; i--)
6313 if (XVECEXP (v, 0, i) != const0_rtx)
6314 return 0;
6315 return 1;
6319 sh_rep_vec (v, mode)
6320 rtx v;
6321 enum machine_mode mode;
6323 int i;
6324 rtx x, y;
6326 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
6327 || (GET_MODE (v) != mode && mode != VOIDmode))
6328 return 0;
6329 i = XVECLEN (v, 0) - 2;
6330 x = XVECEXP (v, 0, i + 1);
6331 if (GET_MODE_UNIT_SIZE (mode) == 1)
6333 y = XVECEXP (v, 0, i);
6334 for (i -= 2 ; i >= 0; i -= 2)
6335 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
6336 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
6337 return 0;
6339 else
6340 for (; i >= 0; i--)
6341 if (XVECEXP (v, 0, i) != x)
6342 return 0;
6343 return 1;
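/* For illustration: sh_rep_vec recognizes vectors built from one repeated
   element, e.g.

     (const_vector:V4HI [(const_int 3) (const_int 3)
                         (const_int 3) (const_int 3)])

   For byte vectors (unit size 1) it is a repeated pair that is required,
   so a V8QI vector alternating 1, 2, 1, 2, ... also qualifies.  */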
6346 /* Determine if V is a constant vector matching MODE with only one element
6347 that is not a sign extension. Two byte-sized elements count as one. */
6349 sh_1el_vec (v, mode)
6350 rtx v;
6351 enum machine_mode mode;
6353 int unit_size;
6354 int i, last, least, sign_ix;
6355 rtx sign;
6357 if (GET_CODE (v) != CONST_VECTOR
6358 || (GET_MODE (v) != mode && mode != VOIDmode))
6359 return 0;
6360 /* Determine the indices of the last and of the least significant elements. */
6361 last = XVECLEN (v, 0) - 1;
6362 least = TARGET_LITTLE_ENDIAN ? 0 : last;
6363 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
6364 return 0;
6365 sign_ix = least;
6366 if (GET_MODE_UNIT_SIZE (mode) == 1)
6367 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
6368 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
6369 return 0;
6370 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
6371 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
6372 ? constm1_rtx : const0_rtx);
6373 i = XVECLEN (v, 0) - 1;
6375 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
6376 return 0;
6377 while (--i);
6378 return 1;
6382 sh_const_vec (v, mode)
6383 rtx v;
6384 enum machine_mode mode;
6386 int i;
6388 if (GET_CODE (v) != CONST_VECTOR
6389 || (GET_MODE (v) != mode && mode != VOIDmode))
6390 return 0;
6391 i = XVECLEN (v, 0) - 1;
6392 for (; i >= 0; i--)
6393 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
6394 return 0;
6395 return 1;
6398 /* Return the destination address of a branch. */
6400 static int
6401 branch_dest (branch)
6402 rtx branch;
6404 rtx dest = SET_SRC (PATTERN (branch));
6405 int dest_uid;
6407 if (GET_CODE (dest) == IF_THEN_ELSE)
6408 dest = XEXP (dest, 1);
6409 dest = XEXP (dest, 0);
6410 dest_uid = INSN_UID (dest);
6411 return INSN_ADDRESSES (dest_uid);
6414 /* Return non-zero if REG is not used after INSN.
6415 We assume REG is a reload reg, and therefore does
6416 not live past labels. It may live past calls or jumps though. */
6418 reg_unused_after (reg, insn)
6419 rtx reg;
6420 rtx insn;
6422 enum rtx_code code;
6423 rtx set;
6425 /* If the reg is set by this instruction, then it is safe for our
6426 case. Disregard the case where this is a store to memory, since
6427 we are checking a register used in the store address. */
6428 set = single_set (insn);
6429 if (set && GET_CODE (SET_DEST (set)) != MEM
6430 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6431 return 1;
6433 while ((insn = NEXT_INSN (insn)))
6435 code = GET_CODE (insn);
6437 #if 0
6438 /* If this is a label that existed before reload, then the register
6439 is dead here. However, if this is a label added by reorg, then
6440 the register may still be live here. We can't tell the difference,
6441 so we just ignore labels completely. */
6442 if (code == CODE_LABEL)
6443 return 1;
6444 /* else */
6445 #endif
6447 if (code == JUMP_INSN)
6448 return 0;
6450 /* If this is a sequence, we must handle them all at once.
6451 We could have for instance a call that sets the target register,
6452 and an insn in a delay slot that uses the register. In this case,
6453 we must return 0. */
6454 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
6456 int i;
6457 int retval = 0;
6459 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
6461 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
6462 rtx set = single_set (this_insn);
6464 if (GET_CODE (this_insn) == CALL_INSN)
6465 code = CALL_INSN;
6466 else if (GET_CODE (this_insn) == JUMP_INSN)
6468 if (INSN_ANNULLED_BRANCH_P (this_insn))
6469 return 0;
6470 code = JUMP_INSN;
6473 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6474 return 0;
6475 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6477 if (GET_CODE (SET_DEST (set)) != MEM)
6478 retval = 1;
6479 else
6480 return 0;
6482 if (set == 0
6483 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
6484 return 0;
6486 if (retval == 1)
6487 return 1;
6488 else if (code == JUMP_INSN)
6489 return 0;
6491 else if (GET_RTX_CLASS (code) == 'i')
6493 rtx set = single_set (insn);
6495 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6496 return 0;
6497 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6498 return GET_CODE (SET_DEST (set)) != MEM;
6499 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
6500 return 0;
6503 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
6504 return 1;
6506 return 1;
6509 #include "ggc.h"
6511 static GTY(()) rtx fpscr_rtx;
6513 get_fpscr_rtx ()
6515 if (! fpscr_rtx)
6517 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
6518 REG_USERVAR_P (fpscr_rtx) = 1;
6519 mark_user_reg (fpscr_rtx);
6521 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
6522 mark_user_reg (fpscr_rtx);
6523 return fpscr_rtx;
6526 void
6527 emit_sf_insn (pat)
6528 rtx pat;
6530 emit_insn (pat);
6533 void
6534 emit_df_insn (pat)
6535 rtx pat;
6537 emit_insn (pat);
6540 void
6541 expand_sf_unop (fun, operands)
6542 rtx (*fun) PARAMS ((rtx, rtx, rtx));
6543 rtx *operands;
6545 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
6548 void
6549 expand_sf_binop (fun, operands)
6550 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
6551 rtx *operands;
6553 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
6554 get_fpscr_rtx ()));
6557 void
6558 expand_df_unop (fun, operands)
6559 rtx (*fun) PARAMS ((rtx, rtx, rtx));
6560 rtx *operands;
6562 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
6565 void
6566 expand_df_binop (fun, operands)
6567 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
6568 rtx *operands;
6570 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
6571 get_fpscr_rtx ()));
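/* For illustration: these wrappers are meant to be called from expanders
   whose insn patterns carry an explicit fpscr operand.  Assuming such a
   pattern named addsf3_i, an expander body would look roughly like

     expand_sf_binop (&gen_addsf3_i, operands);
     DONE;

   so that get_fpscr_rtx () supplies the trailing fpscr use.  */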
6574 /* ??? gcc does flow analysis strictly after common subexpression
6575 elimination. As a result, common subexpression elimination fails
6576 when there are some intervening statements setting the same register.
6577 If we did nothing about this, this would hurt the precision switching
6578 for SH4 badly. There is some cse after reload, but it is unable to
6579 undo the extra register pressure from the unused instructions, and
6580 it cannot remove auto-increment loads.
6582 A C code example that shows this flow/cse weakness for (at least) SH
6583 and sparc (as of gcc ss-970706) is this:
6585 double
6586 f(double a)
6588 double d;
6589 d = 0.1;
6590 a += d;
6591 d = 1.1;
6592 d = 0.1;
6593 a *= d;
6594 return a;
6597 So we add another pass before common subexpression elimination, to
6598 remove assignments that are dead due to a following assignment in the
6599 same basic block. */
6601 static void
6602 mark_use (x, reg_set_block)
6603 rtx x, *reg_set_block;
6605 enum rtx_code code;
6607 if (! x)
6608 return;
6609 code = GET_CODE (x);
6610 switch (code)
6612 case REG:
6614 int regno = REGNO (x);
6615 int nregs = (regno < FIRST_PSEUDO_REGISTER
6616 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
6617 : 1);
6620 reg_set_block[regno + nregs - 1] = 0;
6622 while (--nregs);
6623 break;
6625 case SET:
6627 rtx dest = SET_DEST (x);
6629 if (GET_CODE (dest) == SUBREG)
6630 dest = SUBREG_REG (dest);
6631 if (GET_CODE (dest) != REG)
6632 mark_use (dest, reg_set_block);
6633 mark_use (SET_SRC (x), reg_set_block);
6634 break;
6636 case CLOBBER:
6637 break;
6638 default:
6640 const char *fmt = GET_RTX_FORMAT (code);
6641 int i, j;
6642 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6644 if (fmt[i] == 'e')
6645 mark_use (XEXP (x, i), reg_set_block);
6646 else if (fmt[i] == 'E')
6647 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6648 mark_use (XVECEXP (x, i, j), reg_set_block);
6650 break;
6655 static rtx get_free_reg PARAMS ((HARD_REG_SET));
6657 /* This function returns a register to use for loading the address from
6658 which to load the fpscr. Currently it always returns r1 or r7, but when we are
6659 able to use pseudo registers after combine, or have a better mechanism
6660 for choosing a register, it should be done here. */
6661 /* REGS_LIVE is the liveness information for the point for which we
6662 need this allocation. In some bare-bones exit blocks, r1 is live at the
6663 start. We can even have all of r0..r3 being live:
6664 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
6665 INSN, before which new insns are placed, will clobber the register
6666 we return. If a basic block consists only of setting the return value
6667 register to a pseudo and using that register, the return value is not
6668 live before or after this block, yet we'll insert our insns right in
6669 the middle. */
6671 static rtx
6672 get_free_reg (regs_live)
6673 HARD_REG_SET regs_live;
6675 if (! TEST_HARD_REG_BIT (regs_live, 1))
6676 return gen_rtx_REG (Pmode, 1);
6678 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
6679 there shouldn't be anything but a jump before the function end. */
6680 if (! TEST_HARD_REG_BIT (regs_live, 7))
6681 return gen_rtx_REG (Pmode, 7);
6683 abort ();
6686 /* This function will set the fpscr from memory.
6687 MODE is the mode we are setting it to. */
6688 void
6689 fpscr_set_from_mem (mode, regs_live)
6690 int mode;
6691 HARD_REG_SET regs_live;
6693 enum attr_fp_mode fp_mode = mode;
6694 rtx addr_reg = get_free_reg (regs_live);
6696 if (fp_mode == (enum attr_fp_mode) NORMAL_MODE (FP_MODE))
6697 emit_insn (gen_fpu_switch1 (addr_reg));
6698 else
6699 emit_insn (gen_fpu_switch0 (addr_reg));
6702 /* Is the given character a logical line separator for the assembler? */
6703 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
6704 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
6705 #endif
6708 sh_insn_length_adjustment (insn)
6709 rtx insn;
6711 /* Instructions with unfilled delay slots take up an extra two bytes for
6712 the nop in the delay slot. */
6713 if (((GET_CODE (insn) == INSN
6714 && GET_CODE (PATTERN (insn)) != USE
6715 && GET_CODE (PATTERN (insn)) != CLOBBER)
6716 || GET_CODE (insn) == CALL_INSN
6717 || (GET_CODE (insn) == JUMP_INSN
6718 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
6719 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
6720 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
6721 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
6722 return 2;
6724 /* sh-dsp parallel processing insns take four bytes instead of two. */
6726 if (GET_CODE (insn) == INSN)
6728 int sum = 0;
6729 rtx body = PATTERN (insn);
6730 const char *template;
6731 char c;
6732 int maybe_label = 1;
6734 if (GET_CODE (body) == ASM_INPUT)
6735 template = XSTR (body, 0);
6736 else if (asm_noperands (body) >= 0)
6737 template
6738 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
6739 else
6740 return 0;
6743 int ppi_adjust = 0;
6746 c = *template++;
6747 while (c == ' ' || c == '\t');
6748 /* All sh-dsp parallel-processing insns start with p.
6749 The only non-ppi sh insn starting with p is pref.
6750 The only ppi starting with pr is prnd. */
6751 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
6752 ppi_adjust = 2;
6753 /* The repeat pseudo-insn expands to three insns, a total of
6754 six bytes in size. */
6755 else if ((c == 'r' || c == 'R')
6756 && ! strncasecmp ("epeat", template, 5))
6757 ppi_adjust = 4;
6758 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
6760 /* If this is a label, it is obviously not a ppi insn. */
6761 if (c == ':' && maybe_label)
6763 ppi_adjust = 0;
6764 break;
6766 else if (c == '\'' || c == '"')
6767 maybe_label = 0;
6768 c = *template++;
6770 sum += ppi_adjust;
6771 maybe_label = c != ':';
6773 while (c);
6774 return sum;
6776 return 0;
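/* For illustration, with hypothetical inline assembly, the scan above
   would adjust lengths roughly as follows:

     asm ("padd   x0, y0, a0");    parallel-processing insn, +2 bytes
     asm ("pref   @r1");           starts with p but is not ppi, +0
     asm ("repeat start, end");    repeat pseudo-insn, +4 bytes  */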
6779 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
6780 isn't protected by a PIC unspec. */
6782 nonpic_symbol_mentioned_p (x)
6783 rtx x;
6785 register const char *fmt;
6786 register int i;
6788 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
6789 || GET_CODE (x) == PC)
6790 return 1;
6792 /* We don't want to look into the possible MEM location of a
6793 CONST_DOUBLE, since we're not going to use it, in general. */
6794 if (GET_CODE (x) == CONST_DOUBLE)
6795 return 0;
6797 if (GET_CODE (x) == UNSPEC
6798 && (XINT (x, 1) == UNSPEC_PIC
6799 || XINT (x, 1) == UNSPEC_GOT
6800 || XINT (x, 1) == UNSPEC_GOTOFF
6801 || XINT (x, 1) == UNSPEC_GOTPLT
6802 || XINT (x, 1) == UNSPEC_PLT))
6803 return 0;
6805 fmt = GET_RTX_FORMAT (GET_CODE (x));
6806 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6808 if (fmt[i] == 'E')
6810 register int j;
6812 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6813 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
6814 return 1;
6816 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
6817 return 1;
6820 return 0;
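/* For illustration: a bare (symbol_ref "foo") or (label_ref ...) makes
   this return 1, while the same symbol wrapped in one of the PIC unspecs,
   e.g.

     (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)

   is not counted, since the wrapped reference is already PIC-safe.  */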
6823 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
6824 @GOTOFF in `reg'. */
6826 legitimize_pic_address (orig, mode, reg)
6827 rtx orig;
6828 enum machine_mode mode ATTRIBUTE_UNUSED;
6829 rtx reg;
6831 if (GET_CODE (orig) == LABEL_REF
6832 || (GET_CODE (orig) == SYMBOL_REF
6833 && (CONSTANT_POOL_ADDRESS_P (orig)
6834 /* SYMBOL_REF_FLAG is set on static symbols. */
6835 || SYMBOL_REF_FLAG (orig))))
6837 if (reg == 0)
6838 reg = gen_reg_rtx (Pmode);
6840 emit_insn (gen_symGOTOFF2reg (reg, orig));
6841 return reg;
6843 else if (GET_CODE (orig) == SYMBOL_REF)
6845 if (reg == 0)
6846 reg = gen_reg_rtx (Pmode);
6848 emit_insn (gen_symGOT2reg (reg, orig));
6849 return reg;
6851 return orig;
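/* For illustration: given

     static int s;          address goes through gen_symGOTOFF2reg
     extern int g;          address goes through gen_symGOT2reg

   taking &s uses a GOT-relative offset, while taking &g loads the address
   from its GOT entry; anything that is not a SYMBOL_REF or LABEL_REF is
   returned unchanged.  */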
6854 /* Mark the use of a constant in the literal table. If the constant
6855 has multiple labels, make it unique. */
6856 static rtx
6857 mark_constant_pool_use (x)
6858 rtx x;
6860 rtx insn, lab, pattern;
6862 if (x == NULL)
6863 return x;
6865 switch (GET_CODE (x))
6867 case LABEL_REF:
6868 x = XEXP (x, 0);
6869 case CODE_LABEL:
6870 break;
6871 default:
6872 return x;
6875 /* Get the first label in the list of labels for the same constant
6876 and delete the other labels in the list. */
6877 lab = x;
6878 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
6880 if (GET_CODE (insn) != CODE_LABEL
6881 || LABEL_REFS (insn) != NEXT_INSN (insn))
6882 break;
6883 lab = insn;
6886 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
6887 INSN_DELETED_P (insn) = 1;
6889 /* Mark constants in a window. */
6890 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
6892 if (GET_CODE (insn) != INSN)
6893 continue;
6895 pattern = PATTERN (insn);
6896 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
6897 continue;
6899 switch (XINT (pattern, 1))
6901 case UNSPECV_CONST2:
6902 case UNSPECV_CONST4:
6903 case UNSPECV_CONST8:
6904 XVECEXP (pattern, 0, 1) = const1_rtx;
6905 break;
6906 case UNSPECV_WINDOW_END:
6907 if (XVECEXP (pattern, 0, 0) == x)
6908 return lab;
6909 break;
6910 case UNSPECV_CONST_END:
6911 return lab;
6912 default:
6913 break;
6917 return lab;
6920 /* Return true if it's possible to redirect BRANCH1 to the destination
6921 of an unconditional jump BRANCH2. We only want to do this if the
6922 resulting branch will have a short displacement. */
6923 int
6924 sh_can_redirect_branch (branch1, branch2)
6925 rtx branch1;
6926 rtx branch2;
6928 if (flag_expensive_optimizations && simplejump_p (branch2))
6930 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
6931 rtx insn;
6932 int distance;
6934 for (distance = 0, insn = NEXT_INSN (branch1);
6935 insn && distance < 256;
6936 insn = PREV_INSN (insn))
6938 if (insn == dest)
6939 return 1;
6940 else
6941 distance += get_attr_length (insn);
6943 for (distance = 0, insn = NEXT_INSN (branch1);
6944 insn && distance < 256;
6945 insn = NEXT_INSN (insn))
6947 if (insn == dest)
6948 return 1;
6949 else
6950 distance += get_attr_length (insn);
6953 return 0;
6956 /* Return non-zero if register old_reg can be renamed to register new_reg. */
6958 sh_hard_regno_rename_ok (old_reg, new_reg)
6959 unsigned int old_reg ATTRIBUTE_UNUSED;
6960 unsigned int new_reg;
6963 /* Interrupt functions can only use registers that have already been
6964 saved by the prologue, even if they would normally be
6965 call-clobbered. */
6967 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
6968 return 0;
6970 return 1;
6973 /* A C statement (sans semicolon) to update the integer variable COST
6974 based on the relationship between INSN that is dependent on
6975 DEP_INSN through the dependence LINK. The default is to make no
6976 adjustment to COST. This can be used for example to specify to
6977 the scheduler that an output- or anti-dependence does not incur
6978 the same cost as a data-dependence. */
6979 static int
6980 sh_adjust_cost (insn, link, dep_insn, cost)
6981 rtx insn;
6982 rtx link ATTRIBUTE_UNUSED;
6983 rtx dep_insn;
6984 int cost;
6986 rtx reg;
6988 if (GET_CODE(insn) == CALL_INSN)
6990 /* The only input for a call that is timing-critical is the
6991 function's address. */
6992 rtx call = PATTERN (insn);
6994 if (GET_CODE (call) == PARALLEL)
6995 call = XVECEXP (call, 0 ,0);
6996 if (GET_CODE (call) == SET)
6997 call = SET_SRC (call);
6998 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
6999 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
7000 cost = 0;
7002 /* All sfunc calls are parallels with at least four components.
7003 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
7004 else if (GET_CODE (PATTERN (insn)) == PARALLEL
7005 && XVECLEN (PATTERN (insn), 0) >= 4
7006 && (reg = sfunc_uses_reg (insn)))
7008 /* Likewise, the most timing-critical input for an sfunc call
7009 is the function address. However, sfuncs typically start
7010 using their arguments pretty quickly.
7011 Assume a four cycle delay before they are needed. */
7012 if (! reg_set_p (reg, dep_insn))
7013 cost -= TARGET_SUPERSCALAR ? 40 : 4;
7015 /* Adjust load_si / pcload_si type insns latency. Use the known
7016 nominal latency and form of the insn to speed up the check. */
7017 else if (cost == 3
7018 && GET_CODE (PATTERN (dep_insn)) == SET
7019 /* Latency for dmpy type insns is also 3, so check that
7020 it's actually a move insn. */
7021 && general_movsrc_operand (SET_SRC (PATTERN (dep_insn)), SImode))
7022 cost = 2;
7023 else if (cost == 30
7024 && GET_CODE (PATTERN (dep_insn)) == SET
7025 && GET_MODE (SET_SRC (PATTERN (dep_insn))) == SImode)
7026 cost = 20;
7028 return cost;
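/* For illustration: for a call insn such as jsr @r1, only the computation
   of r1 is timing-critical.  If DEP_INSN sets an argument register like r4
   instead, the dependence cost is forced to 0, whereas a DEP_INSN that
   sets r1 keeps its full cost.  */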
7031 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
7032 'special function' patterns (type sfunc) that clobber pr, but that
7033 do not look like function calls to leaf_function_p. Hence we must
7034 do this extra check. */
7036 sh_pr_n_sets ()
7038 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
7041 /* This function returns nonzero if the DFA-based scheduler
7042 interface is to be used. At present it is supported only for
7043 SH4. */
7044 static int
7045 sh_use_dfa_interface()
7047 if (TARGET_SH4)
7048 return 1;
7049 else
7050 return 0;
7053 /* This function returns 2, which signifies dual issue
7054 for the SH4 processor. It is used by the DFA pipeline description. */
7055 static int
7056 sh_issue_rate()
7058 if(TARGET_SH4)
7059 return 2;
7060 else
7061 return 1;
7064 /* SHmedia requires registers for branches, so we can't generate new
7065 branches past reload. */
7066 static bool
7067 sh_cannot_modify_jumps_p ()
7069 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
7072 static bool
7073 sh_ms_bitfield_layout_p (record_type)
7074 tree record_type ATTRIBUTE_UNUSED;
7076 return TARGET_SH5;
7079 /* If using PIC, mark a SYMBOL_REF for a non-global symbol so that we
7080 may access it using GOTOFF instead of GOT. */
7082 static void
7083 sh_encode_section_info (decl, first)
7084 tree decl;
7085 int first;
7087 rtx rtl, symbol;
7089 if (DECL_P (decl))
7090 rtl = DECL_RTL (decl);
7091 else
7092 rtl = TREE_CST_RTL (decl);
7093 if (GET_CODE (rtl) != MEM)
7094 return;
7095 symbol = XEXP (rtl, 0);
7096 if (GET_CODE (symbol) != SYMBOL_REF)
7097 return;
7099 if (flag_pic)
7100 SYMBOL_REF_FLAG (symbol) = (*targetm.binds_local_p) (decl);
7102 if (TARGET_SH5 && first && TREE_CODE (decl) != FUNCTION_DECL)
7103 XEXP (rtl, 0) = gen_datalabel_ref (symbol);
7106 /* Undo the effects of the above. */
7108 static const char *
7109 sh_strip_name_encoding (str)
7110 const char *str;
7112 STRIP_DATALABEL_ENCODING (str, str);
7113 str += *str == '*';
7114 return str;
7118 /* Machine specific built-in functions. */
7120 struct builtin_description
7122 const enum insn_code icode;
7123 const char *const name;
7124 int signature;
7127 /* Describe the number and signedness of arguments; arg[0] == result
7128 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
7129 static const char signature_args[][4] =
7131 #define SH_BLTIN_V2SI2 0
7132 { 4, 4 },
7133 #define SH_BLTIN_V4HI2 1
7134 { 4, 4 },
7135 #define SH_BLTIN_V2SI3 2
7136 { 4, 4, 4 },
7137 #define SH_BLTIN_V4HI3 3
7138 { 4, 4, 4 },
7139 #define SH_BLTIN_V8QI3 4
7140 { 4, 4, 4 },
7141 #define SH_BLTIN_MAC_HISI 5
7142 { 1, 4, 4, 1 },
7143 #define SH_BLTIN_SH_HI 6
7144 { 4, 4, 1 },
7145 #define SH_BLTIN_SH_SI 7
7146 { 4, 4, 1 },
7147 #define SH_BLTIN_V4HI2V2SI 8
7148 { 4, 4, 4 },
7149 #define SH_BLTIN_V4HI2V8QI 9
7150 { 4, 4, 4 },
7151 #define SH_BLTIN_SISF 10
7152 { 4, 2 },
7153 #define SH_BLTIN_LDUA_L 11
7154 { 2, 8 },
7155 #define SH_BLTIN_LDUA_Q 12
7156 { 1, 8 },
7157 #define SH_BLTIN_STUA_L 13
7158 { 0, 8, 2 },
7159 #define SH_BLTIN_STUA_Q 14
7160 { 0, 8, 1 },
7161 #define SH_BLTIN_NUM_SHARED_SIGNATURES 15
7162 #define SH_BLTIN_2 15
7163 #define SH_BLTIN_SU 15
7164 { 1, 2 },
7165 #define SH_BLTIN_3 16
7166 #define SH_BLTIN_SUS 16
7167 { 2, 2, 1 },
7168 #define SH_BLTIN_PSSV 17
7169 { 0, 8, 2, 2 },
7170 #define SH_BLTIN_XXUU 18
7171 #define SH_BLTIN_UUUU 18
7172 { 1, 1, 1, 1 },
7173 #define SH_BLTIN_PV 19
7174 { 0, 8 },
7176 /* mcmv: operands considered unsigned. */
7177 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
7178 /* mperm: control value considered unsigned int. */
7179 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
7180 /* mshards_q: returns signed short. */
7181 /* nsb: takes long long arg, returns unsigned char. */
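/* For illustration, reading the table above: SH_BLTIN_SU is { 1, 2 }, an
   unsigned result computed from one signed argument; SH_BLTIN_STUA_L is
   { 0, 8, 2 }, no result (void) with a pointer argument and a signed
   argument.  The actual machine modes are taken from the insn pattern's
   operands when the builtins are registered below.  */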
7182 static const struct builtin_description bdesc[] =
7184 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
7185 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
7186 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
7187 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
7188 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
7189 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
7190 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
7191 #if 0
7192 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7193 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7194 #endif
7195 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
7196 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
7197 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
7198 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
7199 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
7200 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
7201 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
7202 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
7203 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
7204 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
7205 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
7206 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
7207 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
7208 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
7209 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
7210 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
7211 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
7212 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
7213 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
7214 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
7215 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
7216 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
7217 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
7218 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
7219 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
7220 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
7221 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
7222 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
7223 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
7224 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
7225 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
7226 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
7227 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
7228 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
7229 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
7230 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
7231 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
7232 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
7233 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
7234 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
7235 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
7236 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
7237 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
7238 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
7239 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
7240 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
7241 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
7242 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
7243 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
7244 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
7245 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
7246 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
7247 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
7248 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
7249 #if 0
7250 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7251 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7252 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7253 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7254 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7255 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7256 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7257 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7258 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7259 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7260 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7261 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7262 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7263 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7264 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7265 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7266 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
7267 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
7268 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
7269 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
7270 #endif
7273 static void
7274 sh_media_init_builtins ()
7276 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
7277 const struct builtin_description *d;
7279 memset (shared, 0, sizeof shared);
7280 for (d = bdesc; d - bdesc < sizeof bdesc / sizeof bdesc[0]; d++)
7282 tree type, arg_type;
7283 int signature = d->signature;
7284 int i;
7286 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
7287 type = shared[signature];
7288 else
7290 int has_result = signature_args[signature][0] != 0;
7292 if (signature_args[signature][1] == 8
7293 && (insn_data[d->icode].operand[has_result].mode != Pmode))
7294 continue;
7295 if (! TARGET_FPU_ANY
7296 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
7297 continue;
7298 type = void_list_node;
7299 for (i = 3; ; i--)
7301 int arg = signature_args[signature][i];
7302 int opno = i - 1 + has_result;
7304 if (arg == 8)
7305 arg_type = ptr_type_node;
7306 else if (arg)
7307 arg_type = ((*lang_hooks.types.type_for_mode)
7308 (insn_data[d->icode].operand[opno].mode,
7309 (arg & 1)));
7310 else if (i)
7311 continue;
7312 else
7313 arg_type = void_type_node;
7314 if (i == 0)
7315 break;
7316 type = tree_cons (NULL_TREE, arg_type, type);
7318 type = build_function_type (arg_type, type);
7319 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
7320 shared[signature] = type;
7322 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
7323 NULL, NULL_TREE);
7327 static void
7328 sh_init_builtins ()
7330 if (TARGET_SHMEDIA)
7331 sh_media_init_builtins ();
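/* For illustration: for the first table entry, CODE_FOR_absv2si2 with
   signature SH_BLTIN_V2SI2 ({ 4, 4 }), the loop above builds a function
   type whose argument and return types are derived from the insn's V2SI
   operand modes, so the registered builtin behaves roughly like

     v2si __builtin_absv2si2 (v2si);

   where v2si stands for the vector type that type_for_mode returns for
   V2SImode.  */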
7334 /* Expand an expression EXP that calls a built-in function,
7335 with result going to TARGET if that's convenient
7336 (and in mode MODE if that's convenient).
7337 SUBTARGET may be used as the target for computing one of EXP's operands.
7338 IGNORE is nonzero if the value is to be ignored. */
7340 static rtx
7341 sh_expand_builtin (exp, target, subtarget, mode, ignore)
7342 tree exp;
7343 rtx target;
7344 rtx subtarget ATTRIBUTE_UNUSED;
7345 enum machine_mode mode ATTRIBUTE_UNUSED;
7346 int ignore;
7348 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7349 tree arglist = TREE_OPERAND (exp, 1);
7350 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7351 const struct builtin_description *d = &bdesc[fcode];
7352 enum insn_code icode = d->icode;
7353 int signature = d->signature;
7354 enum machine_mode tmode = VOIDmode;
7355 int nop = 0, i;
7356 rtx op[4];
7357 rtx pat;
7359 if (signature_args[signature][0])
7361 if (ignore)
7362 return 0;
7364 tmode = insn_data[icode].operand[0].mode;
7365 if (! target
7366 || GET_MODE (target) != tmode
7367 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7368 target = gen_reg_rtx (tmode);
7369 op[nop++] = target;
7371 else
7372 target = 0;
7374 for (i = 1; i <= 3; i++, nop++)
7376 tree arg;
7377 enum machine_mode opmode, argmode;
7379 if (! signature_args[signature][i])
7380 break;
7381 arg = TREE_VALUE (arglist);
7382 arglist = TREE_CHAIN (arglist);
7383 opmode = insn_data[icode].operand[nop].mode;
7384 argmode = TYPE_MODE (TREE_TYPE (arg));
7385 if (argmode != opmode)
7386 arg = build1 (NOP_EXPR,
7387 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
7388 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
7389 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
7390 op[nop] = copy_to_mode_reg (opmode, op[nop]);
7393 switch (nop)
7395 case 1:
7396 pat = (*insn_data[d->icode].genfun) (op[0]);
7397 break;
7398 case 2:
7399 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
7400 break;
7401 case 3:
7402 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
7403 break;
7404 case 4:
7405 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
7406 break;
7408 if (! pat)
7409 return 0;
7410 emit_insn (pat);
7411 return target;
7413 #include "gt-sh.h"