/* Output routines for GCC for Hitachi / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
   Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "expr.h"
#include "optabs.h"
#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "toplev.h"
#include "recog.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "real.h"
#include "langhooks.h"

int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
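
/* Note added for illustration: for a DImode value held in a register
   pair on a big-endian target, MSW == 0 and LSW == 1, so the
   lower-numbered register of the pair holds the most significant word;
   on little endian the two offsets swap.  */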

/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_P (size) : CONST_OK_FOR_I (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))

/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

/* ??? The pragma interrupt support will not work for SH3.  */
/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
   output code for the next function appropriate for an interrupt handler.  */
int pragma_interrupt;

/* This is set by the trap_exit attribute for functions.  It specifies
   a trap number to be used in a trapa instruction at function exit
   (instead of an rte instruction).  */
int trap_exit;

/* This is used by the sp_switch attribute for functions.  It specifies
   a variable holding the address of the stack the interrupt function
   should switch to/from at entry/exit.  */
rtx sp_switch;

/* This is set by #pragma trapa, and is similar to the above, except that
   the compiler doesn't emit code to preserve all registers.  */
static int pragma_trapa;

/* This is set by #pragma nosave_low_regs.  This is useful on the SH3,
   which has a separate set of low regs for User and Supervisor modes.
   This should only be used for the lowest level of interrupts.  Higher levels
   of interrupts must save the registers in case they themselves are
   interrupted.  */
int pragma_nosave_low_regs;

/* This is used for communication between SETUP_INCOMING_VARARGS and
   sh_expand_prologue.  */
int current_function_anonymous_args;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */
rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number.  */
int regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

/* Provide reg_class from a letter such as appears in the machine
   description.  */
const enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS,  /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
  /* e */ NO_REGS,   /* f */ FP_REGS,     /* g */ NO_REGS,    /* h */ NO_REGS,
  /* i */ NO_REGS,   /* j */ NO_REGS,     /* k */ SIBCALL_REGS, /* l */ PR_REGS,
  /* m */ NO_REGS,   /* n */ NO_REGS,     /* o */ NO_REGS,    /* p */ NO_REGS,
  /* q */ NO_REGS,   /* r */ NO_REGS,     /* s */ NO_REGS,    /* t */ T_REGS,
  /* u */ NO_REGS,   /* v */ NO_REGS,     /* w */ FP0_REGS,   /* x */ MAC_REGS,
  /* y */ FPUL_REGS, /* z */ R0_REGS
};

int assembler_dialect;

static void split_branches PARAMS ((rtx));
static int branch_dest PARAMS ((rtx));
static void force_into PARAMS ((rtx, rtx));
static void print_slot PARAMS ((rtx));
static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
static void dump_table PARAMS ((rtx));
static int hi_const PARAMS ((rtx));
static int broken_move PARAMS ((rtx));
static int mova_p PARAMS ((rtx));
static rtx find_barrier PARAMS ((int, rtx, rtx));
static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
static rtx gen_block_redirect PARAMS ((rtx, int, int));
static void output_stack_adjust PARAMS ((int, rtx, int, rtx (*) (rtx)));
static rtx frame_insn PARAMS ((rtx));
static rtx push PARAMS ((int));
static void pop PARAMS ((int));
static void push_regs PARAMS ((HOST_WIDE_INT *));
static void calc_live_regs PARAMS ((int *, HOST_WIDE_INT *));
static void mark_use PARAMS ((rtx, rtx *));
static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
static rtx mark_constant_pool_use PARAMS ((rtx));
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void sh_insert_attributes PARAMS ((tree, tree *));
static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static int sh_use_dfa_interface PARAMS ((void));
static int sh_issue_rate PARAMS ((void));

static bool sh_cannot_modify_jumps_p PARAMS ((void));
static bool sh_ms_bitfield_layout_p PARAMS ((tree));

static void sh_encode_section_info PARAMS ((tree, int));
static const char *sh_strip_name_encoding PARAMS ((const char *));
static void sh_init_builtins PARAMS ((void));
static void sh_media_init_builtins PARAMS ((void));
static rtx sh_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
static int flow_dependent_p PARAMS ((rtx, rtx));
static void flow_dependent_p_1 PARAMS ((rtx, rtx, void *));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  sh_use_dfa_interface
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_strip_name_encoding

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

struct gcc_target targetm = TARGET_INITIALIZER;

/* Print the operand address in x to the stream.  */

void
print_operand_address (stream, x)
     FILE *stream;
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
        rtx base = XEXP (x, 0);
        rtx index = XEXP (x, 1);

        switch (GET_CODE (index))
          {
          case CONST_INT:
            fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
                     reg_names[true_regnum (base)]);
            break;

          case REG:
          case SUBREG:
            {
              int base_num = true_regnum (base);
              int index_num = true_regnum (index);

              fprintf (stream, "@(r0,%s)",
                       reg_names[MAX (base_num, index_num)]);
              break;
            }

          default:
            debug_rtx (x);
            abort ();
          }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}

/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  print an `x' if `m' will print `base,index'.
   'N'  print 'r63' if the operand is (const_int 0).
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
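
/* Worked example (added, not in the original source): for a DImode
   value in the register pair r4/r5 on a big-endian target, 'S' prints
   r4 (REGNO + MSW) and 'R' prints r5 (REGNO + LSW); 'T' always prints
   the following register of the pair, here r5.  */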

void
print_operand (stream, x, code)
     FILE *stream;
     rtx x;
     int code;
{
  switch (code)
    {
    case '.':
      if (final_sequence
          && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
          && get_attr_length (XVECEXP (final_sequence, 0, 1)))
        fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      if (trap_exit)
        fprintf (stream, "trapa #%d", trap_exit);
      else if (sh_cfun_interrupt_handler_p ())
        fprintf (stream, "rte");
      else
        fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
        fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
        rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

        if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
          fputs ("/u", stream);
        break;
      }
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    case 'R':
      fputs (reg_names[REGNO (x) + LSW], (stream));
      break;
    case 'S':
      fputs (reg_names[REGNO (x) + MSW], (stream));
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
        {
        case REG:
          fputs (reg_names[REGNO (x) + 1], (stream));
          break;
        case MEM:
          if (GET_CODE (XEXP (x, 0)) != PRE_DEC
              && GET_CODE (XEXP (x, 0)) != POST_INC)
            x = adjust_address (x, SImode, 4);
          print_operand_address (stream, XEXP (x, 0));
          break;
        default:
          break;
        }
      break;
    case 'o':
      switch (GET_CODE (x))
        {
        case PLUS:  fputs ("add", stream); break;
        case MINUS: fputs ("sub", stream); break;
        case MULT:  fputs ("mul", stream); break;
        case DIV:   fputs ("div", stream); break;
        case EQ:    fputs ("eq",  stream); break;
        case NE:    fputs ("ne",  stream); break;
        case GT:  case LT:  fputs ("gt",  stream); break;
        case GE:  case LE:  fputs ("ge",  stream); break;
        case GTU: case LTU: fputs ("gtu", stream); break;
        case GEU: case LEU: fputs ("geu", stream); break;
        default:
          break;
        }
      break;
    case 'M':
      if (GET_CODE (x) == MEM
          && GET_CODE (XEXP (x, 0)) == PLUS
          && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
              || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
        fputc ('x', stream);
      break;

    case 'm':
      if (GET_CODE (x) != MEM)
        abort ();
      x = XEXP (x, 0);
      switch (GET_CODE (x))
        {
        case REG:
        case SUBREG:
          print_operand (stream, x, 0);
          fputs (", 0", stream);
          break;

        case PLUS:
          print_operand (stream, XEXP (x, 0), 0);
          fputs (", ", stream);
          print_operand (stream, XEXP (x, 1), 0);
          break;

        default:
          abort ();
        }
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
        {
          fprintf ((stream), "r63");
          break;
        }
      goto default_output;
    case 'u':
      if (GET_CODE (x) == CONST_INT)
        {
          fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
          break;
        }
      /* Fall through.  */

    default_output:
    default:
      switch (GET_CODE (x))
        {
          /* FIXME: We need this on SHmedia32 because reload generates
             some sign-extended HI or QI loads into DImode registers
             but, because Pmode is SImode, the address ends up with a
             subreg:SI of the DImode register.  Maybe reload should be
             fixed so as to apply alter_subreg to such loads?  */
        case SUBREG:
          if (SUBREG_BYTE (x) != 0
              || GET_CODE (SUBREG_REG (x)) != REG)
            abort ();

          x = SUBREG_REG (x);
          /* Fall through.  */

        case REG:
          if (FP_REGISTER_P (REGNO (x))
              && GET_MODE (x) == V16SFmode)
            fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
          else if (FP_REGISTER_P (REGNO (x))
                   && GET_MODE (x) == V4SFmode)
            fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
          else if (GET_CODE (x) == REG
                   && GET_MODE (x) == V2SFmode)
            fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
          else if (FP_REGISTER_P (REGNO (x))
                   && GET_MODE_SIZE (GET_MODE (x)) > 4)
            fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
          else
            fputs (reg_names[REGNO (x)], (stream));
          break;

        case MEM:
          output_address (XEXP (x, 0));
          break;

        case CONST:
          if (TARGET_SHMEDIA
              && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
              && GET_MODE (XEXP (x, 0)) == DImode
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
              && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
            {
              rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);

              fputc ('(', stream);
              if (GET_CODE (val) == ASHIFTRT)
                {
                  fputc ('(', stream);
                  if (GET_CODE (XEXP (val, 0)) == CONST)
                    fputc ('(', stream);
                  output_addr_const (stream, XEXP (val, 0));
                  if (GET_CODE (XEXP (val, 0)) == CONST)
                    fputc (')', stream);
                  fputs (" >> ", stream);
                  output_addr_const (stream, XEXP (val, 1));
                  fputc (')', stream);
                }
              else
                {
                  if (GET_CODE (val) == CONST)
                    fputc ('(', stream);
                  output_addr_const (stream, val);
                  if (GET_CODE (val) == CONST)
                    fputc (')', stream);
                }
              fputs (" & 65535)", stream);
              break;
            }

          /* Fall through.  */
        default:
          if (TARGET_SH1)
            fputc ('#', stream);
          output_addr_const (stream, x);
          break;
        }
      break;
    }
}

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (value, target)
     rtx value, target;
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */

int
expand_block_move (operands)
     rtx *operands;
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (! constp || align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
        return 0;
      else if (bytes == 12)
        {
          tree entry_name;
          rtx sym;
          rtx func_addr_rtx;
          rtx r4 = gen_rtx (REG, SImode, 4);
          rtx r5 = gen_rtx (REG, SImode, 5);

          entry_name = get_identifier ("__movstrSI12_i4");

          sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
          func_addr_rtx = copy_to_mode_reg (Pmode, sym);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);
          emit_insn (gen_block_move_real_i4 (func_addr_rtx));
          return 1;
        }
      else if (! TARGET_SMALLCODE)
        {
          tree entry_name;
          rtx sym;
          rtx func_addr_rtx;
          int dwords;
          rtx r4 = gen_rtx (REG, SImode, 4);
          rtx r5 = gen_rtx (REG, SImode, 5);
          rtx r6 = gen_rtx (REG, SImode, 6);

          entry_name = get_identifier (bytes & 4
                                       ? "__movstr_i4_odd"
                                       : "__movstr_i4_even");
          sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
          func_addr_rtx = copy_to_mode_reg (Pmode, sym);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);

          dwords = bytes >> 3;
          emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
          emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
          return 1;
        }
      else
        return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      tree entry_name;
      rtx sym;
      rtx func_addr_rtx;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movstrSI%d", bytes);
      entry_name = get_identifier (entry);
      sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      tree entry_name;
      rtx sym;
      rtx func_addr_rtx;
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      entry_name = get_identifier ("__movstr");
      sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
         for each 64 bytes moved.  Then the negative bit left over is used
         as an index into a list of move instructions.  e.g., a 72 byte move
         would be set up with size(r6) = 14, for one iteration through the
         big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
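
      /* Worked check of the 72-byte example above (added for
         illustration): bytes / 4 == 18, so final_switch == 16 - 2 == 14
         and while_loop == (18 / 16 - 1) * 16 == 0; r6 starts at 14, one
         trip through the loop subtracts 16, and the leftover -2 selects
         the tail moves, exactly as described.  */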
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}

/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

int
prepare_move_operands (operands, mode)
     rtx operands[];
     enum machine_mode mode;
{
  if ((mode == SImode || mode == DImode) && flag_pic)
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
        {
          if (GET_CODE (operands[0]) == MEM)
            operands[1] = force_reg (Pmode, operands[1]);
          else if (TARGET_SHMEDIA
                   && GET_CODE (operands[1]) == LABEL_REF
                   && target_reg_operand (operands[0], mode))
            /* It's ok.  */;
          else
            {
              temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
              operands[1] = legitimize_pic_address (operands[1], mode, temp);
            }
        }
      else if (GET_CODE (operands[1]) == CONST
               && GET_CODE (XEXP (operands[1], 0)) == PLUS
               && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
        {
          temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
                                         mode, temp);
          operands[1] = expand_binop (mode, add_optab, temp,
                                      XEXP (XEXP (operands[1], 0), 1),
                                      no_new_pseudos ? temp
                                      : gen_reg_rtx (Pmode),
                                      0, OPTAB_LIB_WIDEN);
        }
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
          && ! register_operand (operands[1], mode))
        operands[1] = copy_to_mode_reg (mode, operands[1]);

      /* This case can happen while generating code to move the result
         of a library call to the target.  Reject `st r0,@(rX,rY)' because
         reload will fail to find a spill register for rX, since r0 is already
         being used for the source.  */
      else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
               && GET_CODE (operands[0]) == MEM
               && GET_CODE (XEXP (operands[0], 0)) == PLUS
               && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
        operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  return 0;
}

/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (code)
     enum rtx_code code;
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      abort ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }
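
  /* Note added for illustration: the SH compare instructions only come
     in the "greater" forms (cmp/gt, cmp/ge, cmp/hi, cmp/hs), so a
     requested LT, LE, LTU or LEU is rewritten above as its mirror image
     with the operands swapped; the T bit result is unchanged.  */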

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
           || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
                gen_rtx (SET, VOIDmode, t_reg,
                         gen_rtx (code, SImode,
                                  sh_compare_op0, sh_compare_op1)),
                gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx (SET, VOIDmode, t_reg,
                        gen_rtx (code, SImode, sh_compare_op0,
                                 sh_compare_op1)));

  return t_reg;
}

/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (operands, code)
     rtx *operands;
     int code;
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
          || code == GTU || code == GEU
          || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
        sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
                        gen_rtx_REG (SImode, T_REG),
                        gen_rtx (code, SImode, sh_compare_op0,
                                 sh_compare_op1));
  if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx (PARALLEL, VOIDmode,
                      gen_rtvec (2, insn,
                                 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}

/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

const char *
output_movedouble (insn, operands, mode)
     rtx insn ATTRIBUTE_UNUSED;
     rtx operands[];
     enum machine_mode mode;
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
        return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
         when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
        return "mov %T1,%T0\n\tmov %1,%0";
      else
        return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
        output_asm_insn ("mov #-1,%S0", operands);
      else
        output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      if (GET_CODE (inside) == REG)
        ptrreg = REGNO (inside);
      else if (GET_CODE (inside) == SUBREG)
        ptrreg = subreg_regno (inside);
      else if (GET_CODE (inside) == PLUS)
        {
          ptrreg = REGNO (XEXP (inside, 0));
          /* ??? A r0+REG address shouldn't be possible here, because it isn't
             an offsettable address.  Unfortunately, offsettable addresses use
             QImode to check the offset, and a QImode offsettable address
             requires r0 for the other operand, which is not currently
             supported, so we can't use the 'o' constraint.
             Thus we must check for and handle r0+REG addresses here.
             We punt for now, since this is likely very rare.  */
          if (GET_CODE (XEXP (inside, 1)) == REG)
            abort ();
        }
      else if (GET_CODE (inside) == LABEL_REF)
        return "mov.l %1,%0\n\tmov.l %1+4,%T0";
      else if (GET_CODE (inside) == POST_INC)
        return "mov.l %1,%0\n\tmov.l %1,%T0";
      else
        abort ();

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
        return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}

/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (insn)
     rtx insn;
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}

const char *
output_far_jump (insn, op)
     rtx insn;
     rtx op;
{
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));

  this.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1; braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
        {
          if (TARGET_SH2)
            jump = "mov.l %O0,%1; braf %1";
          else
            jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
        }
      else
        jump = "mov.l %O0,%1; jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE (PREV_INSN (insn)) == INSN
      && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
    {
      this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
        jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
        print_slot (final_sequence);
      else
        output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
        print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
         Fortunately, MACL is fixed and call-clobbered, and we never
         need its value across jumps, so save r13 in it instead of in
         the stack.  */
      if (TARGET_SH5)
        output_asm_insn ("lds r13, macl", 0);
      else
        output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      if (TARGET_SH5)
        output_asm_insn ("sts macl, r13", 0);
      else
        output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      braf_base_lab = gen_label_rtx ();
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
                                 CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
  this.op = op;
  if (far && flag_pic)
    {
      if (TARGET_SH2)
        this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
  return "";
}

/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

const char *
output_branch (logic, insn, operands)
     int logic;
     rtx insn;
     rtx *operands;
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
         branch to exceed its range (we could reverse it, but only
         when we know we won't overextend other branches; this should
         best be handled by relaxation).
         It can also happen when other condbranches hoist delay slot insn
         from their destination, thus leading to code size increase.
         But the branch will still be in the range -4092..+4098 bytes.  */

      if (! TARGET_RELAX)
        {
          int label = lf++;
          /* The call to print_slot will clobber the operands.  */
          rtx op0 = operands[0];

          /* If the instruction in the delay slot is annulled (true), then
             there is no delay slot where we can put it now.  The only safe
             place for it is after the label.  final will do that by default.  */

          if (final_sequence
              && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
            {
              asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
                           ASSEMBLER_DIALECT ? "/" : ".", label);
              print_slot (final_sequence);
            }
          else
            asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

          output_asm_insn ("bra\t%l0", &op0);
          fprintf (asm_out_file, "\tnop\n");
          ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LF", label);

          return "";
        }
      /* When relaxing, handle this like a short branch.  The linker
         will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";
    default:
      /* There should be no longer branches now - that would
         indicate that something has destroyed the branches set
         up in machine_dependent_reorg.  */
      abort ();
    }
}

const char *
output_branchy_insn (code, template, insn, operands)
     enum rtx_code code;
     const char *template;
     rtx insn;
     rtx *operands;
{
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
    {
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
        {
          /* Following branch not taken */
          operands[9] = gen_label_rtx ();
          emit_label_after (operands[9], next_insn);
          INSN_ADDRESSES_NEW (operands[9],
                              INSN_ADDRESSES (INSN_UID (next_insn))
                              + get_attr_length (next_insn));
          return template;
        }
      else
        {
          int offset = (branch_dest (next_insn)
                        - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
          if (offset >= -252 && offset <= 258)
            {
              if (GET_CODE (src) == IF_THEN_ELSE)
                /* branch_true */
                src = XEXP (src, 1);
              operands[9] = src;
              return template;
            }
        }
    }
  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
                      INSN_ADDRESSES (INSN_UID (insn))
                      + get_attr_length (insn));
  return template;
}

const char *
output_ieee_ccmpeq (insn, operands)
     rtx insn, *operands;
{
  return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
}

/* Output to FILE the start of the assembler file.  */

void
output_file_start (file)
     FILE *file;
{
  output_file_directive (file, main_input_filename);

  /* Switch to the data section so that the coffsem symbol
     isn't in the text section.  */
  data_section ();

  if (TARGET_LITTLE_ENDIAN)
    fprintf (file, "\t.little\n");

  if (TARGET_SHCOMPACT)
    fprintf (file, "\t.mode\tSHcompact\n");
  else if (TARGET_SHMEDIA)
    fprintf (file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
             TARGET_SHMEDIA64 ? 64 : 32);
}

/* Actual number of instructions used to make a shift by N.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same.  */
static const char shift_insns[] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  */
static const short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
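
/* Worked example (added): a left shift by 7 uses shift_amounts[7]
   == {2, 2, 1, 2}, four single-instruction shifts whose amounts sum
   to 7, matching shift_insns[7] == 4; the 1-bit step sits in the
   middle so the first and last insns stay eligible for branch delay
   slots.  */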

/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static const char ext_shift_insns[] =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};

/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an
   arithmetic shift to shift it by N without data loss, and quicker than by
   other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
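
/* Note added for illustration: (n | 8) == 15 holds exactly for n == 7
   and n == 15, so those are the only shift counts for which the macro
   answers yes.  */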

/* This is used in length attributes in sh.md to help compute the length
   of arbitrary constant shift instructions.  */

int
shift_insns_rtx (insn)
     rtx insn;
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int shift_count = INTVAL (XEXP (set_src, 1));
  enum rtx_code shift_code = GET_CODE (set_src);

  switch (shift_code)
    {
    case ASHIFTRT:
      return ashiftrt_insns[shift_count];
    case LSHIFTRT:
    case ASHIFT:
      return shift_insns[shift_count];
    default:
      abort();
    }
}

/* Return the cost of a shift.  */

int
shiftcosts (x)
     rtx x;
{
  int value;

  if (TARGET_SHMEDIA)
    return 1;

  if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      if (GET_MODE (x) == DImode
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && INTVAL (XEXP (x, 1)) == 1)
        return 2;

      /* Everything else is invalid, because there is no pattern for it.  */
      return 10000;
    }
  /* If shift by a non constant, then this will be expensive.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return SH_DYNAMIC_SHIFT_COST;

  value = INTVAL (XEXP (x, 1));

  /* Otherwise, return the true cost in instructions.  */
  if (GET_CODE (x) == ASHIFTRT)
    {
      int cost = ashiftrt_insns[value];
      /* If SH3, then we put the constant in a reg and use shad.  */
      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
        cost = 1 + SH_DYNAMIC_SHIFT_COST;
      return cost;
    }
  else
    return shift_insns[value];
}

/* Return the cost of an AND operation.  */

int
andcosts (x)
     rtx x;
{
  int i;

  /* Anding with a register is a single cycle and instruction.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return 1;

  i = INTVAL (XEXP (x, 1));

  if (TARGET_SHMEDIA)
    {
      if ((GET_CODE (XEXP (x, 1)) == CONST_INT
           && CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
          || EXTRA_CONSTRAINT_S (XEXP (x, 1)))
        return 1;
      else
        return 2;
    }

  /* These constants are single cycle extu.[bw] instructions.  */
  if (i == 0xff || i == 0xffff)
    return 1;
  /* Constants that can be used in an and immediate instruction are a single
     cycle, but this requires r0, so make it a little more expensive.  */
  if (CONST_OK_FOR_L (i))
    return 2;
  /* Constants that can be loaded with a mov immediate and an and.
     This case is probably unnecessary.  */
  if (CONST_OK_FOR_I (i))
    return 2;
  /* Any other constant requires a 2 cycle pc-relative load plus an and.
     This case is probably unnecessary.  */
  return 3;
}
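
/* Illustrative examples (added): an AND with 0xff or 0xffff becomes a
   single extu.b or extu.w, hence cost 1; a small mask such as 0x7f
   should satisfy the `L' immediate constraint but ties up r0, hence
   the slightly higher cost of 2.  */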

/* Return the cost of an addition or a subtraction.  */

int
addsubcosts (x)
     rtx x;
{
  /* Adding a register is a single cycle insn.  */
  if (GET_CODE (XEXP (x, 1)) == REG
      || GET_CODE (XEXP (x, 1)) == SUBREG)
    return 1;

  /* Likewise for small constants.  */
  if (GET_CODE (XEXP (x, 1)) == CONST_INT
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
    return 1;

  if (TARGET_SHMEDIA)
    switch (GET_CODE (XEXP (x, 1)))
      {
      case CONST:
      case LABEL_REF:
      case SYMBOL_REF:
        return TARGET_SHMEDIA64 ? 5 : 3;

      case CONST_INT:
        if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
          return 2;
        else if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1)) >> 16))
          return 3;
        else if (CONST_OK_FOR_J ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
          return 4;

        /* Fall through.  */
      default:
        return 5;
      }

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
  return 3;
}
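
/* Note added for illustration: the SHmedia CONST_INT ladder above
   appears to charge one extra insn for each additional 16-bit chunk
   the constant needs, presumably a movi followed by shori steps
   before the actual add.  */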

/* Return the cost of a multiply.  */

int
multcosts (x)
     rtx x ATTRIBUTE_UNUSED;
{
  if (TARGET_SHMEDIA)
    return 3;

  if (TARGET_SH2)
    {
      /* We have a mul insn, so we can never take more than the mul and the
         read of the mac reg, but count more because of the latency and extra
         reg usage.  */
      if (TARGET_SMALLCODE)
        return 2;
      return 3;
    }

  /* If we're aiming at small code, then just count the number of
     insns in a multiply call sequence.  */
  if (TARGET_SMALLCODE)
    return 5;

  /* Otherwise count all the insns in the routine we'd be calling too.  */
  return 20;
}

/* Code to expand a shift.  */

void
gen_ashift (type, n, reg)
     int type;
     int n;
     rtx reg;
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
        type = LSHIFTRT;
      else
        type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
      emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
      break;
    case LSHIFTRT:
      if (n == 1)
        emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
      else
        emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
      break;
    case ASHIFT:
      emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
      break;
    }
}

/* Same for HImode */

void
gen_ashift_hi (type, n, reg)
     int type;
     int n;
     rtx reg;
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
        type = LSHIFTRT;
      else
        type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
    case LSHIFTRT:
      /* We don't have HImode right shift operations because using the
         ordinary 32 bit shift instructions for that doesn't generate proper
         zero/sign extension.
         gen_ashift_hi is only called in contexts where we know that the
         sign extension works out correctly.  */
      {
        int offset = 0;
        if (GET_CODE (reg) == SUBREG)
          {
            offset = SUBREG_BYTE (reg);
            reg = SUBREG_REG (reg);
          }
        gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
        break;
      }
    case ASHIFT:
      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
      break;
    }
}

/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  */

void
gen_shifty_op (code, operands)
     int code;
     rtx *operands;
{
  int value = INTVAL (operands[2]);
  int max, i;

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 0x1f;
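
  /* Note added for illustration: the masking means, e.g., a nominal
     count of 33 is emitted as a shift by 1, consistent with the 32-bit
     SImode width these sequences are built for.  */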

  if (value == 31)
    {
      if (code == LSHIFTRT)
        {
          emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
          emit_insn (gen_movt (operands[0]));
          return;
        }
      else if (code == ASHIFT)
        {
          /* There is a two instruction sequence for 31 bit left shifts,
             but it requires r0.  */
          if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
            {
              emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
              emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
              return;
            }
        }
    }
  else if (value == 0)
    {
      /* This can happen when not optimizing.  We must output something here
         to prevent the compiler from aborting in final.c after the try_split
         call.  */
      emit_insn (gen_nop ());
      return;
    }

  max = shift_insns[value];
  for (i = 0; i < max; i++)
    gen_ashift (code, shift_amounts[value][i], operands[0]);
}

/* Same as above, but optimized for values where the topmost bits don't
   matter.  */

void
gen_shifty_hi_op (code, operands)
     int code;
     rtx *operands;
{
  int value = INTVAL (operands[2]);
  int max, i;
  void (*gen_fun) PARAMS ((int, int, rtx));

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */
  value &= 31;
  if (value == 0)
    {
      emit_insn (gen_nop ());
      return;
    }

  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
  if (code == ASHIFT)
    {
      max = ext_shift_insns[value];
      for (i = 0; i < max; i++)
        gen_fun (code, ext_shift_amounts[value][i], operands[0]);
    }
  else
    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_shift_insns[value] - 1; i >= 0; i--)
      gen_fun (code, ext_shift_amounts[value][i], operands[0]);
}

/* Output RTL for an arithmetic right shift.  */

/* ??? Rewrite to use super-optimizer sequences.  */

int
expand_ashiftrt (operands)
     rtx *operands;
{
  rtx sym;
  rtx wrk;
  char func[18];
  tree func_name;
  int value;

  if (TARGET_SH3)
    {
      if (GET_CODE (operands[2]) != CONST_INT)
        {
          rtx count = copy_to_mode_reg (SImode, operands[2]);
          emit_insn (gen_negsi2 (count, count));
          emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
          return 1;
        }
      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
               > 1 + SH_DYNAMIC_SHIFT_COST)
        {
          rtx count
            = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
          emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
          return 1;
        }
    }
  if (GET_CODE (operands[2]) != CONST_INT)
    return 0;

  value = INTVAL (operands[2]) & 31;

  if (value == 31)
    {
      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
      return 1;
    }
  else if (value >= 16 && value <= 19)
    {
      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
      value -= 16;
      while (value--)
        gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }
  /* Expand a short sequence inline, longer call a magic routine.  */
  else if (value <= 5)
    {
      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);
      while (value--)
        gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  sprintf (func, "__ashiftrt_r4_%d", value);
  func_name = get_identifier (func);
  sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
  emit_move_insn (wrk, sym);
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
  return 1;
}

int
sh_dynamicalize_shift_p (count)
     rtx count;
{
  return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
}
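
/* Illustrative example (added): assuming SH_DYNAMIC_SHIFT_COST is 1 on
   targets that have dynamic shifts, a count such as 30, where
   shift_insns[30] == 3, is cheaper as a mov #n plus one shad/shld, so
   this function returns nonzero for it.  */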

/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "r")
        (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
                           (match_operand:SI 2 "const_int_operand" "n"))
                (match_operand:SI 3 "const_int_operand" "n"))) .
  LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
  return 0 for simple right / left or left/right shift combination.
  return 1 for a combination of shifts with zero_extend.
  return 2 for a combination of shifts with an AND that needs r0.
  return 3 for a combination of shifts with an AND that needs an extra
    scratch register, when the three highmost bits of the AND mask are clear.
  return 4 for a combination of shifts with an AND that needs an extra
    scratch register, when any of the three highmost bits of the AND mask
    is set.
  If ATTRP is set, store an initial right shift width in ATTRP[0],
  and the instruction length in ATTRP[1] .  These values are not valid
  when returning 0.
  When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
  shift_amounts for the last shift value that is to be used before the
  sign extend.  */

int
shl_and_kind (left_rtx, mask_rtx, attrp)
     rtx left_rtx, mask_rtx;
     int *attrp;
{
  unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
  int left = INTVAL (left_rtx), right;
  int best = 0;
  int cost, best_cost = 10000;
  int best_right = 0, best_len = 0;
  int i;
  int can_ext;

  if (left < 0 || left > 31)
    return 0;
  if (GET_CODE (mask_rtx) == CONST_INT)
    mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
  else
    mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
  /* Can this be expressed as a right shift / left shift pair ?  */
  lsb = ((mask ^ (mask - 1)) >> 1) + 1;
  right = exact_log2 (lsb);
  mask2 = ~(mask + lsb - 1);
  lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
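
  /* Worked example (added for illustration): for mask == 0x1fe,
     mask ^ (mask - 1) == 0x3, so lsb == 2 and right == 1; mask2 then
     isolates everything above the mask, and lsb2 == 0x200 is its
     lowest set bit.  */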
  /* mask has no zeroes but trailing zeroes <==> ! mask2 */
  if (! mask2)
    best_cost = shift_insns[right] + shift_insns[right + left];
  /* mask has no trailing zeroes <==> ! right */
  else if (! right && mask2 == ~(lsb2 - 1))
    {
      int late_right = exact_log2 (lsb2);
      best_cost = shift_insns[left + late_right] + shift_insns[late_right];
    }
  /* Try to use zero extend */
  if (mask2 == ~(lsb2 - 1))
    {
      int width, first;

      for (width = 8; width <= 16; width += 8)
        {
          /* Can we zero-extend right away?  */
          if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
            {
              cost
                = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
              if (cost < best_cost)
                {
                  best = 1;
                  best_cost = cost;
                  best_right = right;
                  best_len = cost;
                  if (attrp)
                    attrp[2] = -1;
                }
              continue;
            }
          /* ??? Could try to put zero extend into initial right shift,
             or even shift a bit left before the right shift.  */
          /* Determine value of first part of left shift, to get to the
             zero extend cut-off point.  */
          first = width - exact_log2 (lsb2) + right;
          if (first >= 0 && right + left - first >= 0)
            {
              cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
                + ext_shift_insns[right + left - first];
              if (cost < best_cost)
                {
                  best = 1;
                  best_cost = cost;
                  best_right = right;
                  best_len = cost;
                  if (attrp)
                    attrp[2] = first;
                }
            }
        }
    }
  /* Try to use r0 AND pattern */
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
        break;
      if (! CONST_OK_FOR_L (mask >> i))
        continue;
      cost = (i != 0) + 2 + ext_shift_insns[left + i];
      if (cost < best_cost)
        {
          best = 2;
          best_cost = cost;
          best_right = i;
          best_len = cost - 1;
        }
    }
  /* Try to use a scratch register to hold the AND operand.  */
  can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
        break;
      cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
        + (can_ext ? ext_shift_insns : shift_insns)[left + i];
      if (cost < best_cost)
        {
          best = 4 - can_ext;
          best_cost = cost;
          best_right = i;
          best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
        }
    }

  if (attrp)
    {
      attrp[0] = best_right;
      attrp[1] = best_len;
    }
  return best;
}

/* This is used in length attributes of the unnamed instructions
   corresponding to shl_and_kind return values of 1 and 2.  */
int
shl_and_length (insn)
     rtx insn;
{
  rtx set_src, left_rtx, mask_rtx;
  int attributes[3];

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  mask_rtx = XEXP (set_src, 1);
  shl_and_kind (left_rtx, mask_rtx, attributes);
  return attributes[1];
}

/* This is used in length attribute of the and_shl_scratch instruction.  */

int
shl_and_scr_length (insn)
     rtx insn;
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int len = shift_insns[INTVAL (XEXP (set_src, 1))];
  rtx op = XEXP (set_src, 0);
  len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
  op = XEXP (XEXP (op, 0), 0);
  return len + shift_insns[INTVAL (XEXP (op, 1))];
}

/* Generating rtl? */
extern int rtx_equal_function_value_matters;

/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned zero.  */

int
gen_shl_and (dest, left_rtx, mask_rtx, source)
     rtx dest, left_rtx, mask_rtx, source;
{
  int attributes[3];
  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;

  right = attributes[0];
  total_shift = INTVAL (left_rtx) + right;
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
  switch (kind)
    {
    default:
      return -1;
    case 1:
      {
        int first = attributes[2];
        rtx operands[3];

        if (first < 0)
          {
            emit_insn ((mask << right) <= 0xff
                       ? gen_zero_extendqisi2(dest,
                                              gen_lowpart (QImode, source))
                       : gen_zero_extendhisi2(dest,
                                              gen_lowpart (HImode, source)));
            source = dest;
          }
        if (source != dest)
          emit_insn (gen_movsi (dest, source));
        operands[0] = dest;
        if (right)
          {
            operands[2] = GEN_INT (right);
            gen_shifty_hi_op (LSHIFTRT, operands);
          }
        if (first > 0)
          {
            operands[2] = GEN_INT (first);
            gen_shifty_hi_op (ASHIFT, operands);
            total_shift -= first;
            mask <<= first;
          }
        if (first >= 0)
          emit_insn (mask <= 0xff
                     ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
                     : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
        if (total_shift > 0)
          {
            operands[2] = GEN_INT (total_shift);
            gen_shifty_hi_op (ASHIFT, operands);
          }
        break;
      }
    case 4:
      shift_gen_fun = gen_shifty_op;
    case 3:
      /* If the topmost bit that matters is set, set the topmost bits
         that don't matter.  This way, we might be able to get a shorter
         signed constant.  */
      if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
        mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
    case 2:
      /* Don't expand fine-grained when combining, because that will
         make the pattern fail.  */
      if (rtx_equal_function_value_matters
          || reload_in_progress || reload_completed)
        {
          rtx operands[3];

          /* Cases 3 and 4 should be handled by this split
             only while combining  */
          if (kind > 2)
            abort ();
          if (right)
            {
              emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
              source = dest;
            }
          emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
          if (total_shift)
            {
              operands[0] = dest;
              operands[1] = dest;
              operands[2] = GEN_INT (total_shift);
              shift_gen_fun (ASHIFT, operands);
            }
          break;
        }
      else
        {
          int neg = 0;
          if (kind != 4 && total_shift < 16)
            {
              neg = -ext_shift_amounts[total_shift][1];
              if (neg > 0)
                neg -= ext_shift_amounts[total_shift][2];
              else
                neg = 0;
            }
          emit_insn (gen_and_shl_scratch (dest, source,
                                          GEN_INT (right),
                                          GEN_INT (mask),
                                          GEN_INT (total_shift + neg),
                                          GEN_INT (neg)));
          emit_insn (gen_movsi (dest, dest));
          break;
        }
    }
  return 0;
}
1917 /* Try to find a good way to implement the combiner pattern
1918 [(set (match_operand:SI 0 "register_operand" "=r")
1919 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1920 (match_operand:SI 2 "const_int_operand" "n")
1921 (match_operand:SI 3 "const_int_operand" "n")
1922 (const_int 0)))
1923 (clobber (reg:SI T_REG))]
1924 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
1925 return 0 for simple left / right shift combination.
1926 return 1 for left shift / 8 bit sign extend / left shift.
1927 return 2 for left shift / 16 bit sign extend / left shift.
1928 return 3 for left shift / 8 bit sign extend / shift / sign extend.
1929 return 4 for left shift / 16 bit sign extend / shift / sign extend.
1930 return 5 for left shift / 16 bit sign extend / right shift
1931 return 6 for < 8 bit sign extend / left shift.
1932 return 7 for < 8 bit sign extend / left shift / single right shift.
1933 If COSTP is nonzero, assign the calculated cost to *COSTP. */
1936 shl_sext_kind (left_rtx, size_rtx, costp)
1937 rtx left_rtx, size_rtx;
1938 int *costp;
1940 int left, size, insize, ext;
1941 int cost, best_cost;
1942 int kind;
1944 left = INTVAL (left_rtx);
1945 size = INTVAL (size_rtx);
1946 insize = size - left;
1947 if (insize <= 0)
1948 abort ();
1949 /* Default to left / right shift. */
1950 kind = 0;
1951 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
1952 if (size <= 16)
1954 /* 16 bit shift / sign extend / 16 bit shift */
1955 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
1956 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
1957 below, by alternative 3 or something even better. */
1958 if (cost < best_cost)
1960 kind = 5;
1961 best_cost = cost;
1964 /* Try a plain sign extend between two shifts. */
1965 for (ext = 16; ext >= insize; ext -= 8)
1967 if (ext <= size)
1969 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
1970 if (cost < best_cost)
1972 kind = ext / (unsigned) 8;
1973 best_cost = cost;
1976 /* Check if we can do a sloppy shift with a final signed shift
1977 restoring the sign. */
1978 if (EXT_SHIFT_SIGNED (size - ext))
1979 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
1980 /* If not, maybe it's still cheaper to do the second shift sloppy,
1981 and do a final sign extend? */
1982 else if (size <= 16)
1983 cost = ext_shift_insns[ext - insize] + 1
1984 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
1985 else
1986 continue;
1987 if (cost < best_cost)
1989 kind = ext / (unsigned) 8 + 2;
1990 best_cost = cost;
1993 /* Check if we can sign extend in r0 */
1994 if (insize < 8)
1996 cost = 3 + shift_insns[left];
1997 if (cost < best_cost)
1999 kind = 6;
2000 best_cost = cost;
2002 /* Try the same with a final signed shift. */
2003 if (left < 31)
2005 cost = 3 + ext_shift_insns[left + 1] + 1;
2006 if (cost < best_cost)
2008 kind = 7;
2009 best_cost = cost;
2013 if (TARGET_SH3)
2015 /* Try to use a dynamic shift. */
2016 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2017 if (cost < best_cost)
2019 kind = 0;
2020 best_cost = cost;
2023 if (costp)
2024 *costp = best_cost;
2025 return kind;
2028 /* Function to be used in the length attribute of the instructions
2029 implementing this pattern. */
2032 shl_sext_length (insn)
2033 rtx insn;
2035 rtx set_src, left_rtx, size_rtx;
2036 int cost;
2038 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2039 left_rtx = XEXP (XEXP (set_src, 0), 1);
2040 size_rtx = XEXP (set_src, 1);
2041 shl_sext_kind (left_rtx, size_rtx, &cost);
2042 return cost;
2045 /* Generate rtl for this pattern. */
2048 gen_shl_sext (dest, left_rtx, size_rtx, source)
2049 rtx dest, left_rtx, size_rtx, source;
2051 int kind;
2052 int left, size, insize, cost;
2053 rtx operands[3];
2055 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2056 left = INTVAL (left_rtx);
2057 size = INTVAL (size_rtx);
2058 insize = size - left;
2059 switch (kind)
2061 case 1:
2062 case 2:
2063 case 3:
2064 case 4:
2066 int ext = kind & 1 ? 8 : 16;
2067 int shift2 = size - ext;
2069 /* Don't expand fine-grained when combining, because that will
2070 make the pattern fail. */
2071 if (! rtx_equal_function_value_matters
2072 && ! reload_in_progress && ! reload_completed)
2074 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2075 emit_insn (gen_movsi (dest, source));
2076 break;
2078 if (dest != source)
2079 emit_insn (gen_movsi (dest, source));
2080 operands[0] = dest;
2081 if (ext - insize)
2083 operands[2] = GEN_INT (ext - insize);
2084 gen_shifty_hi_op (ASHIFT, operands);
2086 emit_insn (kind & 1
2087 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
2088 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
2089 if (kind <= 2)
2091 if (shift2)
2093 operands[2] = GEN_INT (shift2);
2094 gen_shifty_op (ASHIFT, operands);
2097 else
2099 if (shift2 > 0)
2101 if (EXT_SHIFT_SIGNED (shift2))
2103 operands[2] = GEN_INT (shift2 + 1);
2104 gen_shifty_op (ASHIFT, operands);
2105 operands[2] = GEN_INT (1);
2106 gen_shifty_op (ASHIFTRT, operands);
2107 break;
2109 operands[2] = GEN_INT (shift2);
2110 gen_shifty_hi_op (ASHIFT, operands);
2112 else if (shift2)
2114 operands[2] = GEN_INT (-shift2);
2115 gen_shifty_hi_op (LSHIFTRT, operands);
2117 emit_insn (size <= 8
2118 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2119 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2121 break;
2123 case 5:
2125 int i = 16 - size;
2126 if (! rtx_equal_function_value_matters
2127 && ! reload_in_progress && ! reload_completed)
2128 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2129 else
2131 operands[0] = dest;
2132 operands[2] = GEN_INT (16 - insize);
2133 gen_shifty_hi_op (ASHIFT, operands);
2134 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2136 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2137 while (--i >= 0)
2138 gen_ashift (ASHIFTRT, 1, dest);
2139 break;
2141 case 6:
2142 case 7:
2143 /* Don't expand fine-grained when combining, because that will
2144 make the pattern fail. */
2145 if (! rtx_equal_function_value_matters
2146 && ! reload_in_progress && ! reload_completed)
2148 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2149 emit_insn (gen_movsi (dest, source));
2150 break;
2152 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2153 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2154 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2155 operands[0] = dest;
2156 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2157 gen_shifty_op (ASHIFT, operands);
2158 if (kind == 7)
2159 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2160 break;
2161 default:
2162 return -1;
2164 return 0;
2167 /* Prefix a symbol_ref name with "datalabel". */
2170 gen_datalabel_ref (sym)
2171 rtx sym;
2173 if (GET_CODE (sym) == LABEL_REF)
2174 return gen_rtx_CONST (GET_MODE (sym),
2175 gen_rtx_UNSPEC (GET_MODE (sym),
2176 gen_rtvec (1, sym),
2177 UNSPEC_DATALABEL));
2179 if (GET_CODE (sym) != SYMBOL_REF)
2180 abort ();
2182 XSTR (sym, 0) = concat (SH_DATALABEL_ENCODING, XSTR (sym, 0), NULL);
2184 return sym;
2188 /* The SH cannot load a large constant into a register; constants have to
2189 come from a pc relative load. The reference of a pc relative load
2190 instruction must be less than 1k in front of the instruction. This
2191 means that we often have to dump a constant inside a function, and
2192 generate code to branch around it.
2194 It is important to minimize this, since the branches will slow things
2195 down and make things bigger.
2197 Worst case code looks like:
2199 mov.l L1,rn
2200 bra L2
2202 align
2203 L1: .long value
2207 mov.l L3,rn
2208 bra L4
2210 align
2211 L3: .long value
2215 We fix this by performing a scan before scheduling, which notices which
2216 instructions need to have their operands fetched from the constant table
2217 and builds the table.
2219 The algorithm is:
2221 scan, find an instruction which needs a pcrel move. Look forward, find the
2222 last barrier which is within MAX_COUNT bytes of the requirement.
2223 If there isn't one, make one. Process all the instructions between
2224 the find and the barrier.
2226 In the above example, we can tell that L3 is within 1k of L1, so
2227 the first move can be shrunk from the 3 insn+constant sequence into
2228 just 1 insn, and the constant moved to L3 to make:
2230 mov.l L1,rn
2232 mov.l L3,rn
2233 bra L4
2235 align
2236 L3:.long value
2237 L4:.long value
2239 Then the second move becomes the target for the shortening process. */
2241 typedef struct
2243 rtx value; /* Value in table. */
2244 rtx label; /* Label of value. */
2245 rtx wend; /* End of window. */
2246 enum machine_mode mode; /* Mode of value. */
2247 } pool_node;
2249 /* The maximum number of constants that can fit into one pool, since
2250 the pc relative range is 0...1020 bytes and constants are at least 4
2251 bytes long. */
2253 #define MAX_POOL_SIZE (1020/4)
2254 static pool_node pool_vector[MAX_POOL_SIZE];
2255 static int pool_size;
2256 static rtx pool_window_label;
2257 static int pool_window_last;
2259 /* ??? If we need a constant in HImode which is the truncated value of a
2260 constant we need in SImode, we could combine the two entries, thus saving
2261 two bytes. Is this common enough to be worth the effort of implementing
2262 it? */
2264 /* ??? This stuff should be done at the same time that we shorten branches.
2265 As it is now, we must assume that all branches are the maximum size, and
2266 this causes us to almost always output constant pools sooner than
2267 necessary. */
2269 /* Add a constant to the pool and return its label. */
2271 static rtx
2272 add_constant (x, mode, last_value)
2273 rtx x;
2274 enum machine_mode mode;
2275 rtx last_value;
2277 int i;
2278 rtx lab, new, ref, newref;
2280 /* First see if we've already got it. */
2281 for (i = 0; i < pool_size; i++)
2283 if (x->code == pool_vector[i].value->code
2284 && mode == pool_vector[i].mode)
2286 if (x->code == CODE_LABEL)
2288 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2289 continue;
2291 if (rtx_equal_p (x, pool_vector[i].value))
2293 lab = new = 0;
2294 if (! last_value
2295 || ! i
2296 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2298 new = gen_label_rtx ();
2299 LABEL_REFS (new) = pool_vector[i].label;
2300 pool_vector[i].label = lab = new;
2302 if (lab && pool_window_label)
2304 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2305 ref = pool_vector[pool_window_last].wend;
2306 LABEL_NEXTREF (newref) = ref;
2307 pool_vector[pool_window_last].wend = newref;
2309 if (new)
2310 pool_window_label = new;
2311 pool_window_last = i;
2312 return lab;
2317 /* Need a new one. */
2318 pool_vector[pool_size].value = x;
2319 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2320 lab = 0;
2321 else
2322 lab = gen_label_rtx ();
2323 pool_vector[pool_size].mode = mode;
2324 pool_vector[pool_size].label = lab;
2325 pool_vector[pool_size].wend = NULL_RTX;
2326 if (lab && pool_window_label)
2328 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2329 ref = pool_vector[pool_window_last].wend;
2330 LABEL_NEXTREF (newref) = ref;
2331 pool_vector[pool_window_last].wend = newref;
2333 if (lab)
2334 pool_window_label = lab;
2335 pool_window_last = pool_size;
2336 pool_size++;
2337 return lab;
2340 /* Output the literal table. */
2342 static void
2343 dump_table (scan)
2344 rtx scan;
2346 int i;
2347 int need_align = 1;
2348 rtx lab, ref;
2349 int have_di = 0;
2351 /* Do two passes, first time dump out the HI sized constants. */
2353 for (i = 0; i < pool_size; i++)
2355 pool_node *p = &pool_vector[i];
2357 if (p->mode == HImode)
2359 if (need_align)
2361 scan = emit_insn_after (gen_align_2 (), scan);
2362 need_align = 0;
2364 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2365 scan = emit_label_after (lab, scan);
2366 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2367 scan);
2368 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2370 lab = XEXP (ref, 0);
2371 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2374 else if (p->mode == DImode || p->mode == DFmode)
2375 have_di = 1;
2378 need_align = 1;
2380 if (TARGET_SHCOMPACT && have_di)
2382 rtx align_insn = NULL_RTX;
2384 scan = emit_label_after (gen_label_rtx (), scan);
2385 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2386 need_align = 0;
2388 for (i = 0; i < pool_size; i++)
2390 pool_node *p = &pool_vector[i];
2392 switch (p->mode)
2394 case HImode:
2395 break;
2396 case SImode:
2397 case SFmode:
2398 if (align_insn)
2400 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2401 emit_label_before (lab, align_insn);
2402 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2403 align_insn);
2404 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2406 lab = XEXP (ref, 0);
2407 emit_insn_before (gen_consttable_window_end (lab),
2408 align_insn);
2410 delete_insn (align_insn);
2411 align_insn = NULL_RTX;
2412 continue;
2414 else
2416 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2417 scan = emit_label_after (lab, scan);
2418 scan = emit_insn_after (gen_consttable_4 (p->value,
2419 const0_rtx), scan);
2420 need_align = ! need_align;
2422 break;
2423 case DFmode:
2424 case DImode:
2425 if (need_align)
2427 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2428 align_insn = scan;
2429 need_align = 0;
2431 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2432 scan = emit_label_after (lab, scan);
2433 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2434 scan);
2435 break;
2436 default:
2437 abort ();
2438 break;
2441 if (p->mode != HImode)
2443 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2445 lab = XEXP (ref, 0);
2446 scan = emit_insn_after (gen_consttable_window_end (lab),
2447 scan);
2452 pool_size = 0;
2455 for (i = 0; i < pool_size; i++)
2457 pool_node *p = &pool_vector[i];
2459 switch (p->mode)
2461 case HImode:
2462 break;
2463 case SImode:
2464 case SFmode:
2465 if (need_align)
2467 need_align = 0;
2468 scan = emit_label_after (gen_label_rtx (), scan);
2469 scan = emit_insn_after (gen_align_4 (), scan);
2471 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2472 scan = emit_label_after (lab, scan);
2473 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2474 scan);
2475 break;
2476 case DFmode:
2477 case DImode:
2478 if (need_align)
2480 need_align = 0;
2481 scan = emit_label_after (gen_label_rtx (), scan);
2482 scan = emit_insn_after (gen_align_4 (), scan);
2484 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2485 scan = emit_label_after (lab, scan);
2486 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2487 scan);
2488 break;
2489 default:
2490 abort ();
2491 break;
2494 if (p->mode != HImode)
2496 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2498 lab = XEXP (ref, 0);
2499 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2504 scan = emit_insn_after (gen_consttable_end (), scan);
2505 scan = emit_barrier_after (scan);
2506 pool_size = 0;
2507 pool_window_label = NULL_RTX;
2508 pool_window_last = 0;
2511 /* Return nonzero if constant would be an ok source for a
2512 mov.w instead of a mov.l. */
2514 static int
2515 hi_const (src)
2516 rtx src;
2518 return (GET_CODE (src) == CONST_INT
2519 && INTVAL (src) >= -32768
2520 && INTVAL (src) <= 32767);
2523 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2525 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2526 CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't
2527 need to fix it if the input value is CONST_OK_FOR_I. */
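/* Editor's example (illustrative, not part of GCC): with CONST_OK_FOR_I
   accepting signed 8-bit immediates, (set (reg:SI 1) (const_int 4)) can
   stay as an immediate mov and is not "broken", whereas
   (set (reg:SI 1) (const_int 0x12345)) cannot be encoded directly and
   must be rewritten as a pc-relative load from the constant pool.  */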
2529 static int
2530 broken_move (insn)
2531 rtx insn;
2533 if (GET_CODE (insn) == INSN)
2535 rtx pat = PATTERN (insn);
2536 if (GET_CODE (pat) == PARALLEL)
2537 pat = XVECEXP (pat, 0, 0);
2538 if (GET_CODE (pat) == SET
2539 /* We can load any 8 bit value if we don't care what the high
2540 order bits end up as. */
2541 && GET_MODE (SET_DEST (pat)) != QImode
2542 && (CONSTANT_P (SET_SRC (pat))
2543 /* Match mova_const. */
2544 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2545 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2546 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2547 && ! (TARGET_SH3E
2548 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2549 && (fp_zero_operand (SET_SRC (pat))
2550 || fp_one_operand (SET_SRC (pat)))
2551 /* ??? If this is a -m4 or -m4-single compilation, in general
2552 we don't know the current setting of fpscr, so disable fldi.
2553 There is an exception if this was a register-register move
2554 before reload - and hence it was ascertained that we have
2555 single precision setting - and in a post-reload optimization
2556 we changed this to do a constant load. In that case
2557 we don't have an r0 clobber, hence we must use fldi. */
2558 && (! TARGET_SH4 || TARGET_FMOVD
2559 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2560 == SCRATCH))
2561 && GET_CODE (SET_DEST (pat)) == REG
2562 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2563 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2564 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
2565 return 1;
2568 return 0;
2571 static int
2572 mova_p (insn)
2573 rtx insn;
2575 return (GET_CODE (insn) == INSN
2576 && GET_CODE (PATTERN (insn)) == SET
2577 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2578 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2579 /* Don't match mova_const. */
2580 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2583 /* Find the last barrier from insn FROM which is close enough to hold the
2584 constant pool. If we can't find one, then create one near the end of
2585 the range. */
2587 static rtx
2588 find_barrier (num_mova, mova, from)
2589 int num_mova;
2590 rtx mova, from;
2592 int count_si = 0;
2593 int count_hi = 0;
2594 int found_hi = 0;
2595 int found_si = 0;
2596 int found_di = 0;
2597 int hi_align = 2;
2598 int si_align = 2;
2599 int leading_mova = num_mova;
2600 rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
2601 int si_limit;
2602 int hi_limit;
2604 /* For HImode: range is 510, add 4 because pc counts from address of
2605 second instruction after this one, subtract 2 for the jump instruction
2606 that we may need to emit before the table, subtract 2 for the instruction
2607 that fills the jump delay slot (in very rare cases, reorg will take an
2608 instruction from after the constant pool or will leave the delay slot
2609 empty). This gives 510.
2610 For SImode: range is 1020, add 4 because pc counts from address of
2611 second instruction after this one, subtract 2 in case pc is 2 byte
2612 aligned, subtract 2 for the jump instruction that we may need to emit
2613 before the table, subtract 2 for the instruction that fills the jump
2614 delay slot. This gives 1018. */
2616 /* The branch will always be shortened now that the reference address for
2617 forward branches is the successor address, so we no longer need to make
2618 adjustments to the [sh]i_limit for -O0. */
2620 si_limit = 1018;
2621 hi_limit = 510;
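#if 0
  /* Editor's sketch, not compiled: the arithmetic from the comment above,
     spelled out.  */
  si_limit = 1020 + 4 - 2 - 2 - 2;	/* == 1018 */
  hi_limit = 510 + 4 - 2 - 2;		/* == 510 */
#endif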
2623 while (from && count_si < si_limit && count_hi < hi_limit)
2625 int inc = get_attr_length (from);
2626 int new_align = 1;
2628 if (GET_CODE (from) == CODE_LABEL)
2630 if (optimize)
2631 new_align = 1 << label_to_alignment (from);
2632 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2633 new_align = 1 << barrier_align (from);
2634 else
2635 new_align = 1;
2636 inc = 0;
2639 if (GET_CODE (from) == BARRIER)
2642 found_barrier = from;
2644 /* If we are at the end of the function, or in front of an alignment
2645 instruction, we need not insert an extra alignment. We prefer
2646 this kind of barrier. */
2647 if (barrier_align (from) > 2)
2648 good_barrier = from;
2651 if (broken_move (from))
2653 rtx pat, src, dst;
2654 enum machine_mode mode;
2656 pat = PATTERN (from);
2657 if (GET_CODE (pat) == PARALLEL)
2658 pat = XVECEXP (pat, 0, 0);
2659 src = SET_SRC (pat);
2660 dst = SET_DEST (pat);
2661 mode = GET_MODE (dst);
2663 /* We must explicitly check the mode, because sometimes the
2664 front end will generate code to load unsigned constants into
2665 HImode targets without properly sign extending them. */
2666 if (mode == HImode
2667 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2669 found_hi += 2;
2670 /* We put the short constants before the long constants, so
2671 we must count the length of short constants in the range
2672 for the long constants. */
2673 /* ??? This isn't optimal, but is easy to do. */
2674 si_limit -= 2;
2676 else
2678 /* We dump DF/DI constants before SF/SI ones, because
2679 the limit is the same, but the alignment requirements
2680 are higher. We may waste up to 4 additional bytes
2681 for alignment, and the DF/DI constant may have
2682 another SF/SI constant placed before it. */
2683 if (TARGET_SHCOMPACT
2684 && ! found_di
2685 && (mode == DFmode || mode == DImode))
2687 found_di = 1;
2688 si_limit -= 8;
2690 while (si_align > 2 && found_si + si_align - 2 > count_si)
2691 si_align >>= 1;
2692 if (found_si > count_si)
2693 count_si = found_si;
2694 found_si += GET_MODE_SIZE (mode);
2695 if (num_mova)
2696 si_limit -= GET_MODE_SIZE (mode);
2699 /* See the code in machine_dependent_reorg, which has a similar if
2700 statement that generates a new mova insn in many cases. */
2701 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
2702 inc += 2;
2705 if (mova_p (from))
2707 if (! num_mova++)
2709 leading_mova = 0;
2710 mova = from;
2711 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
2713 if (found_si > count_si)
2714 count_si = found_si;
2716 else if (GET_CODE (from) == JUMP_INSN
2717 && (GET_CODE (PATTERN (from)) == ADDR_VEC
2718 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
2720 if (num_mova)
2721 num_mova--;
2722 if (barrier_align (next_real_insn (from)) == CACHE_LOG)
2724 /* We have just passed the barrier in front of the
2725 ADDR_DIFF_VEC, which is stored in found_barrier. Since
2726 the ADDR_DIFF_VEC is accessed as data, just like our pool
2727 constants, this is a good opportunity to accommodate what
2728 we have gathered so far.
2729 If we waited any longer, we could end up at a barrier in
2730 front of code, which gives worse cache usage for separated
2731 instruction / data caches. */
2732 good_barrier = found_barrier;
2733 break;
2735 else
2737 rtx body = PATTERN (from);
2738 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
2741 /* For the SH1, we generate alignments even after jumps-around-jumps. */
2742 else if (GET_CODE (from) == JUMP_INSN
2743 && ! TARGET_SH2
2744 && ! TARGET_SMALLCODE)
2745 new_align = 4;
2747 if (found_si)
2749 count_si += inc;
2750 if (new_align > si_align)
2752 si_limit -= (count_si - 1) & (new_align - si_align);
2753 si_align = new_align;
2755 count_si = (count_si + new_align - 1) & -new_align;
2757 if (found_hi)
2759 count_hi += inc;
2760 if (new_align > hi_align)
2762 hi_limit -= (count_hi - 1) & (new_align - hi_align);
2763 hi_align = new_align;
2765 count_hi = (count_hi + new_align - 1) & -new_align;
2767 from = NEXT_INSN (from);
2770 if (num_mova)
2772 if (leading_mova)
2774 /* Try as we might, the leading mova is out of range. Change
2775 it into a load (which will become a pcload) and retry. */
2776 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2777 INSN_CODE (mova) = -1;
2778 return find_barrier (0, 0, mova);
2780 else
2782 /* Insert the constant pool table before the mova instruction,
2783 to prevent the mova label reference from going out of range. */
2784 from = mova;
2785 good_barrier = found_barrier = barrier_before_mova;
2789 if (found_barrier)
2791 if (good_barrier && next_real_insn (found_barrier))
2792 found_barrier = good_barrier;
2794 else
2796 /* We didn't find a barrier in time to dump our stuff,
2797 so we'll make one. */
2798 rtx label = gen_label_rtx ();
2800 /* If we exceeded the range, then we must back up over the last
2801 instruction we looked at. Otherwise, we just need to undo the
2802 NEXT_INSN at the end of the loop. */
2803 if (count_hi > hi_limit || count_si > si_limit)
2804 from = PREV_INSN (PREV_INSN (from));
2805 else
2806 from = PREV_INSN (from);
2808 /* Walk back to be just before any jump or label.
2809 Putting it before a label reduces the number of times the branch
2810 around the constant pool table will be hit. Putting it before
2811 a jump makes it more likely that the bra delay slot will be
2812 filled. */
2813 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
2814 || GET_CODE (from) == CODE_LABEL)
2815 from = PREV_INSN (from);
2817 from = emit_jump_insn_after (gen_jump (label), from);
2818 JUMP_LABEL (from) = label;
2819 LABEL_NUSES (label) = 1;
2820 found_barrier = emit_barrier_after (from);
2821 emit_label_after (label, found_barrier);
2824 return found_barrier;
2827 /* If the instruction INSN is implemented by a special function, and we can
2828 positively find the register that is used to call the sfunc, and this
2829 register is not used anywhere else in this instruction (except as the
2830 destination of a set), return this register; else, return 0. */
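/* Editor's illustration (hypothetical shape, not lifted from sh.md): an
   sfunc call pattern is a PARALLEL along the lines of
     (parallel [(set (reg:SI 4) ...)
                ...
                (use (reg:SI 2))])
   and it is the register inside the SImode (use) -- r2 here -- that this
   function tries to return.  */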
2832 sfunc_uses_reg (insn)
2833 rtx insn;
2835 int i;
2836 rtx pattern, part, reg_part, reg;
2838 if (GET_CODE (insn) != INSN)
2839 return 0;
2840 pattern = PATTERN (insn);
2841 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
2842 return 0;
2844 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2846 part = XVECEXP (pattern, 0, i);
2847 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
2848 reg_part = part;
2850 if (! reg_part)
2851 return 0;
2852 reg = XEXP (reg_part, 0);
2853 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
2855 part = XVECEXP (pattern, 0, i);
2856 if (part == reg_part || GET_CODE (part) == CLOBBER)
2857 continue;
2858 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
2859 && GET_CODE (SET_DEST (part)) == REG)
2860 ? SET_SRC (part) : part)))
2861 return 0;
2863 return reg;
2866 /* See if the only way in which INSN uses REG is by calling it, or by
2867 setting it while calling it. Set *SET to a SET rtx if the register
2868 is set by INSN. */
2870 static int
2871 noncall_uses_reg (reg, insn, set)
2872 rtx reg;
2873 rtx insn;
2874 rtx *set;
2876 rtx pattern, reg2;
2878 *set = NULL_RTX;
2880 reg2 = sfunc_uses_reg (insn);
2881 if (reg2 && REGNO (reg2) == REGNO (reg))
2883 pattern = single_set (insn);
2884 if (pattern
2885 && GET_CODE (SET_DEST (pattern)) == REG
2886 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2887 *set = pattern;
2888 return 0;
2890 if (GET_CODE (insn) != CALL_INSN)
2892 /* We don't use rtx_equal_p because we don't care if the mode is
2893 different. */
2894 pattern = single_set (insn);
2895 if (pattern
2896 && GET_CODE (SET_DEST (pattern)) == REG
2897 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2899 rtx par, part;
2900 int i;
2902 *set = pattern;
2903 par = PATTERN (insn);
2904 if (GET_CODE (par) == PARALLEL)
2905 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
2907 part = XVECEXP (par, 0, i);
2908 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
2909 return 1;
2911 return reg_mentioned_p (reg, SET_SRC (pattern));
2914 return 1;
2917 pattern = PATTERN (insn);
2919 if (GET_CODE (pattern) == PARALLEL)
2921 int i;
2923 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2924 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
2925 return 1;
2926 pattern = XVECEXP (pattern, 0, 0);
2929 if (GET_CODE (pattern) == SET)
2931 if (reg_mentioned_p (reg, SET_DEST (pattern)))
2933 /* We don't use rtx_equal_p, because we don't care if the
2934 mode is different. */
2935 if (GET_CODE (SET_DEST (pattern)) != REG
2936 || REGNO (reg) != REGNO (SET_DEST (pattern)))
2937 return 1;
2939 *set = pattern;
2942 pattern = SET_SRC (pattern);
2945 if (GET_CODE (pattern) != CALL
2946 || GET_CODE (XEXP (pattern, 0)) != MEM
2947 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
2948 return 1;
2950 return 0;
2953 /* Given X, a pattern of an insn or a part of it, return a mask of used
2954 general registers. Bits 0..15 mean that the respective registers
2955 are used as inputs in the instruction. Bits 16..31 mean that the
2956 registers 0..15, respectively, are used as outputs, or are clobbered.
2957 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
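/* Editor's worked example (illustrative, not part of GCC): assuming SImode
   occupies one hard register here, for
     (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   this returns (1 << 2) | (1 << 3)	-- r2, r3 read as inputs --
	      | (1 << (16 + 1))		-- r1 written as output.  */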
2959 regs_used (x, is_dest)
2960 rtx x; int is_dest;
2962 enum rtx_code code;
2963 const char *fmt;
2964 int i, used = 0;
2966 if (! x)
2967 return used;
2968 code = GET_CODE (x);
2969 switch (code)
2971 case REG:
2972 if (REGNO (x) < 16)
2973 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2974 << (REGNO (x) + is_dest));
2975 return 0;
2976 case SUBREG:
2978 rtx y = SUBREG_REG (x);
2980 if (GET_CODE (y) != REG)
2981 break;
2982 if (REGNO (y) < 16)
2983 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2984 << (REGNO (y) +
2985 subreg_regno_offset (REGNO (y),
2986 GET_MODE (y),
2987 SUBREG_BYTE (x),
2988 GET_MODE (x)) + is_dest));
2989 return 0;
2991 case SET:
2992 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
2993 case RETURN:
2994 /* If there was a return value, it must have been indicated with USE. */
2995 return 0x00ffff00;
2996 case CLOBBER:
2997 is_dest = 1;
2998 break;
2999 case MEM:
3000 is_dest = 0;
3001 break;
3002 case CALL:
3003 used |= 0x00ff00f0;
3004 break;
3005 default:
3006 break;
3009 fmt = GET_RTX_FORMAT (code);
3011 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3013 if (fmt[i] == 'E')
3015 register int j;
3016 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3017 used |= regs_used (XVECEXP (x, i, j), is_dest);
3019 else if (fmt[i] == 'e')
3020 used |= regs_used (XEXP (x, i), is_dest);
3022 return used;
3025 /* Create an instruction that prevents redirection of a conditional branch
3026 to the destination of the JUMP with address ADDR.
3027 If the branch needs to be implemented as an indirect jump, try to find
3028 a scratch register for it.
3029 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3030 If any preceding insn that doesn't fit into a delay slot is good enough,
3031 pass 1. Pass 2 if a definite blocking insn is needed.
3032 -1 is used internally to avoid deep recursion.
3033 If a blocking instruction is made or recognized, return it. */
3035 static rtx
3036 gen_block_redirect (jump, addr, need_block)
3037 rtx jump;
3038 int addr, need_block;
3040 int dead = 0;
3041 rtx prev = prev_nonnote_insn (jump);
3042 rtx dest;
3044 /* First, check if we already have an instruction that satisfies our need. */
3045 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3047 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3048 return prev;
3049 if (GET_CODE (PATTERN (prev)) == USE
3050 || GET_CODE (PATTERN (prev)) == CLOBBER
3051 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3052 prev = jump;
3053 else if ((need_block &= ~1) < 0)
3054 return prev;
3055 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3056 need_block = 0;
3058 /* We can't use JUMP_LABEL here because it might be undefined
3059 when not optimizing. */
3060 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3061 /* If the branch is out of range, try to find a scratch register for it. */
3062 if (optimize
3063 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3064 > 4092 + 4098))
3066 rtx scan;
3067 /* Don't look for the stack pointer as a scratch register;
3068 it would cause trouble if an interrupt occurred. */
3069 unsigned try = 0x7fff, used;
3070 int jump_left = flag_expensive_optimizations + 1;
3072 /* It is likely that the most recent eligible instruction is wanted for
3073 the delay slot. Therefore, find out which registers it uses, and
3074 try to avoid using them. */
3076 for (scan = jump; (scan = PREV_INSN (scan)); )
3078 enum rtx_code code;
3080 if (INSN_DELETED_P (scan))
3081 continue;
3082 code = GET_CODE (scan);
3083 if (code == CODE_LABEL || code == JUMP_INSN)
3084 break;
3085 if (code == INSN
3086 && GET_CODE (PATTERN (scan)) != USE
3087 && GET_CODE (PATTERN (scan)) != CLOBBER
3088 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3090 try &= ~regs_used (PATTERN (scan), 0);
3091 break;
3094 for (used = dead = 0, scan = JUMP_LABEL (jump);
3095 (scan = NEXT_INSN (scan)); )
3097 enum rtx_code code;
3099 if (INSN_DELETED_P (scan))
3100 continue;
3101 code = GET_CODE (scan);
3102 if (GET_RTX_CLASS (code) == 'i')
3104 used |= regs_used (PATTERN (scan), 0);
3105 if (code == CALL_INSN)
3106 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3107 dead |= (used >> 16) & ~used;
3108 if (dead & try)
3110 dead &= try;
3111 break;
3113 if (code == JUMP_INSN)
3115 if (jump_left-- && simplejump_p (scan))
3116 scan = JUMP_LABEL (scan);
3117 else
3118 break;
3122 /* Mask out the stack pointer again, in case it was
3123 the only 'free' register we have found. */
3124 dead &= 0x7fff;
3126 /* If the immediate destination is still in range, check for possible
3127 threading with a jump beyond the delay slot insn.
3128 Don't check if we are called recursively; the jump has been or will be
3129 checked in a different invocation in that case. */
3131 else if (optimize && need_block >= 0)
3133 rtx next = next_active_insn (next_active_insn (dest));
3134 if (next && GET_CODE (next) == JUMP_INSN
3135 && GET_CODE (PATTERN (next)) == SET
3136 && recog_memoized (next) == CODE_FOR_jump)
3138 dest = JUMP_LABEL (next);
3139 if (dest
3140 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3141 > 4092 + 4098))
3142 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3146 if (dead)
3148 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3150 /* It would be nice if we could convert the jump into an indirect
3151 jump / far branch right now, thus exposing all constituent
3152 instructions to further optimization. However, reorg uses
3153 simplejump_p to determine if there is an unconditional jump where
3154 it should try to schedule instructions from the target of the
3155 branch; simplejump_p fails for indirect jumps even if they have
3156 a JUMP_LABEL. */
3157 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3158 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3159 , jump);
3160 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3161 return insn;
3163 else if (need_block)
3164 /* We can't use JUMP_LABEL here because it might be undefined
3165 when not optimizing. */
3166 return emit_insn_before (gen_block_branch_redirect
3167 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3168 , jump);
3169 return prev;
3172 #define CONDJUMP_MIN -252
3173 #define CONDJUMP_MAX 262
3174 struct far_branch
3176 /* A label (to be placed) in front of the jump
3177 that jumps to our ultimate destination. */
3178 rtx near_label;
3179 /* Where we are going to insert it if we cannot move the jump any farther,
3180 or the jump itself if we have picked up an existing jump. */
3181 rtx insert_place;
3182 /* The ultimate destination. */
3183 rtx far_label;
3184 struct far_branch *prev;
3185 /* If the branch has already been created, its address;
3186 else the address of its first prospective user. */
3187 int address;
3190 static void gen_far_branch PARAMS ((struct far_branch *));
3191 enum mdep_reorg_phase_e mdep_reorg_phase;
3192 static void
3193 gen_far_branch (bp)
3194 struct far_branch *bp;
3196 rtx insn = bp->insert_place;
3197 rtx jump;
3198 rtx label = gen_label_rtx ();
3200 emit_label_after (label, insn);
3201 if (bp->far_label)
3203 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3204 LABEL_NUSES (bp->far_label)++;
3206 else
3207 jump = emit_jump_insn_after (gen_return (), insn);
3208 /* Emit a barrier so that reorg knows that any following instructions
3209 are not reachable via a fall-through path.
3210 But don't do this when not optimizing, since we wouldn't suppress the
3211 alignment for the barrier then, and could end up with out-of-range
3212 pc-relative loads. */
3213 if (optimize)
3214 emit_barrier_after (jump);
3215 emit_label_after (bp->near_label, insn);
3216 JUMP_LABEL (jump) = bp->far_label;
3217 if (! invert_jump (insn, label, 1))
3218 abort ();
3219 (emit_insn_after
3220 (gen_stuff_delay_slot
3221 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3222 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3223 insn));
3224 /* Prevent reorg from undoing our splits. */
3225 gen_block_redirect (jump, bp->address += 2, 2);
3228 /* Fix up ADDR_DIFF_VECs. */
3229 void
3230 fixup_addr_diff_vecs (first)
3231 rtx first;
3233 rtx insn;
3235 for (insn = first; insn; insn = NEXT_INSN (insn))
3237 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3239 if (GET_CODE (insn) != JUMP_INSN
3240 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3241 continue;
3242 pat = PATTERN (insn);
3243 vec_lab = XEXP (XEXP (pat, 0), 0);
3245 /* Search the matching casesi_jump_2. */
3246 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3248 if (GET_CODE (prev) != JUMP_INSN)
3249 continue;
3250 prevpat = PATTERN (prev);
3251 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3252 continue;
3253 x = XVECEXP (prevpat, 0, 1);
3254 if (GET_CODE (x) != USE)
3255 continue;
3256 x = XEXP (x, 0);
3257 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3258 break;
3261 /* Emit the reference label of the braf where it belongs, right after
3262 the casesi_jump_2 (i.e. braf). */
3263 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3264 emit_label_after (braf_label, prev);
3266 /* Fix up the ADDR_DIFF_VEC to be relative
3267 to the reference address of the braf. */
3268 XEXP (XEXP (pat, 0), 0) = braf_label;
3272 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3273 a barrier. Return the base 2 logarithm of the desired alignment. */
3275 barrier_align (barrier_or_label)
3276 rtx barrier_or_label;
3278 rtx next = next_real_insn (barrier_or_label), pat, prev;
3279 int slot, credit, jump_to_next;
3281 if (! next)
3282 return 0;
3284 pat = PATTERN (next);
3286 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3287 return 2;
3289 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3290 /* This is a barrier in front of a constant table. */
3291 return 0;
3293 prev = prev_real_insn (barrier_or_label);
3294 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3296 pat = PATTERN (prev);
3297 /* If this is a very small table, we want to keep the alignment after
3298 the table to the minimum for proper code alignment. */
3299 return ((TARGET_SMALLCODE
3300 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3301 <= (unsigned)1 << (CACHE_LOG - 2)))
3302 ? 1 << TARGET_SHMEDIA : CACHE_LOG);
3305 if (TARGET_SMALLCODE)
3306 return 0;
3308 if (! TARGET_SH2 || ! optimize)
3309 return CACHE_LOG;
3311 /* When fixing up pcloads, a constant table might be inserted just before
3312 the basic block that ends with the barrier. Thus, we can't trust the
3313 instruction lengths before that. */
3314 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3316 /* Check if there is an immediately preceding branch to the insn beyond
3317 the barrier. We must weigh the cost of discarding useful information
3318 from the current cache line when executing this branch and there is
3319 an alignment, against that of fetching unneeded insns in front of the
3320 branch target when there is no alignment. */
3322 /* There are two delay_slot cases to consider. One is the simple case
3323 where the preceding branch is to the insn beyond the barrier (simple
3324 delay slot filling), and the other is where the preceding branch has
3325 a delay slot that is a duplicate of the insn after the barrier
3326 (fill_eager_delay_slots) and the branch is to the insn after the insn
3327 after the barrier. */
3329 /* PREV is presumed to be the JUMP_INSN for the barrier under
3330 investigation. Skip to the insn before it. */
3331 prev = prev_real_insn (prev);
3333 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3334 credit >= 0 && prev && GET_CODE (prev) == INSN;
3335 prev = prev_real_insn (prev))
3337 jump_to_next = 0;
3338 if (GET_CODE (PATTERN (prev)) == USE
3339 || GET_CODE (PATTERN (prev)) == CLOBBER)
3340 continue;
3341 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3343 prev = XVECEXP (PATTERN (prev), 0, 1);
3344 if (INSN_UID (prev) == INSN_UID (next))
3346 /* Delay slot was filled with insn at jump target. */
3347 jump_to_next = 1;
3348 continue;
3352 if (slot &&
3353 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3354 slot = 0;
3355 credit -= get_attr_length (prev);
3357 if (prev
3358 && GET_CODE (prev) == JUMP_INSN
3359 && JUMP_LABEL (prev))
3361 rtx x;
3362 if (jump_to_next
3363 || next_real_insn (JUMP_LABEL (prev)) == next
3364 /* If relax_delay_slots() decides NEXT was redundant
3365 with some previous instruction, it will have
3366 redirected PREV's jump to the following insn. */
3367 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3368 /* There is no upper bound on redundant instructions
3369 that might have been skipped, but we must not put an
3370 alignment where none had been before. */
3371 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3372 (INSN_P (x)
3373 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3374 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch))))
3376 rtx pat = PATTERN (prev);
3377 if (GET_CODE (pat) == PARALLEL)
3378 pat = XVECEXP (pat, 0, 0);
3379 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3380 return 0;
3385 return CACHE_LOG;
3388 /* If we are inside a phony loop, almost any kind of label can turn up as the
3389 first one in the loop. Aligning a braf label causes incorrect switch
3390 destination addresses; we can detect braf labels because they are
3391 followed by a BARRIER.
3392 Applying loop alignment to small constant or switch tables is a waste
3393 of space, so we suppress this too. */
3395 sh_loop_align (label)
3396 rtx label;
3398 rtx next = label;
3401 next = next_nonnote_insn (next);
3402 while (next && GET_CODE (next) == CODE_LABEL);
3404 if (! next
3405 || ! INSN_P (next)
3406 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3407 || recog_memoized (next) == CODE_FOR_consttable_2)
3408 return 0;
3410 if (TARGET_SH5)
3411 return 3;
3413 return 2;
3416 /* Exported to toplev.c.
3418 Do a final pass over the function, just before delayed branch
3419 scheduling. */
3421 void
3422 machine_dependent_reorg (first)
3423 rtx first;
3425 rtx insn, mova;
3426 int num_mova;
3427 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3428 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3430 /* We must split call insns before introducing `mova's. If we're
3431 optimizing, they'll have already been split. Otherwise, make
3432 sure we don't split them too late. */
3433 if (! optimize)
3434 split_all_insns_noflow ();
3436 if (TARGET_SHMEDIA)
3437 return;
3439 /* If relaxing, generate pseudo-ops to associate function calls with
3440 the symbols they call. It does no harm to not generate these
3441 pseudo-ops. However, when we can generate them, it enables the
3442 linker to potentially relax the jsr to a bsr, and eliminate the
3443 register load and, possibly, the constant pool entry. */
3445 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3446 if (TARGET_RELAX)
3448 /* Remove all REG_LABEL notes. We want to use them for our own
3449 purposes. This works because none of the remaining passes
3450 need to look at them.
3452 ??? But it may break in the future. We should use a machine
3453 dependent REG_NOTE, or some other approach entirely. */
3454 for (insn = first; insn; insn = NEXT_INSN (insn))
3456 if (INSN_P (insn))
3458 rtx note;
3460 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3461 remove_note (insn, note);
3465 for (insn = first; insn; insn = NEXT_INSN (insn))
3467 rtx pattern, reg, link, set, scan, dies, label;
3468 int rescan = 0, foundinsn = 0;
3470 if (GET_CODE (insn) == CALL_INSN)
3472 pattern = PATTERN (insn);
3474 if (GET_CODE (pattern) == PARALLEL)
3475 pattern = XVECEXP (pattern, 0, 0);
3476 if (GET_CODE (pattern) == SET)
3477 pattern = SET_SRC (pattern);
3479 if (GET_CODE (pattern) != CALL
3480 || GET_CODE (XEXP (pattern, 0)) != MEM)
3481 continue;
3483 reg = XEXP (XEXP (pattern, 0), 0);
3485 else
3487 reg = sfunc_uses_reg (insn);
3488 if (! reg)
3489 continue;
3492 if (GET_CODE (reg) != REG)
3493 continue;
3495 /* This is a function call via REG. If the only uses of REG
3496 between the time that it is set and the time that it dies
3497 are in function calls, then we can associate all the
3498 function calls with the setting of REG. */
3500 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3502 if (REG_NOTE_KIND (link) != 0)
3503 continue;
3504 set = single_set (XEXP (link, 0));
3505 if (set && rtx_equal_p (reg, SET_DEST (set)))
3507 link = XEXP (link, 0);
3508 break;
3512 if (! link)
3514 /* ??? Sometimes global register allocation will have
3515 deleted the insn pointed to by LOG_LINKS. Try
3516 scanning backward to find where the register is set. */
3517 for (scan = PREV_INSN (insn);
3518 scan && GET_CODE (scan) != CODE_LABEL;
3519 scan = PREV_INSN (scan))
3521 if (! INSN_P (scan))
3522 continue;
3524 if (! reg_mentioned_p (reg, scan))
3525 continue;
3527 if (noncall_uses_reg (reg, scan, &set))
3528 break;
3530 if (set)
3532 link = scan;
3533 break;
3538 if (! link)
3539 continue;
3541 /* The register is set at LINK. */
3543 /* We can only optimize the function call if the register is
3544 being set to a symbol. In theory, we could sometimes
3545 optimize calls to a constant location, but the assembler
3546 and linker do not support that at present. */
3547 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3548 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3549 continue;
3551 /* Scan forward from LINK to the place where REG dies, and
3552 make sure that the only insns which use REG are
3553 themselves function calls. */
3555 /* ??? This doesn't work for call targets that were allocated
3556 by reload, since there may not be a REG_DEAD note for the
3557 register. */
3559 dies = NULL_RTX;
3560 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3562 rtx scanset;
3564 /* Don't try to trace forward past a CODE_LABEL if we haven't
3565 seen INSN yet. Ordinarily, we will only find the setting insn
3566 in LOG_LINKS if it is in the same basic block. However,
3567 cross-jumping can insert code labels in between the load and
3568 the call, and can result in situations where a single call
3569 insn may have two targets depending on where we came from. */
3571 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3572 break;
3574 if (! INSN_P (scan))
3575 continue;
3577 /* Don't try to trace forward past a JUMP. To optimize
3578 safely, we would have to check that all the
3579 instructions at the jump destination did not use REG. */
3581 if (GET_CODE (scan) == JUMP_INSN)
3582 break;
3584 if (! reg_mentioned_p (reg, scan))
3585 continue;
3587 if (noncall_uses_reg (reg, scan, &scanset))
3588 break;
3590 if (scan == insn)
3591 foundinsn = 1;
3593 if (scan != insn
3594 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3596 /* There is a function call to this register other
3597 than the one we are checking. If we optimize
3598 this call, we need to rescan again below. */
3599 rescan = 1;
3602 /* ??? We shouldn't have to worry about SCANSET here.
3603 We should just be able to check for a REG_DEAD note
3604 on a function call. However, the REG_DEAD notes are
3605 apparently not dependable around libcalls; c-torture
3606 execute/920501-2 is a test case. If SCANSET is set,
3607 then this insn sets the register, so it must have
3608 died earlier. Unfortunately, this will only handle
3609 the cases in which the register is, in fact, set in a
3610 later insn. */
3612 /* ??? We shouldn't have to use FOUNDINSN here.
3613 However, the LOG_LINKS fields are apparently not
3614 entirely reliable around libcalls;
3615 newlib/libm/math/e_pow.c is a test case. Sometimes
3616 an insn will appear in LOG_LINKS even though it is
3617 not the most recent insn which sets the register. */
3619 if (foundinsn
3620 && (scanset
3621 || find_reg_note (scan, REG_DEAD, reg)))
3623 dies = scan;
3624 break;
3628 if (! dies)
3630 /* Either there was a branch, or some insn used REG
3631 other than as a function call address. */
3632 continue;
3635 /* Create a code label, and put it in a REG_LABEL note on
3636 the insn which sets the register, and on each call insn
3637 which uses the register. In final_prescan_insn we look
3638 for the REG_LABEL notes, and output the appropriate label
3639 or pseudo-op. */
3641 label = gen_label_rtx ();
3642 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3643 REG_NOTES (link));
3644 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3645 REG_NOTES (insn));
3646 if (rescan)
3648 scan = link;
3651 rtx reg2;
3653 scan = NEXT_INSN (scan);
3654 if (scan != insn
3655 && ((GET_CODE (scan) == CALL_INSN
3656 && reg_mentioned_p (reg, scan))
3657 || ((reg2 = sfunc_uses_reg (scan))
3658 && REGNO (reg2) == REGNO (reg))))
3659 REG_NOTES (scan)
3660 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3662 while (scan != dies);
3667 if (TARGET_SH2)
3668 fixup_addr_diff_vecs (first);
3670 if (optimize)
3672 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3673 shorten_branches (first);
3675 /* Scan the function looking for move instructions which have to be
3676 changed to pc-relative loads and insert the literal tables. */
3678 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3679 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3681 if (mova_p (insn))
3683 if (! num_mova++)
3684 mova = insn;
3686 else if (GET_CODE (insn) == JUMP_INSN
3687 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3688 && num_mova)
3690 rtx scan;
3691 int total;
3693 num_mova--;
3695 /* Some code might have been inserted between the mova and
3696 its ADDR_DIFF_VEC. Check if the mova is still in range. */
3697 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
3698 total += get_attr_length (scan);
3700 /* range of mova is 1020, add 4 because pc counts from address of
3701 second instruction after this one, subtract 2 in case pc is 2
3702 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
3703 cancels out with alignment effects of the mova itself. */
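/* Hence the 1022 in the test below: 1020 + 4 - 2 (editor's note).  */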
3704 if (total > 1022)
3706 /* Change the mova into a load, and restart scanning
3707 there. broken_move will then return true for mova. */
3708 SET_SRC (PATTERN (mova))
3709 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3710 INSN_CODE (mova) = -1;
3711 insn = mova;
3714 if (broken_move (insn))
3716 rtx scan;
3717 /* Scan ahead looking for a barrier to stick the constant table
3718 behind. */
3719 rtx barrier = find_barrier (num_mova, mova, insn);
3720 rtx last_float_move, last_float = 0, *last_float_addr;
3721 int may_need_align = 1;
3723 if (num_mova && ! mova_p (mova))
3725 /* find_barrier had to change the first mova into a
3726 pcload; thus, we have to start with this new pcload. */
3727 insn = mova;
3728 num_mova = 0;
3730 /* Now find all the moves between the points and modify them. */
3731 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
3733 if (GET_CODE (scan) == CODE_LABEL)
3734 last_float = 0;
3735 if (broken_move (scan))
3737 rtx *patp = &PATTERN (scan), pat = *patp;
3738 rtx src, dst;
3739 rtx lab;
3740 rtx newsrc;
3741 enum machine_mode mode;
3743 if (GET_CODE (pat) == PARALLEL)
3744 patp = &XVECEXP (pat, 0, 0), pat = *patp;
3745 src = SET_SRC (pat);
3746 dst = SET_DEST (pat);
3747 mode = GET_MODE (dst);
3749 if (mode == SImode && hi_const (src)
3750 && REGNO (dst) != FPUL_REG)
3752 int offset = 0;
3754 mode = HImode;
3755 while (GET_CODE (dst) == SUBREG)
3757 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
3758 GET_MODE (SUBREG_REG (dst)),
3759 SUBREG_BYTE (dst),
3760 GET_MODE (dst));
3761 dst = SUBREG_REG (dst);
3763 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
3766 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3768 /* This must be an insn that clobbers r0. */
3769 rtx clobber = XVECEXP (PATTERN (scan), 0,
3770 XVECLEN (PATTERN (scan), 0) - 1);
3772 if (GET_CODE (clobber) != CLOBBER
3773 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
3774 abort ();
3776 if (last_float
3777 && reg_set_between_p (r0_rtx, last_float_move, scan))
3778 last_float = 0;
3779 if (TARGET_SHCOMPACT)
3781 /* The first SFmode constant after a DFmode
3782 constant may be pulled before a sequence
3783 of DFmode constants, so the second SFmode
3784 needs a label, just in case. */
3785 if (GET_MODE_SIZE (mode) == 4)
3787 if (last_float && may_need_align)
3788 last_float = 0;
3789 may_need_align = 0;
3791 if (last_float
3792 && (GET_MODE_SIZE (GET_MODE (last_float))
3793 != GET_MODE_SIZE (mode)))
3795 last_float = 0;
3796 if (GET_MODE_SIZE (mode) == 4)
3797 may_need_align = 1;
3800 lab = add_constant (src, mode, last_float);
3801 if (lab)
3802 emit_insn_before (gen_mova (lab), scan);
3803 else
3805 /* There will be a REG_UNUSED note for r0 on
3806 LAST_FLOAT_MOVE; we have to change it to REG_INC;
3807 otherwise reorg:mark_target_live_regs will not
3808 consider r0 to be used, and we could end up with a delay
3809 slot insn in front of SCAN that clobbers r0. */
3810 rtx note
3811 = find_regno_note (last_float_move, REG_UNUSED, 0);
3813 /* If we are not optimizing, then there may not be
3814 a note. */
3815 if (note)
3816 PUT_MODE (note, REG_INC);
3818 *last_float_addr = r0_inc_rtx;
3820 last_float_move = scan;
3821 last_float = src;
3822 newsrc = gen_rtx (MEM, mode,
3823 (((TARGET_SH4 && ! TARGET_FMOVD)
3824 || REGNO (dst) == FPUL_REG)
3825 ? r0_inc_rtx
3826 : r0_rtx));
3827 last_float_addr = &XEXP (newsrc, 0);
3829 /* Remove the clobber of r0. */
3830 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
3831 RTX_UNCHANGING_P (newsrc) = 1;
3833 /* This is a mova needing a label. Create it. */
3834 else if (GET_CODE (src) == UNSPEC
3835 && XINT (src, 1) == UNSPEC_MOVA
3836 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
3838 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
3839 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
3840 newsrc = gen_rtx_UNSPEC (SImode,
3841 gen_rtvec (1, newsrc),
3842 UNSPEC_MOVA);
3844 else
3846 lab = add_constant (src, mode, 0);
3847 newsrc = gen_rtx_MEM (mode,
3848 gen_rtx_LABEL_REF (VOIDmode, lab));
3849 RTX_UNCHANGING_P (newsrc) = 1;
3851 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
3852 INSN_CODE (scan) = -1;
3855 dump_table (barrier);
3856 insn = barrier;
3860 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
3861 INSN_ADDRESSES_FREE ();
3862 split_branches (first);
3864 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
3865 also has an effect on the register that holds the address of the sfunc.
3866 Insert an extra dummy insn in front of each sfunc that pretends to
3867 use this register. */
3868 if (flag_delayed_branch)
3870 for (insn = first; insn; insn = NEXT_INSN (insn))
3872 rtx reg = sfunc_uses_reg (insn);
3874 if (! reg)
3875 continue;
3876 emit_insn_before (gen_use_sfunc_addr (reg), insn);
3879 #if 0
3880 /* fpscr is not actually a user variable, but we pretend it is for the
3881 sake of the previous optimization passes, since we want it handled like
3882 one. However, we don't have any debugging information for it, so turn
3883 it into a non-user variable now. */
3884 if (TARGET_SH4)
3885 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
3886 #endif
3887 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
3891 get_dest_uid (label, max_uid)
3892 rtx label;
3893 int max_uid;
3895 rtx dest = next_real_insn (label);
3896 int dest_uid;
3897 if (! dest)
3898 /* This can happen for an undefined label. */
3899 return 0;
3900 dest_uid = INSN_UID (dest);
3901 /* If this is a newly created branch redirection blocking instruction,
3902 we cannot index the branch_uid or insn_addresses arrays with its
3903 uid. But then, we won't need to, because the actual destination is
3904 the following branch. */
3905 while (dest_uid >= max_uid)
3907 dest = NEXT_INSN (dest);
3908 dest_uid = INSN_UID (dest);
3910 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
3911 return 0;
3912 return dest_uid;
3915 /* Split condbranches that are out of range. Also add clobbers for
3916 scratch registers that are needed in far jumps.
3917 We do this before delay slot scheduling, so that it can take our
3918 newly created instructions into account. It also allows us to
3919 find branches with common targets more easily. */
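/* Editor's sketch of the rewrite this pass performs (illustrative SH
   assembly, hypothetical labels): an out-of-range conditional branch
       bt	.Lfar
   is inverted around an unconditional branch that can reach farther:
       bf	.Lskip
       bra	.Lfar
   .Lskip:
   gen_far_branch above, called from this pass, emits this shape, and
   gen_block_redirect keeps reorg from undoing the split.  */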
3921 static void
3922 split_branches (first)
3923 rtx first;
3925 rtx insn;
3926 struct far_branch **uid_branch, *far_branch_list = 0;
3927 int max_uid = get_max_uid ();
3929 /* Find out which branches are out of range. */
3930 shorten_branches (first);
3932 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
3933 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
3935 for (insn = first; insn; insn = NEXT_INSN (insn))
3936 if (! INSN_P (insn))
3937 continue;
3938 else if (INSN_DELETED_P (insn))
3940 /* Shorten_branches would split this instruction again,
3941 so transform it into a note. */
3942 PUT_CODE (insn, NOTE);
3943 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3944 NOTE_SOURCE_FILE (insn) = 0;
3946 else if (GET_CODE (insn) == JUMP_INSN
3947 /* Don't mess with ADDR_DIFF_VEC */
3948 && (GET_CODE (PATTERN (insn)) == SET
3949 || GET_CODE (PATTERN (insn)) == RETURN))
3951 enum attr_type type = get_attr_type (insn);
3952 if (type == TYPE_CBRANCH)
3954 rtx next, beyond;
3956 if (get_attr_length (insn) > 4)
3958 rtx src = SET_SRC (PATTERN (insn));
3959 rtx olabel = XEXP (XEXP (src, 1), 0);
3960 int addr = INSN_ADDRESSES (INSN_UID (insn));
3961 rtx label = 0;
3962 int dest_uid = get_dest_uid (olabel, max_uid);
3963 struct far_branch *bp = uid_branch[dest_uid];
3965 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
3966 the label if the LABEL_NUSES count drops to zero. There is
3967 always a jump_optimize pass that sets these values, but it
3968 proceeds to delete unreferenced code, and then if not
3969 optimizing, to un-delete the deleted instructions, thus
3970 leaving labels with use counts that are too low. */
3971 if (! optimize)
3973 JUMP_LABEL (insn) = olabel;
3974 LABEL_NUSES (olabel)++;
3976 if (! bp)
3978 bp = (struct far_branch *) alloca (sizeof *bp);
3979 uid_branch[dest_uid] = bp;
3980 bp->prev = far_branch_list;
3981 far_branch_list = bp;
3982 bp->far_label
3983 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
3984 LABEL_NUSES (bp->far_label)++;
3986 else
3988 label = bp->near_label;
3989 if (! label && bp->address - addr >= CONDJUMP_MIN)
3991 rtx block = bp->insert_place;
3993 if (GET_CODE (PATTERN (block)) == RETURN)
3994 block = PREV_INSN (block);
3995 else
3996 block = gen_block_redirect (block,
3997 bp->address, 2);
3998 label = emit_label_after (gen_label_rtx (),
3999 PREV_INSN (block));
4000 bp->near_label = label;
4002 else if (label && ! NEXT_INSN (label))
4004 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4005 bp->insert_place = insn;
4006 else
4007 gen_far_branch (bp);
4010 if (! label
4011 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4013 bp->near_label = label = gen_label_rtx ();
4014 bp->insert_place = insn;
4015 bp->address = addr;
4017 if (! redirect_jump (insn, label, 1))
4018 abort ();
4020 else
4022 /* get_attr_length (insn) == 2 */
4023 /* Check if we have a pattern where reorg wants to redirect
4024 the branch to a label from an unconditional branch that
4025 is too far away. */
4026 /* We can't use JUMP_LABEL here because it might be undefined
4027 when not optimizing. */
4028 /* A syntax error might cause beyond to be NULL_RTX. */
4029 beyond
4030 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4031 0));
4033 if (beyond
4034 && (GET_CODE (beyond) == JUMP_INSN
4035 || ((beyond = next_active_insn (beyond))
4036 && GET_CODE (beyond) == JUMP_INSN))
4037 && GET_CODE (PATTERN (beyond)) == SET
4038 && recog_memoized (beyond) == CODE_FOR_jump
4039 && ((INSN_ADDRESSES
4040 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4041 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4042 > 252 + 258 + 2))
4043 gen_block_redirect (beyond,
4044 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4047 next = next_active_insn (insn);
4049 if ((GET_CODE (next) == JUMP_INSN
4050 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4051 && GET_CODE (PATTERN (next)) == SET
4052 && recog_memoized (next) == CODE_FOR_jump
4053 && ((INSN_ADDRESSES
4054 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4055 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4056 > 252 + 258 + 2))
4057 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4059 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4061 int addr = INSN_ADDRESSES (INSN_UID (insn));
4062 rtx far_label = 0;
4063 int dest_uid = 0;
4064 struct far_branch *bp;
4066 if (type == TYPE_JUMP)
4068 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4069 dest_uid = get_dest_uid (far_label, max_uid);
4070 if (! dest_uid)
4072 /* Parse errors can lead to labels outside
4073 the insn stream. */
4074 if (! NEXT_INSN (far_label))
4075 continue;
4077 if (! optimize)
4079 JUMP_LABEL (insn) = far_label;
4080 LABEL_NUSES (far_label)++;
4082 redirect_jump (insn, NULL_RTX, 1);
4083 far_label = 0;
4086 bp = uid_branch[dest_uid];
4087 if (! bp)
4089 bp = (struct far_branch *) alloca (sizeof *bp);
4090 uid_branch[dest_uid] = bp;
4091 bp->prev = far_branch_list;
4092 far_branch_list = bp;
4093 bp->near_label = 0;
4094 bp->far_label = far_label;
4095 if (far_label)
4096 LABEL_NUSES (far_label)++;
4098 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4099 if (addr - bp->address <= CONDJUMP_MAX)
4100 emit_label_after (bp->near_label, PREV_INSN (insn));
4101 else
4103 gen_far_branch (bp);
4104 bp->near_label = 0;
4106 else
4107 bp->near_label = 0;
4108 bp->address = addr;
4109 bp->insert_place = insn;
4110 if (! far_label)
4111 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4112 else
4113 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4116 /* Generate all pending far branches,
4117 and free our references to the far labels. */
4118 while (far_branch_list)
4120 if (far_branch_list->near_label
4121 && ! NEXT_INSN (far_branch_list->near_label))
4122 gen_far_branch (far_branch_list);
4123 if (optimize
4124 && far_branch_list->far_label
4125 && ! --LABEL_NUSES (far_branch_list->far_label))
4126 delete_insn (far_branch_list->far_label);
4127 far_branch_list = far_branch_list->prev;
4130 /* Instruction length information is no longer valid due to the new
4131 instructions that have been generated. */
4132 init_insn_lengths ();
4135 /* Dump out instruction addresses, which is useful for debugging the
4136 constant pool table stuff.
4138 If relaxing, output the label and pseudo-ops used to link together
4139 calls and the instruction which set the registers. */
4141 /* ??? This is unnecessary, and probably should be deleted. This makes
4142 the insn_addresses declaration above unnecessary. */
4144 /* ??? The addresses printed by this routine for insns are nonsense for
4145 insns which are inside of a sequence where none of the inner insns have
4146 variable length. This is because the second pass of shorten_branches
4147 does not bother to update them. */
4149 void
4150 final_prescan_insn (insn, opvec, noperands)
4151 rtx insn;
4152 rtx *opvec ATTRIBUTE_UNUSED;
4153 int noperands ATTRIBUTE_UNUSED;
4155 if (TARGET_DUMPISIZE)
4156 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4158 if (TARGET_RELAX)
4160 rtx note;
4162 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4163 if (note)
4165 rtx pattern;
4167 pattern = PATTERN (insn);
4168 if (GET_CODE (pattern) == PARALLEL)
4169 pattern = XVECEXP (pattern, 0, 0);
4170 if (GET_CODE (pattern) == CALL
4171 || (GET_CODE (pattern) == SET
4172 && (GET_CODE (SET_SRC (pattern)) == CALL
4173 || get_attr_type (insn) == TYPE_SFUNC)))
4174 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4175 CODE_LABEL_NUMBER (XEXP (note, 0)));
4176 else if (GET_CODE (pattern) == SET)
4177 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4178 CODE_LABEL_NUMBER (XEXP (note, 0)));
4179 else
4180 abort ();
4185 /* Dump out any constants accumulated in the final pass. These will
4186 only be labels. */
4188 const char *
4189 output_jump_label_table ()
4191 int i;
4193 if (pool_size)
4195 fprintf (asm_out_file, "\t.align 2\n");
4196 for (i = 0; i < pool_size; i++)
4198 pool_node *p = &pool_vector[i];
4200 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4201 CODE_LABEL_NUMBER (p->label));
4202 output_asm_insn (".long %O0", &p->value);
4204 pool_size = 0;
4207 return "";
4210 /* A full frame looks like:
4212 arg-5
4213 arg-4
4214 [ if current_function_anonymous_args
4215 arg-3
4216 arg-2
4217 arg-1
4218 arg-0 ]
4219 saved-fp
4220 saved-r10
4221 saved-r11
4222 saved-r12
4223 saved-pr
4224 local-n
4226 local-1
4227 local-0 <- fp points here. */
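/* For instance, with 4-byte slots, local-1 in the picture above sits
   4 bytes above where fp points, so a load of it would look like
   mov.l @(4,r14),r1 (r14 being the frame pointer on SH).  This is an
   illustration, not code emitted by this file.  */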
4229 /* Number of bytes pushed for anonymous args, used to pass information
4230 between expand_prologue and expand_epilogue. */
4232 static int extra_push;
4234 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
4235 to be adjusted, and TEMP, if nonnegative, holds the register number
4236 of a general register that we may clobber. */
4238 static void
4239 output_stack_adjust (size, reg, temp, emit_fn)
4240 int size;
4241 rtx reg;
4242 int temp;
4243 rtx (*emit_fn) PARAMS ((rtx));
4245 if (size)
4247 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4249 if (size % align)
4250 abort ();
4252 if (CONST_OK_FOR_ADD (size))
4253 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4254 /* Try to do it with two partial adjustments; however, we must make
4255 sure that the stack is properly aligned at all times, in case
4256 an interrupt occurs between the two partial adjustments. */
4257 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4258 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4260 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4261 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4263 else
4265 rtx const_reg;
4266 rtx insn;
4268 /* If TEMP is invalid, we could temporarily save a general
4269 register to MACL. However, there is currently no need
4270 to handle this case, so just abort when we see it. */
4271 if (temp < 0)
4272 abort ();
4273 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4275 /* If SIZE is negative, subtract the positive value.
4276 This sometimes allows a constant pool entry to be shared
4277 between prologue and epilogue code. */
4278 if (size < 0)
4280 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4281 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4283 else
4285 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4286 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4288 if (emit_fn == frame_insn)
4289 REG_NOTES (insn)
4290 = (gen_rtx_EXPR_LIST
4291 (REG_FRAME_RELATED_EXPR,
4292 gen_rtx_SET (VOIDmode, reg,
4293 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4294 REG_NOTES (insn)));
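/* A worked example of the two-partial-adjustment case above, not part
   of the original source: with size = 200 and an 8-byte alignment,
   size / 2 & -align is 96 and the remainder is 104.  Both halves pass
   CONST_OK_FOR_ADD on SH1 through SH4 (signed 8-bit immediates), and
   each is a multiple of the alignment, so the stack stays aligned
   between the two adds.  */
#if 0
static int
split_fits_p (int size, int align)
{
  int first = size / 2 & -align;	/* e.g. 200 / 2 & -8 == 96 */
  int second = size - first;		/* e.g. 200 - 96 == 104 */
  return CONST_OK_FOR_ADD (first) && CONST_OK_FOR_ADD (second);
}
#endif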
4299 static rtx
4300 frame_insn (x)
4301 rtx x;
4303 x = emit_insn (x);
4304 RTX_FRAME_RELATED_P (x) = 1;
4305 return x;
4308 /* Output RTL to push register RN onto the stack. */
4310 static rtx
4311 push (rn)
4312 int rn;
4314 rtx x;
4315 if (rn == FPUL_REG)
4316 x = gen_push_fpul ();
4317 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4318 && FP_OR_XD_REGISTER_P (rn))
4320 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4321 return NULL_RTX;
4322 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4324 else if (TARGET_SH3E && FP_REGISTER_P (rn))
4325 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4326 else
4327 x = gen_push (gen_rtx_REG (SImode, rn));
4329 x = frame_insn (x);
4330 REG_NOTES (x)
4331 = gen_rtx_EXPR_LIST (REG_INC,
4332 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4333 return x;
4336 /* Output RTL to pop register RN from the stack. */
4338 static void
4339 pop (rn)
4340 int rn;
4342 rtx x;
4343 if (rn == FPUL_REG)
4344 x = gen_pop_fpul ();
4345 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4346 && FP_OR_XD_REGISTER_P (rn))
4348 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4349 return;
4350 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4352 else if (TARGET_SH3E && FP_REGISTER_P (rn))
4353 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4354 else
4355 x = gen_pop (gen_rtx_REG (SImode, rn));
4357 x = emit_insn (x);
4358 REG_NOTES (x)
4359 = gen_rtx_EXPR_LIST (REG_INC,
4360 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4363 /* Generate code to push the regs specified in the mask. */
4365 static void
4366 push_regs (mask)
4367 HOST_WIDE_INT *mask;
4369 int i;
4371 /* Push PR last; this gives better latencies after the prologue, and
4372 candidates for the return delay slot when there are no general
4373 registers pushed. */
4374 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4375 if (i != PR_REG && mask[i / 32] & (1 << (i % 32)))
4376 push (i);
4377 if (mask[PR_REG / 32] & (1 << (PR_REG % 32)))
4378 push (PR_REG);
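/* Illustrative sketch, not part of the original source: the live
   register mask used throughout this file is an array of 32-bit words
   indexed as below; PR_REG, for example, is register 17 in this port's
   numbering, so it occupies bit 17 of word 0.  */
#if 0
#define TEST_LIVE(mask, regno) \
  ((mask)[(regno) / 32] & (1 << ((regno) % 32)))
#endif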
4381 /* Work out the registers which need to be saved, both as a mask and a
4382 count of saved words.
4384 If doing a pragma interrupt function, then push all regs used by the
4385 function, and if we call another function (we can tell by looking at PR),
4386 make sure that all the regs it clobbers are safe too. */
4388 static void
4389 calc_live_regs (count_ptr, live_regs_mask)
4390 int *count_ptr;
4391 HOST_WIDE_INT *live_regs_mask;
4393 int reg;
4394 int count;
4395 int interrupt_handler;
4396 int pr_live;
4398 interrupt_handler = sh_cfun_interrupt_handler_p ();
4400 for (count = 0; 32 * count < FIRST_PSEUDO_REGISTER; count++)
4401 live_regs_mask[count] = 0;
4402 /* If we can avoid a lot of saves by switching to double mode, do that. */
4403 if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4404 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4405 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4406 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4407 && ++count > 2)
4409 target_flags &= ~FPU_SINGLE_BIT;
4410 break;
4412 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4413 knows how to use it. That means the pseudo originally allocated for
4414 the initial value can become the PR_MEDIA_REG hard register, as seen for
4415 execute/20010122-1.c:test9. */
4416 if (TARGET_SHMEDIA)
4417 pr_live = regs_ever_live[PR_MEDIA_REG];
4418 else
4420 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4421 pr_live = (pr_initial
4422 ? REGNO (pr_initial) != (PR_REG) : regs_ever_live[PR_REG]);
4424 /* Force PR to be live if the prologue has to call the SHmedia
4425 argument decoder or register saver. */
4426 if (TARGET_SHCOMPACT
4427 && ((current_function_args_info.call_cookie
4428 & ~ CALL_COOKIE_RET_TRAMP (1))
4429 || current_function_has_nonlocal_label))
4430 pr_live = 1;
4431 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4433 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4434 ? pr_live
4435 : (interrupt_handler && ! pragma_trapa)
4436 ? (/* Need to save all the regs ever live. */
4437 (regs_ever_live[reg]
4438 || (call_used_regs[reg]
4439 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4440 && pr_live))
4441 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4442 && reg != RETURN_ADDRESS_POINTER_REGNUM
4443 && reg != T_REG && reg != GBR_REG)
4444 : (/* Only push those regs which are used and need to be saved. */
4445 regs_ever_live[reg] && ! call_used_regs[reg]))
4447 live_regs_mask[reg / 32] |= 1 << (reg % 32);
4448 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4450 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4451 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4453 if (FP_REGISTER_P (reg))
4455 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4457 live_regs_mask[(reg ^ 1) / 32] |= 1 << ((reg ^ 1) % 32);
4458 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4461 else if (XD_REGISTER_P (reg))
4463 /* Must switch to double mode to access these registers. */
4464 target_flags &= ~FPU_SINGLE_BIT;
4470 *count_ptr = count;
4473 /* Code to generate prologue and epilogue sequences */
4475 /* PUSHED is the number of bytes that are being pushed on the
4476 stack for register saves. Return the frame size, padded
4477 appropriately so that the stack stays properly aligned. */
4478 static HOST_WIDE_INT
4479 rounded_frame_size (pushed)
4480 int pushed;
4482 HOST_WIDE_INT size = get_frame_size ();
4483 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4485 return ((size + pushed + align - 1) & -align) - pushed;
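/* Worked example, not part of the original source: with
   get_frame_size () == 20, pushed == 12 and an 8-byte STACK_BOUNDARY,
   the expression above yields ((20 + 12 + 7) & -8) - 12 == 20, so the
   frame plus the pushed registers together occupy 32 bytes, a multiple
   of the alignment.  */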
4488 /* Choose a call-clobbered target-branch register that remains
4489 unchanged along the whole function. We set it up as the return
4490 value in the prologue. */
4491 int
4492 sh_media_register_for_return ()
4494 int regno;
4495 int tr0_used;
4497 if (! current_function_is_leaf)
4498 return -1;
4500 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4502 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4503 if (call_used_regs[regno] && ! regs_ever_live[regno])
4504 return regno;
4506 return -1;
4509 void
4510 sh_expand_prologue ()
4512 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
4513 int d, i;
4514 int d_rounding = 0;
4515 int save_flags = target_flags;
4517 current_function_interrupt = sh_cfun_interrupt_handler_p ();
4519 /* We have pretend args if we had an object sent partially in registers
4520 and partially on the stack, e.g. a large structure. */
4521 output_stack_adjust (-current_function_pretend_args_size
4522 - current_function_args_info.stack_regs * 8,
4523 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4525 extra_push = 0;
4527 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
4528 /* We're going to use the PIC register to load the address of the
4529 incoming-argument decoder and/or of the return trampoline from
4530 the GOT, so make sure the PIC register is preserved and
4531 initialized. */
4532 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
4534 if (TARGET_SHCOMPACT
4535 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4537 int reg;
4539 /* First, make all registers with incoming arguments that will
4540 be pushed onto the stack live, so that register renaming
4541 doesn't overwrite them. */
4542 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
4543 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
4544 >= NPARM_REGS (SImode) - reg)
4545 for (; reg < NPARM_REGS (SImode); reg++)
4546 emit_insn (gen_shcompact_preserve_incoming_args
4547 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4548 else if (CALL_COOKIE_INT_REG_GET
4549 (current_function_args_info.call_cookie, reg) == 1)
4550 emit_insn (gen_shcompact_preserve_incoming_args
4551 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4553 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
4554 stack_pointer_rtx);
4555 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
4556 GEN_INT (current_function_args_info.call_cookie));
4557 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
4558 gen_rtx_REG (SImode, R0_REG));
4560 else if (TARGET_SHMEDIA)
4562 int tr = sh_media_register_for_return ();
4564 if (tr >= 0)
4566 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
4567 gen_rtx_REG (DImode, PR_MEDIA_REG));
4569 /* If this function only exits with sibcalls, this copy
4570 will be flagged as dead. */
4571 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4572 const0_rtx,
4573 REG_NOTES (insn));
4577 /* Emit the code for SETUP_VARARGS. */
4578 if (current_function_stdarg)
4580 /* This is not used by the SH3E calling convention. */
4581 if (TARGET_SH1 && ! TARGET_SH3E && ! TARGET_SH5 && ! TARGET_HITACHI)
4583 /* Push arg regs as if they'd been provided by caller in stack. */
4584 for (i = 0; i < NPARM_REGS(SImode); i++)
4586 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4587 rtx insn;
4589 if (i >= (NPARM_REGS(SImode)
4590 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4591 ))
4592 break;
4593 insn = push (rn);
4594 RTX_FRAME_RELATED_P (insn) = 0;
4595 extra_push += 4;
4600 /* If we're supposed to switch stacks at function entry, do so now. */
4601 if (sp_switch)
4602 emit_insn (gen_sp_switch_1 ());
4604 calc_live_regs (&d, live_regs_mask);
4605 /* ??? Maybe we could save some switching if we can move a mode switch
4606 that already happens to be at the function start into the prologue. */
4607 if (target_flags != save_flags)
4608 emit_insn (gen_toggle_sz ());
4610 if (TARGET_SH5)
4612 int i;
4613 int offset;
4614 int align;
4615 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4616 int offset_in_r0 = -1;
4617 int sp_in_r0 = 0;
4619 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
4620 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4621 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4623 offset = d + d_rounding;
4624 output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
4626 /* We loop twice: first, we save 8-byte aligned registers in the
4627 higher addresses, which are known to be aligned. Then, we
4628 proceed to saving 32-bit registers that don't need 8-byte
4629 alignment. */
4630 for (align = 1; align >= 0; align--)
4631 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
4632 if (live_regs_mask[i/32] & (1 << (i % 32)))
4634 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4635 int reg = i;
4636 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
4638 if (mode == SFmode && (i % 2) == 1
4639 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4640 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
4642 mode = DFmode;
4643 i--;
4644 reg--;
4647 /* If we're doing the aligned pass and this is not aligned,
4648 or we're doing the unaligned pass and this is aligned,
4649 skip it. */
4650 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4651 == 0) != align)
4652 continue;
4654 offset -= GET_MODE_SIZE (mode);
4656 reg_rtx = gen_rtx_REG (mode, reg);
4658 mem_rtx = gen_rtx_MEM (mode,
4659 gen_rtx_PLUS (Pmode,
4660 stack_pointer_rtx,
4661 GEN_INT (offset)));
4663 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
4665 mem_rtx = NULL_RTX;
4667 try_pre_dec:
4669 if (HAVE_PRE_DECREMENT
4670 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
4671 || mem_rtx == NULL_RTX
4672 || i == PR_REG || SPECIAL_REGISTER_P (i)))
4674 pre_dec = gen_rtx_MEM (mode,
4675 gen_rtx_PRE_DEC (Pmode, r0));
4677 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
4678 pre_dec_ok);
4680 pre_dec = NULL_RTX;
4682 break;
4684 pre_dec_ok:
4685 mem_rtx = NULL_RTX;
4686 offset += GET_MODE_SIZE (mode);
4688 while (0);
4690 if (mem_rtx != NULL_RTX)
4691 goto addr_ok;
4693 if (offset_in_r0 == -1)
4695 emit_move_insn (r0, GEN_INT (offset));
4696 offset_in_r0 = offset;
4698 else if (offset != offset_in_r0)
4700 emit_move_insn (r0,
4701 gen_rtx_PLUS
4702 (Pmode, r0,
4703 GEN_INT (offset - offset_in_r0)));
4704 offset_in_r0 += offset - offset_in_r0;
4707 if (pre_dec != NULL_RTX)
4709 if (! sp_in_r0)
4711 emit_move_insn (r0,
4712 gen_rtx_PLUS
4713 (Pmode, r0, stack_pointer_rtx));
4714 sp_in_r0 = 1;
4717 offset -= GET_MODE_SIZE (mode);
4718 offset_in_r0 -= GET_MODE_SIZE (mode);
4720 mem_rtx = pre_dec;
4722 else if (sp_in_r0)
4723 mem_rtx = gen_rtx_MEM (mode, r0);
4724 else
4725 mem_rtx = gen_rtx_MEM (mode,
4726 gen_rtx_PLUS (Pmode,
4727 stack_pointer_rtx,
4728 r0));
4730 /* We must not use an r0-based address for target-branch
4731 registers or for special registers without pre-dec
4732 memory addresses, since we store their values in r0
4733 first. */
4734 if (TARGET_REGISTER_P (i)
4735 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
4736 && mem_rtx != pre_dec))
4737 abort ();
4739 addr_ok:
4740 if (TARGET_REGISTER_P (i)
4741 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
4742 && mem_rtx != pre_dec))
4744 rtx r0mode = gen_rtx_REG (GET_MODE (reg_rtx), R0_REG);
4746 emit_move_insn (r0mode, reg_rtx);
4748 offset_in_r0 = -1;
4749 sp_in_r0 = 0;
4751 reg_rtx = r0mode;
4754 emit_move_insn (mem_rtx, reg_rtx);
4757 if (offset != d_rounding)
4758 abort ();
4760 else
4761 push_regs (live_regs_mask);
4763 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
4765 rtx insn = get_last_insn ();
4766 rtx last = emit_insn (gen_GOTaddr2picreg ());
4768 /* Mark these insns as possibly dead. Sometimes, flow2 may
4769 delete all uses of the PIC register. In this case, let it
4770 delete the initialization too. */
4773 insn = NEXT_INSN (insn);
4775 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4776 const0_rtx,
4777 REG_NOTES (insn));
4779 while (insn != last);
4782 if (SHMEDIA_REGS_STACK_ADJUST ())
4784 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4785 gen_rtx_SYMBOL_REF (Pmode,
4786 TARGET_FPU_ANY
4787 ? "__GCC_push_shmedia_regs"
4788 : "__GCC_push_shmedia_regs_nofpu"));
4789 /* This must NOT go through the PLT, otherwise mach and macl
4790 may be clobbered. */
4791 emit_insn (gen_shmedia_save_restore_regs_compact
4792 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
4795 if (target_flags != save_flags)
4797 rtx insn = emit_insn (gen_toggle_sz ());
4799 /* If we're lucky, a mode switch in the function body will
4800 overwrite fpscr, turning this insn dead. Tell flow this
4801 insn is ok to delete. */
4802 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4803 const0_rtx,
4804 REG_NOTES (insn));
4807 target_flags = save_flags;
4809 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
4810 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4812 if (frame_pointer_needed)
4813 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
4815 if (TARGET_SHCOMPACT
4816 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4818 /* This must NOT go through the PLT, otherwise mach and macl
4819 may be clobbered. */
4820 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4821 gen_rtx_SYMBOL_REF (Pmode,
4822 "__GCC_shcompact_incoming_args"));
4823 emit_insn (gen_shcompact_incoming_args ());
4827 void
4828 sh_expand_epilogue ()
4830 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
4831 int d, i;
4832 int d_rounding = 0;
4834 int save_flags = target_flags;
4835 int frame_size;
4837 calc_live_regs (&d, live_regs_mask);
4839 if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
4840 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4841 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4843 frame_size = rounded_frame_size (d) - d_rounding;
4845 if (frame_pointer_needed)
4847 output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
4849 /* We must avoid moving the stack pointer adjustment past code
4850 which reads from the local frame, else an interrupt could
4851 occur after the SP adjustment and clobber data in the local
4852 frame. */
4853 emit_insn (gen_blockage ());
4854 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
4856 else if (frame_size)
4858 /* We must avoid moving the stack pointer adjustment past code
4859 which reads from the local frame, else an interrupt could
4860 occur after the SP adjustment and clobber data in the local
4861 frame. */
4862 emit_insn (gen_blockage ());
4863 output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
4866 if (SHMEDIA_REGS_STACK_ADJUST ())
4868 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4869 gen_rtx_SYMBOL_REF (Pmode,
4870 TARGET_FPU_ANY
4871 ? "__GCC_pop_shmedia_regs"
4872 : "__GCC_pop_shmedia_regs_nofpu"));
4873 /* This must NOT go through the PLT, otherwise mach and macl
4874 may be clobbered. */
4875 emit_insn (gen_shmedia_save_restore_regs_compact
4876 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
4879 /* Pop all the registers. */
4881 if (target_flags != save_flags)
4882 emit_insn (gen_toggle_sz ());
4883 if (TARGET_SH5)
4885 int offset = d_rounding;
4886 int offset_in_r0 = -1;
4887 int sp_in_r0 = 0;
4888 int align;
4889 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4891 /* We loop twice: first, we restore the 8-byte aligned registers from
4892 the higher addresses, which are known to be aligned. Then, we
4893 proceed to restoring the 32-bit registers that don't need 8-byte
4894 alignment. */
4895 for (align = 0; align <= 1; align++)
4896 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4897 if (live_regs_mask[i/32] & (1 << (i % 32)))
4899 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4900 int reg = i;
4901 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
4903 if (mode == SFmode && (i % 2) == 0
4904 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4905 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
4907 mode = DFmode;
4908 i++;
4911 /* If we're doing the aligned pass and this is not aligned,
4912 or we're doing the unaligned pass and this is aligned,
4913 skip it. */
4914 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4915 == 0) != align)
4916 continue;
4918 reg_rtx = gen_rtx_REG (mode, reg);
4920 mem_rtx = gen_rtx_MEM (mode,
4921 gen_rtx_PLUS (Pmode,
4922 stack_pointer_rtx,
4923 GEN_INT (offset)));
4925 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
4927 mem_rtx = NULL_RTX;
4929 try_post_inc:
4931 if (HAVE_POST_INCREMENT
4932 && (offset == offset_in_r0
4933 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
4934 && mem_rtx == NULL_RTX)
4935 || i == PR_REG || SPECIAL_REGISTER_P (i)))
4937 post_inc = gen_rtx_MEM (mode,
4938 gen_rtx_POST_INC (Pmode, r0));
4940 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
4941 post_inc_ok);
4943 post_inc = NULL_RTX;
4945 break;
4947 post_inc_ok:
4948 mem_rtx = NULL_RTX;
4950 while (0);
4952 if (mem_rtx != NULL_RTX)
4953 goto addr_ok;
4955 if (offset_in_r0 == -1)
4957 emit_move_insn (r0, GEN_INT (offset));
4958 offset_in_r0 = offset;
4960 else if (offset != offset_in_r0)
4962 emit_move_insn (r0,
4963 gen_rtx_PLUS
4964 (Pmode, r0,
4965 GEN_INT (offset - offset_in_r0)));
4966 offset_in_r0 += offset - offset_in_r0;
4969 if (post_inc != NULL_RTX)
4971 if (! sp_in_r0)
4973 emit_move_insn (r0,
4974 gen_rtx_PLUS
4975 (Pmode, r0, stack_pointer_rtx));
4976 sp_in_r0 = 1;
4979 mem_rtx = post_inc;
4981 offset_in_r0 += GET_MODE_SIZE (mode);
4983 else if (sp_in_r0)
4984 mem_rtx = gen_rtx_MEM (mode, r0);
4985 else
4986 mem_rtx = gen_rtx_MEM (mode,
4987 gen_rtx_PLUS (Pmode,
4988 stack_pointer_rtx,
4989 r0));
4991 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
4992 && mem_rtx != post_inc)
4993 abort ();
4995 addr_ok:
4996 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
4997 && mem_rtx != post_inc)
4999 insn = emit_move_insn (r0, mem_rtx);
5000 mem_rtx = r0;
5002 else if (TARGET_REGISTER_P (i))
5004 rtx r1 = gen_rtx_REG (mode, R1_REG);
5006 insn = emit_move_insn (r1, mem_rtx);
5007 mem_rtx = r1;
5010 insn = emit_move_insn (reg_rtx, mem_rtx);
5012 offset += GET_MODE_SIZE (mode);
5015 if (offset != d + d_rounding)
5016 abort ();
5018 goto finish;
5020 else
5021 d = 0;
5022 if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
5023 pop (PR_REG);
5024 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5026 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5028 if (j != PR_REG && live_regs_mask[j / 32] & (1 << (j % 32)))
5029 pop (j);
5031 finish:
5032 if (target_flags != save_flags)
5033 emit_insn (gen_toggle_sz ());
5034 target_flags = save_flags;
5036 output_stack_adjust (extra_push + current_function_pretend_args_size
5037 + d + d_rounding
5038 + current_function_args_info.stack_regs * 8,
5039 stack_pointer_rtx, 7, emit_insn);
5041 /* Switch back to the normal stack if necessary. */
5042 if (sp_switch)
5043 emit_insn (gen_sp_switch_2 ());
5045 /* Tell flow the insn that pops PR isn't dead. */
5046 /* PR_REG will never be live in SHmedia mode, and we don't need to
5047 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5048 by the return pattern. */
5049 if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
5050 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5053 static int sh_need_epilogue_known = 0;
5055 int
5056 sh_need_epilogue ()
5058 if (! sh_need_epilogue_known)
5060 rtx epilogue;
5062 start_sequence ();
5063 sh_expand_epilogue ();
5064 epilogue = get_insns ();
5065 end_sequence ();
5066 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5068 return sh_need_epilogue_known > 0;
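/* Note, not part of the original source: sh_need_epilogue_known acts
   as a memoized tri-state -- 0 means not computed yet, 1 means an
   epilogue is needed, and -1 means the epilogue expands to nothing and
   may be omitted.  */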
5071 /* Clear variables at function end. */
5073 static void
5074 sh_output_function_epilogue (file, size)
5075 FILE *file ATTRIBUTE_UNUSED;
5076 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5078 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5079 sh_need_epilogue_known = 0;
5080 sp_switch = NULL_RTX;
5083 rtx
5084 sh_builtin_saveregs ()
5086 /* First unnamed integer register. */
5087 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5088 /* Number of integer registers we need to save. */
5089 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5090 /* First unnamed SFmode float reg */
5091 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5092 /* Number of SFmode float regs to save. */
5093 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5094 rtx regbuf, fpregs;
5095 int bufsize, regno;
5096 HOST_WIDE_INT alias_set;
5098 if (TARGET_SH5)
5100 if (n_intregs)
5102 int pushregs = n_intregs;
5104 while (pushregs < NPARM_REGS (SImode) - 1
5105 && (CALL_COOKIE_INT_REG_GET
5106 (current_function_args_info.call_cookie,
5107 NPARM_REGS (SImode) - pushregs)
5108 == 1))
5110 current_function_args_info.call_cookie
5111 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5112 - pushregs, 1);
5113 pushregs++;
5116 if (pushregs == NPARM_REGS (SImode))
5117 current_function_args_info.call_cookie
5118 |= (CALL_COOKIE_INT_REG (0, 1)
5119 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5120 else
5121 current_function_args_info.call_cookie
5122 |= CALL_COOKIE_STACKSEQ (pushregs);
5124 current_function_pretend_args_size += 8 * n_intregs;
5126 if (TARGET_SHCOMPACT)
5127 return const0_rtx;
5130 if (! TARGET_SH3E && ! TARGET_SH4 && ! TARGET_SH5)
5132 error ("__builtin_saveregs not supported by this subtarget");
5133 return const0_rtx;
5136 if (TARGET_SHMEDIA)
5137 n_floatregs = 0;
5139 /* Allocate block of memory for the regs. */
5140 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5141 Or can assign_stack_local accept a 0 SIZE argument? */
5142 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5144 if (TARGET_SHMEDIA)
5145 regbuf = gen_rtx_MEM (BLKmode,
5146 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5147 else if (n_floatregs & 1)
5149 rtx addr;
5151 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5152 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5153 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5154 regbuf = change_address (regbuf, BLKmode, addr);
5156 else
5157 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5158 alias_set = get_varargs_alias_set ();
5159 set_mem_alias_set (regbuf, alias_set);
5161 /* Save int args.
5162 This is optimized to only save the regs that are necessary. Explicitly
5163 named args need not be saved. */
5164 if (n_intregs > 0)
5165 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5166 adjust_address (regbuf, BLKmode,
5167 n_floatregs * UNITS_PER_WORD),
5168 n_intregs, n_intregs * UNITS_PER_WORD);
5170 if (TARGET_SHMEDIA)
5171 /* Return the address of the regbuf. */
5172 return XEXP (regbuf, 0);
5174 /* Save float args.
5175 This is optimized to only save the regs that are necessary. Explicitly
5176 named args need not be saved.
5177 We explicitly build a pointer to the buffer because it halves the insn
5178 count when not optimizing (otherwise the pointer is built for each reg
5179 saved).
5180 We emit the moves in reverse order so that we can use predecrement. */
5182 fpregs = gen_reg_rtx (Pmode);
5183 emit_move_insn (fpregs, XEXP (regbuf, 0));
5184 emit_insn (gen_addsi3 (fpregs, fpregs,
5185 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5186 if (TARGET_SH4)
5188 rtx mem;
5189 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5191 emit_insn (gen_addsi3 (fpregs, fpregs,
5192 GEN_INT (-2 * UNITS_PER_WORD)));
5193 mem = gen_rtx_MEM (DFmode, fpregs);
5194 set_mem_alias_set (mem, alias_set);
5195 emit_move_insn (mem,
5196 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5198 regno = first_floatreg;
5199 if (regno & 1)
5201 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5202 mem = gen_rtx_MEM (SFmode, fpregs);
5203 set_mem_alias_set (mem, alias_set);
5204 emit_move_insn (mem,
5205 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5206 - (TARGET_LITTLE_ENDIAN != 0)));
5209 else
5210 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5212 rtx mem;
5214 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5215 mem = gen_rtx_MEM (SFmode, fpregs);
5216 set_mem_alias_set (mem, alias_set);
5217 emit_move_insn (mem,
5218 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5221 /* Return the address of the regbuf. */
5222 return XEXP (regbuf, 0);
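/* Illustrative layout of the buffer built above for the non-SH5,
   non-SHmedia case, assuming two unnamed float args and three unnamed
   int args (a sketch, not part of the original source):

     regbuf + 0                    : SFmode regs, filled downward with
                                     predecrement from the high end
     regbuf + n_floatregs * 4      : SImode regs r5..r7, filled upward

   The returned value is the address of regbuf itself.  */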
5225 /* Define the `__builtin_va_list' type for the ABI. */
5227 tree
5228 sh_build_va_list ()
5230 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5231 tree record;
5233 if (TARGET_SH5 || (! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
5234 return ptr_type_node;
5236 record = make_node (RECORD_TYPE);
5238 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5239 ptr_type_node);
5240 f_next_o_limit = build_decl (FIELD_DECL,
5241 get_identifier ("__va_next_o_limit"),
5242 ptr_type_node);
5243 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5244 ptr_type_node);
5245 f_next_fp_limit = build_decl (FIELD_DECL,
5246 get_identifier ("__va_next_fp_limit"),
5247 ptr_type_node);
5248 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5249 ptr_type_node);
5251 DECL_FIELD_CONTEXT (f_next_o) = record;
5252 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5253 DECL_FIELD_CONTEXT (f_next_fp) = record;
5254 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5255 DECL_FIELD_CONTEXT (f_next_stack) = record;
5257 TYPE_FIELDS (record) = f_next_o;
5258 TREE_CHAIN (f_next_o) = f_next_o_limit;
5259 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5260 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5261 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5263 layout_type (record);
5265 return record;
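/* Rough C equivalent of the record built above (a sketch, not part of
   the original source):  */
#if 0
struct __sh_va_list
{
  void *__va_next_o;		/* next integer register argument */
  void *__va_next_o_limit;	/* end of the integer register save area */
  void *__va_next_fp;		/* next floating-point register argument */
  void *__va_next_fp_limit;	/* end of the FP register save area */
  void *__va_next_stack;	/* next argument passed on the stack */
};
#endif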
5268 /* Implement `va_start' for varargs and stdarg. */
5270 void
5271 sh_va_start (valist, nextarg)
5272 tree valist;
5273 rtx nextarg;
5275 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5276 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5277 tree t, u;
5278 int nfp, nint;
5280 if (TARGET_SH5)
5282 expand_builtin_saveregs ();
5283 std_expand_builtin_va_start (valist, nextarg);
5284 return;
5287 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
5289 std_expand_builtin_va_start (valist, nextarg);
5290 return;
5293 f_next_o = TYPE_FIELDS (va_list_type_node);
5294 f_next_o_limit = TREE_CHAIN (f_next_o);
5295 f_next_fp = TREE_CHAIN (f_next_o_limit);
5296 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5297 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5299 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5300 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5301 valist, f_next_o_limit);
5302 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5303 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5304 valist, f_next_fp_limit);
5305 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5306 valist, f_next_stack);
5308 /* Call __builtin_saveregs. */
5309 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
5310 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
5311 TREE_SIDE_EFFECTS (t) = 1;
5312 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5314 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
5315 if (nfp < 8)
5316 nfp = 8 - nfp;
5317 else
5318 nfp = 0;
5319 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5320 build_int_2 (UNITS_PER_WORD * nfp, 0)));
5321 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
5322 TREE_SIDE_EFFECTS (t) = 1;
5323 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5325 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
5326 TREE_SIDE_EFFECTS (t) = 1;
5327 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5329 nint = current_function_args_info.arg_count[SH_ARG_INT];
5330 if (nint < 4)
5331 nint = 4 - nint;
5332 else
5333 nint = 0;
5334 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5335 build_int_2 (UNITS_PER_WORD * nint, 0)));
5336 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
5337 TREE_SIDE_EFFECTS (t) = 1;
5338 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5340 u = make_tree (ptr_type_node, nextarg);
5341 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
5342 TREE_SIDE_EFFECTS (t) = 1;
5343 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
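/* Worked example, not part of the original source: for a function
   whose named arguments use 3 float and 2 int registers, nfp == 8 - 3
   == 5 and nint == 4 - 2 == 2, so next_fp_limit ends up
   5 * UNITS_PER_WORD bytes past the buffer returned by
   __builtin_saveregs, next_o starts right there, and next_o_limit lies
   a further 2 * UNITS_PER_WORD bytes beyond.  */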
5346 /* Implement `va_arg'. */
5348 rtx
5349 sh_va_arg (valist, type)
5350 tree valist, type;
5352 HOST_WIDE_INT size, rsize;
5353 tree tmp, pptr_type_node;
5354 rtx addr_rtx, r;
5355 rtx result;
5356 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
5358 size = int_size_in_bytes (type);
5359 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5360 pptr_type_node = build_pointer_type (ptr_type_node);
5362 if (pass_by_ref)
5363 type = build_pointer_type (type);
5365 if (! TARGET_SH5 && (TARGET_SH3E || TARGET_SH4) && ! TARGET_HITACHI)
5367 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5368 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5369 int pass_as_float;
5370 rtx lab_false, lab_over;
5372 f_next_o = TYPE_FIELDS (va_list_type_node);
5373 f_next_o_limit = TREE_CHAIN (f_next_o);
5374 f_next_fp = TREE_CHAIN (f_next_o_limit);
5375 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5376 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5378 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5379 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5380 valist, f_next_o_limit);
5381 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
5382 valist, f_next_fp);
5383 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5384 valist, f_next_fp_limit);
5385 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5386 valist, f_next_stack);
5388 if (TARGET_SH4)
5390 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
5391 || (TREE_CODE (type) == COMPLEX_TYPE
5392 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5393 && size <= 16));
5395 else
5397 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
5400 addr_rtx = gen_reg_rtx (Pmode);
5401 lab_false = gen_label_rtx ();
5402 lab_over = gen_label_rtx ();
5404 if (pass_as_float)
5406 int first_floatreg
5407 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5408 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5410 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
5411 EXPAND_NORMAL),
5412 expand_expr (next_fp_limit, NULL_RTX,
5413 Pmode, EXPAND_NORMAL),
5414 GE, const1_rtx, Pmode, 1, lab_false);
5416 if (TYPE_ALIGN (type) > BITS_PER_WORD
5417 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
5418 && (n_floatregs & 1)))
5420 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
5421 build_int_2 (UNITS_PER_WORD, 0));
5422 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
5423 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
5424 TREE_SIDE_EFFECTS (tmp) = 1;
5425 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5428 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
5429 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5430 if (r != addr_rtx)
5431 emit_move_insn (addr_rtx, r);
5433 emit_jump_insn (gen_jump (lab_over));
5434 emit_barrier ();
5435 emit_label (lab_false);
5437 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5438 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5439 if (r != addr_rtx)
5440 emit_move_insn (addr_rtx, r);
5442 else
5444 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
5445 build_int_2 (rsize, 0));
5447 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
5448 EXPAND_NORMAL),
5449 expand_expr (next_o_limit, NULL_RTX,
5450 Pmode, EXPAND_NORMAL),
5451 GT, const1_rtx, Pmode, 1, lab_false);
5453 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
5454 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5455 if (r != addr_rtx)
5456 emit_move_insn (addr_rtx, r);
5458 emit_jump_insn (gen_jump (lab_over));
5459 emit_barrier ();
5460 emit_label (lab_false);
5462 if (size > 4 && ! TARGET_SH4)
5464 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
5465 TREE_SIDE_EFFECTS (tmp) = 1;
5466 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5469 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5470 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5471 if (r != addr_rtx)
5472 emit_move_insn (addr_rtx, r);
5475 emit_label (lab_over);
5477 tmp = make_tree (pptr_type_node, addr_rtx);
5478 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
5481 /* ??? In va-sh.h, there had been code to make values larger than
5482 size 8 indirect. This does not match the FUNCTION_ARG macros. */
5484 result = std_expand_builtin_va_arg (valist, type);
5485 if (pass_by_ref)
5487 #ifdef POINTERS_EXTEND_UNSIGNED
5488 if (GET_MODE (result) != Pmode)
5489 result = convert_memory_address (Pmode, result);
5490 #endif
5491 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
5492 set_mem_alias_set (result, get_varargs_alias_set ());
5494 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
5495 argument to the varargs alias set. */
5496 return result;
5499 /* Define the offset between two registers, one to be eliminated, and
5500 the other its replacement, at the start of a routine. */
5502 int
5503 initial_elimination_offset (from, to)
5504 int from;
5505 int to;
5507 int regs_saved;
5508 int regs_saved_rounding = 0;
5509 int total_saved_regs_space;
5510 int total_auto_space;
5511 int save_flags = target_flags;
5512 int copy_flags;
5514 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
5515 calc_live_regs (&regs_saved, live_regs_mask);
5516 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
5517 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
5518 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5519 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
5521 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
5522 copy_flags = target_flags;
5523 target_flags = save_flags;
5525 total_saved_regs_space = regs_saved + regs_saved_rounding;
5527 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
5528 return total_saved_regs_space + total_auto_space
5529 + current_function_args_info.byref_regs * 8;
5531 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5532 return total_saved_regs_space + total_auto_space
5533 + current_function_args_info.byref_regs * 8;
5535 /* Initial gap between fp and sp is 0. */
5536 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5537 return 0;
5539 if (from == RETURN_ADDRESS_POINTER_REGNUM
5540 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
5542 if (TARGET_SH5)
5544 int i, n = total_saved_regs_space;
5545 int align;
5546 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5548 n += total_auto_space;
5550 /* If it wasn't saved, there's not much we can do. */
5551 if ((live_regs_mask[pr_reg / 32] & (1 << (pr_reg % 32))) == 0)
5552 return n;
5554 target_flags = copy_flags;
5556 /* We loop twice: first, check the 8-byte aligned registers,
5557 which are stored in the higher addresses and known to be
5558 aligned. Then, check the 32-bit registers that don't
5559 need 8-byte alignment. */
5560 for (align = 1; align >= 0; align--)
5561 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5562 if (live_regs_mask[i/32] & (1 << (i % 32)))
5564 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5566 if (mode == SFmode && (i % 2) == 1
5567 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5568 && (live_regs_mask[(i ^ 1) / 32]
5569 & (1 << ((i ^ 1) % 32))))
5571 mode = DFmode;
5572 i--;
5575 /* If we're doing the aligned pass and this is not aligned,
5576 or we're doing the unaligned pass and this is aligned,
5577 skip it. */
5578 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5579 == 0) != align)
5580 continue;
5582 n -= GET_MODE_SIZE (mode);
5584 if (i == pr_reg)
5586 target_flags = save_flags;
5587 return n;
5591 abort ();
5593 else
5594 return total_auto_space;
5597 abort ();
5600 /* Handle machine-specific pragmas so as to be semi-compatible with the
5601 Hitachi compiler. */
5603 void
5604 sh_pr_interrupt (pfile)
5605 cpp_reader *pfile ATTRIBUTE_UNUSED;
5607 pragma_interrupt = 1;
5610 void
5611 sh_pr_trapa (pfile)
5612 cpp_reader *pfile ATTRIBUTE_UNUSED;
5614 pragma_interrupt = pragma_trapa = 1;
5617 void
5618 sh_pr_nosave_low_regs (pfile)
5619 cpp_reader *pfile ATTRIBUTE_UNUSED;
5621 pragma_nosave_low_regs = 1;
5624 /* Generate an 'interrupt_handler' attribute for decls. */
5626 static void
5627 sh_insert_attributes (node, attributes)
5628 tree node;
5629 tree * attributes;
5631 if (! pragma_interrupt
5632 || TREE_CODE (node) != FUNCTION_DECL)
5633 return;
5635 /* We are only interested in declarations. */
5636 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
5637 return;
5639 /* Add an 'interrupt_handler' attribute. */
5640 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
5642 return;
5645 /* Supported attributes:
5647 interrupt_handler -- specifies this function is an interrupt handler.
5649 sp_switch -- specifies an alternate stack for an interrupt handler
5650 to run on.
5652 trap_exit -- use a trapa to exit an interrupt function instead of
5653 an rte instruction. */
5655 const struct attribute_spec sh_attribute_table[] =
5657 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
5658 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
5659 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
5660 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
5661 { NULL, 0, 0, false, false, false, NULL }
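/* Illustrative use of these attributes in user code; the variable name
   alt_stack and the trap number are hypothetical (a sketch, not part
   of the original source).  */
#if 0
char *alt_stack;
void handler (void)
     __attribute__ ((interrupt_handler,
		     sp_switch ("alt_stack"),
		     trap_exit (11)));
#endif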
5664 /* Handle an "interrupt_handler" attribute; arguments as in
5665 struct attribute_spec.handler. */
5666 static tree
5667 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
5668 tree *node;
5669 tree name;
5670 tree args ATTRIBUTE_UNUSED;
5671 int flags ATTRIBUTE_UNUSED;
5672 bool *no_add_attrs;
5674 if (TREE_CODE (*node) != FUNCTION_DECL)
5676 warning ("`%s' attribute only applies to functions",
5677 IDENTIFIER_POINTER (name));
5678 *no_add_attrs = true;
5680 else if (TARGET_SHCOMPACT)
5682 error ("attribute interrupt_handler is not compatible with -m5-compact");
5683 *no_add_attrs = true;
5686 return NULL_TREE;
5689 /* Handle an "sp_switch" attribute; arguments as in
5690 struct attribute_spec.handler. */
5691 static tree
5692 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
5693 tree *node;
5694 tree name;
5695 tree args;
5696 int flags ATTRIBUTE_UNUSED;
5697 bool *no_add_attrs;
5699 if (TREE_CODE (*node) != FUNCTION_DECL)
5701 warning ("`%s' attribute only applies to functions",
5702 IDENTIFIER_POINTER (name));
5703 *no_add_attrs = true;
5705 else if (!pragma_interrupt)
5707 /* The sp_switch attribute only has meaning for interrupt functions. */
5708 warning ("`%s' attribute only applies to interrupt functions",
5709 IDENTIFIER_POINTER (name));
5710 *no_add_attrs = true;
5712 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
5714 /* The argument must be a constant string. */
5715 warning ("`%s' attribute argument not a string constant",
5716 IDENTIFIER_POINTER (name));
5717 *no_add_attrs = true;
5719 else
5721 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
5722 TREE_STRING_POINTER (TREE_VALUE (args)));
5725 return NULL_TREE;
5728 /* Handle a "trap_exit" attribute; arguments as in
5729 struct attribute_spec.handler. */
5730 static tree
5731 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
5732 tree *node;
5733 tree name;
5734 tree args;
5735 int flags ATTRIBUTE_UNUSED;
5736 bool *no_add_attrs;
5738 if (TREE_CODE (*node) != FUNCTION_DECL)
5740 warning ("`%s' attribute only applies to functions",
5741 IDENTIFIER_POINTER (name));
5742 *no_add_attrs = true;
5744 else if (!pragma_interrupt)
5746 /* The trap_exit attribute only has meaning for interrupt functions. */
5747 warning ("`%s' attribute only applies to interrupt functions",
5748 IDENTIFIER_POINTER (name));
5749 *no_add_attrs = true;
5751 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
5753 /* The argument must be a constant integer. */
5754 warning ("`%s' attribute argument not an integer constant",
5755 IDENTIFIER_POINTER (name));
5756 *no_add_attrs = true;
5758 else
5760 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
5763 return NULL_TREE;
5766 int
5767 sh_cfun_interrupt_handler_p ()
5769 return (lookup_attribute ("interrupt_handler",
5770 DECL_ATTRIBUTES (current_function_decl))
5771 != NULL_TREE);
5774 /* Predicates used by the templates. */
5776 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
5777 Used only in general_movsrc_operand. */
5780 system_reg_operand (op, mode)
5781 rtx op;
5782 enum machine_mode mode ATTRIBUTE_UNUSED;
5784 switch (REGNO (op))
5786 case PR_REG:
5787 case MACL_REG:
5788 case MACH_REG:
5789 return 1;
5791 return 0;
5794 /* Returns 1 if OP can be the source of a simple move operation.
5795 Same as general_operand, but a LABEL_REF is valid, while PRE_DEC is
5796 invalid, as are subregs of system registers. */
5799 general_movsrc_operand (op, mode)
5800 rtx op;
5801 enum machine_mode mode;
5803 if (GET_CODE (op) == MEM)
5805 rtx inside = XEXP (op, 0);
5806 if (GET_CODE (inside) == CONST)
5807 inside = XEXP (inside, 0);
5809 if (GET_CODE (inside) == LABEL_REF)
5810 return 1;
5812 if (GET_CODE (inside) == PLUS
5813 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
5814 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
5815 return 1;
5817 /* Only post inc allowed. */
5818 if (GET_CODE (inside) == PRE_DEC)
5819 return 0;
5822 if ((mode == QImode || mode == HImode)
5823 && (GET_CODE (op) == SUBREG
5824 && GET_CODE (XEXP (op, 0)) == REG
5825 && system_reg_operand (XEXP (op, 0), mode)))
5826 return 0;
5828 return general_operand (op, mode);
5831 /* Returns 1 if OP can be a destination of a move.
5832 Same as general_operand, but no preinc allowed. */
5835 general_movdst_operand (op, mode)
5836 rtx op;
5837 enum machine_mode mode;
5839 /* Only pre dec allowed. */
5840 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
5841 return 0;
5843 return general_operand (op, mode);
5846 /* Returns 1 if OP is a normal arithmetic register. */
5849 arith_reg_operand (op, mode)
5850 rtx op;
5851 enum machine_mode mode;
5853 if (register_operand (op, mode))
5855 int regno;
5857 if (GET_CODE (op) == REG)
5858 regno = REGNO (op);
5859 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
5860 regno = REGNO (SUBREG_REG (op));
5861 else
5862 return 1;
5864 return (regno != T_REG && regno != PR_REG
5865 && ! TARGET_REGISTER_P (regno)
5866 && (regno != FPUL_REG || TARGET_SH4)
5867 && regno != MACH_REG && regno != MACL_REG);
5869 return 0;
5872 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
5873 because this would lead to missing sign extensions when truncating from
5874 DImode to SImode. */
5876 arith_reg_dest (op, mode)
5877 rtx op;
5878 enum machine_mode mode;
5880 if (mode == DImode && GET_CODE (op) == SUBREG
5881 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
5882 return 0;
5883 return arith_reg_operand (op, mode);
5887 int_gpr_dest (op, mode)
5888 rtx op;
5889 enum machine_mode mode ATTRIBUTE_UNUSED;
5891 enum machine_mode op_mode = GET_MODE (op);
5893 if (GET_MODE_CLASS (op_mode) != MODE_INT
5894 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
5895 return 0;
5896 if (! reload_completed)
5897 return 0;
5898 return true_regnum (op) <= LAST_GENERAL_REG;
5902 fp_arith_reg_operand (op, mode)
5903 rtx op;
5904 enum machine_mode mode;
5906 if (register_operand (op, mode))
5908 int regno;
5910 if (GET_CODE (op) == REG)
5911 regno = REGNO (op);
5912 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
5913 regno = REGNO (SUBREG_REG (op));
5914 else
5915 return 1;
5917 return (regno >= FIRST_PSEUDO_REGISTER
5918 || FP_REGISTER_P (regno));
5920 return 0;
5923 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
5926 arith_operand (op, mode)
5927 rtx op;
5928 enum machine_mode mode;
5930 if (arith_reg_operand (op, mode))
5931 return 1;
5933 if (TARGET_SHMEDIA)
5935 /* FIXME: We should be checking whether the CONST_INT fits in a
5936 CONST_OK_FOR_J here, but this causes reload_cse to crash when
5937 attempting to transform a sequence of two 64-bit sets of the
5938 same register from literal constants into a set and an add,
5939 when the difference is too wide for an add. */
5940 if (GET_CODE (op) == CONST_INT
5941 || EXTRA_CONSTRAINT_S (op))
5942 return 1;
5943 else
5944 return 0;
5946 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
5947 return 1;
5949 return 0;
5952 /* Returns 1 if OP is a valid source operand for a compare insn. */
5955 arith_reg_or_0_operand (op, mode)
5956 rtx op;
5957 enum machine_mode mode;
5959 if (arith_reg_operand (op, mode))
5960 return 1;
5962 if (EXTRA_CONSTRAINT_U (op))
5963 return 1;
5965 return 0;
5968 /* Return 1 if OP is a valid source operand for an SHmedia operation
5969 that takes either a register or a 6-bit immediate. */
5972 shmedia_6bit_operand (op, mode)
5973 rtx op;
5974 enum machine_mode mode;
5976 return (arith_reg_operand (op, mode)
5977 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_O (INTVAL (op))));
5980 /* Returns 1 if OP is a valid source operand for a logical operation. */
5983 logical_operand (op, mode)
5984 rtx op;
5985 enum machine_mode mode;
5987 if (arith_reg_operand (op, mode))
5988 return 1;
5990 if (TARGET_SHMEDIA)
5992 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_P (INTVAL (op)))
5993 return 1;
5994 else
5995 return 0;
5997 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
5998 return 1;
6000 return 0;
6004 and_operand (op, mode)
6005 rtx op;
6006 enum machine_mode mode;
6008 if (logical_operand (op, mode))
6009 return 1;
6011 /* Check mshflo.l / mshflhi.l opportunities. */
6012 if (TARGET_SHMEDIA
6013 && mode == DImode
6014 && GET_CODE (op) == CONST_INT
6015 && (INTVAL (op) == (unsigned) 0xffffffff
6016 || INTVAL (op) == (HOST_WIDE_INT) -1 << 32))
6017 return 1;
6019 return 0;
6022 /* Nonzero if OP is a floating point value with value 0.0. */
6025 fp_zero_operand (op)
6026 rtx op;
6028 REAL_VALUE_TYPE r;
6030 if (GET_MODE (op) != SFmode)
6031 return 0;
6033 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6034 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
6037 /* Nonzero if OP is a floating point value with value 1.0. */
6040 fp_one_operand (op)
6041 rtx op;
6043 REAL_VALUE_TYPE r;
6045 if (GET_MODE (op) != SFmode)
6046 return 0;
6048 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6049 return REAL_VALUES_EQUAL (r, dconst1);
6052 /* For -m4 and -m4-single-only, mode switching is used. If we are
6053 compiling without -mfmovd, movsf_ie isn't taken into account for
6054 mode switching. We could check in machine_dependent_reorg for
6055 cases where we know we are in single precision mode, but there is
6056 no interface to find that out during reload, so we must avoid
6057 choosing an fldi alternative during reload and thus failing to
6058 allocate a scratch register for the constant loading. */
6060 fldi_ok ()
6062 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
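/* Added illustrative note (not part of the original source): fldi_ok
   gates the fldi0 / fldi1 alternatives of movsf_ie, so that e.g.

     float one (void) { return 1.0f; }

   may compile to a single "fldi1 <reg>" when the test above passes;
   otherwise the constant is loaded from the literal pool, which needs
   the scratch register mentioned in the comment above.  */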
6066 tertiary_reload_operand (op, mode)
6067 rtx op;
6068 enum machine_mode mode ATTRIBUTE_UNUSED;
6070 enum rtx_code code = GET_CODE (op);
6071 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
6075 fpscr_operand (op, mode)
6076 rtx op;
6077 enum machine_mode mode ATTRIBUTE_UNUSED;
6079 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
6080 && GET_MODE (op) == PSImode);
6084 fpul_operand (op, mode)
6085 rtx op;
6086 enum machine_mode mode;
6088 if (TARGET_SHMEDIA)
6089 return fp_arith_reg_operand (op, mode);
6091 return (GET_CODE (op) == REG
6092 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
6093 && GET_MODE (op) == mode);
6097 symbol_ref_operand (op, mode)
6098 rtx op;
6099 enum machine_mode mode ATTRIBUTE_UNUSED;
6101 return (GET_CODE (op) == SYMBOL_REF);
6105 commutative_float_operator (op, mode)
6106 rtx op;
6107 enum machine_mode mode;
6109 if (GET_MODE (op) != mode)
6110 return 0;
6111 switch (GET_CODE (op))
6113 case PLUS:
6114 case MULT:
6115 return 1;
6116 default:
6117 break;
6119 return 0;
6123 noncommutative_float_operator (op, mode)
6124 rtx op;
6125 enum machine_mode mode;
6127 if (GET_MODE (op) != mode)
6128 return 0;
6129 switch (GET_CODE (op))
6131 case MINUS:
6132 case DIV:
6133 return 1;
6134 default:
6135 break;
6137 return 0;
6141 unary_float_operator (op, mode)
6142 rtx op;
6143 enum machine_mode mode;
6145 if (GET_MODE (op) != mode)
6146 return 0;
6147 switch (GET_CODE (op))
6149 case ABS:
6150 case NEG:
6151 case SQRT:
6152 return 1;
6153 default:
6154 break;
6156 return 0;
6160 binary_float_operator (op, mode)
6161 rtx op;
6162 enum machine_mode mode;
6164 if (GET_MODE (op) != mode)
6165 return 0;
6166 switch (GET_CODE (op))
6168 case PLUS:
6169 case MINUS:
6170 case MULT:
6171 case DIV:
6172 return 1;
6173 default:
6174 break;
6176 return 0;
6180 equality_comparison_operator (op, mode)
6181 rtx op;
6182 enum machine_mode mode;
6184 return ((mode == VOIDmode || GET_MODE (op) == mode)
6185 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
6188 int greater_comparison_operator (op, mode)
6189 rtx op;
6190 enum machine_mode mode;
6192 if (mode != VOIDmode && GET_MODE (op) != mode)
6193 return 0;
6194 switch (GET_CODE (op))
6196 case GT:
6197 case GE:
6198 case GTU:
6199 case GEU:
6200 return 1;
6201 default:
6202 return 0;
6206 int less_comparison_operator (op, mode)
6207 rtx op;
6208 enum machine_mode mode;
6210 if (mode != VOIDmode && GET_MODE (op) != mode)
6211 return 0;
6212 switch (GET_CODE (op))
6214 case LT:
6215 case LE:
6216 case LTU:
6217 case LEU:
6218 return 1;
6219 default:
6220 return 0;
6224 /* Accept pseudos and branch target registers. */
6226 target_reg_operand (op, mode)
6227 rtx op;
6228 enum machine_mode mode;
6230 if (mode != DImode
6231 || GET_MODE (op) != DImode)
6232 return 0;
6234 if (GET_CODE (op) == SUBREG)
6235 op = XEXP (op, 0);
6237 if (GET_CODE (op) != REG)
6238 return 0;
6240 /* We must protect ourselves from matching pseudos that are virtual
6241 registers, because they will eventually be replaced with hardware
6242 registers that aren't branch-target registers. */
6243 if (REGNO (op) > LAST_VIRTUAL_REGISTER
6244 || TARGET_REGISTER_P (REGNO (op)))
6245 return 1;
6247 return 0;
6250 /* Same as target_reg_operand, except that label_refs and symbol_refs
6251 are accepted before reload. */
6253 target_operand (op, mode)
6254 rtx op;
6255 enum machine_mode mode;
6257 if (mode != DImode)
6258 return 0;
6260 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
6261 && EXTRA_CONSTRAINT_T (op))
6262 return ! reload_completed;
6264 return target_reg_operand (op, mode);
6268 mextr_bit_offset (op, mode)
6269 rtx op;
6270 enum machine_mode mode ATTRIBUTE_UNUSED;
6272 HOST_WIDE_INT i;
6274 if (GET_CODE (op) != CONST_INT)
6275 return 0;
6276 i = INTVAL (op);
6277 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
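/* Added illustrative note (not part of the original source): the
   accepted offsets are the byte-aligned values 8, 16, ... 56 used by
   the mextr1 .. mextr7 patterns, e.g.

     mextr_bit_offset (GEN_INT (24), VOIDmode)   returns 1 (mextr3)
     mextr_bit_offset (GEN_INT (20), VOIDmode)   returns 0 (not a
                                                 multiple of 8)  */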
6281 extend_reg_operand (op, mode)
6282 rtx op;
6283 enum machine_mode mode;
6285 return (GET_CODE (op) == TRUNCATE
6286 ? arith_operand
6287 : arith_reg_operand) (op, mode);
6291 trunc_hi_operand (op, mode)
6292 rtx op;
6293 enum machine_mode mode;
6295 enum machine_mode op_mode = GET_MODE (op);
6297 if (op_mode != SImode && op_mode != DImode
6298 && op_mode != V4HImode && op_mode != V2SImode)
6299 return 0;
6300 return extend_reg_operand (op, mode);
6304 extend_reg_or_0_operand (op, mode)
6305 rtx op;
6306 enum machine_mode mode;
6308 return (GET_CODE (op) == TRUNCATE
6309 ? arith_operand
6310 : arith_reg_or_0_operand) (op, mode);
6314 general_extend_operand (op, mode)
6315 rtx op;
6316 enum machine_mode mode;
6318 return (GET_CODE (op) == TRUNCATE
6319 ? arith_operand
6320 : nonimmediate_operand) (op, mode);
6324 inqhi_operand (op, mode)
6325 rtx op;
6326 enum machine_mode mode;
6328 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
6329 return 0;
6330 op = XEXP (op, 0);
6331 /* Can't use true_regnum here because copy_cost wants to know about
6332 SECONDARY_INPUT_RELOAD_CLASS. */
6333 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
6337 sh_rep_vec (v, mode)
6338 rtx v;
6339 enum machine_mode mode;
6341 int i;
6342 rtx x, y;
6344 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
6345 || (GET_MODE (v) != mode && mode != VOIDmode))
6346 return 0;
6347 i = XVECLEN (v, 0) - 2;
6348 x = XVECEXP (v, 0, i + 1);
6349 if (GET_MODE_UNIT_SIZE (mode) == 1)
6351 y = XVECEXP (v, 0, i);
6352 for (i -= 2 ; i >= 0; i -= 2)
6353 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
6354 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
6355 return 0;
6357 else
6358 for (; i >= 0; i--)
6359 if (XVECEXP (v, 0, i) != x)
6360 return 0;
6361 return 1;
6364 /* Determine if V is a constant vector matching MODE with only one element
6365 that is not a sign extension. Two byte-sized elements count as one. */
6367 sh_1el_vec (v, mode)
6368 rtx v;
6369 enum machine_mode mode;
6371 int unit_size;
6372 int i, last, least, sign_ix;
6373 rtx sign;
6375 if (GET_CODE (v) != CONST_VECTOR
6376 || (GET_MODE (v) != mode && mode != VOIDmode))
6377 return 0;
6378 /* Determine the numbers of the last and the least significant elements. */
6379 last = XVECLEN (v, 0) - 1;
6380 least = TARGET_LITTLE_ENDIAN ? 0 : last;
6381 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
6382 return 0;
6383 sign_ix = least;
6384 if (GET_MODE_UNIT_SIZE (mode) == 1)
6385 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
6386 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
6387 return 0;
6388 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
6389 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
6390 ? constm1_rtx : const0_rtx);
6391 i = XVECLEN (v, 0) - 1;
6392 do
6393 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
6394 return 0;
6395 while (--i);
6396 return 1;
6400 sh_const_vec (v, mode)
6401 rtx v;
6402 enum machine_mode mode;
6404 int i;
6406 if (GET_CODE (v) != CONST_VECTOR
6407 || (GET_MODE (v) != mode && mode != VOIDmode))
6408 return 0;
6409 i = XVECLEN (v, 0) - 1;
6410 for (; i >= 0; i--)
6411 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
6412 return 0;
6413 return 1;
6416 /* Return the destination address of a branch. */
6418 static int
6419 branch_dest (branch)
6420 rtx branch;
6422 rtx dest = SET_SRC (PATTERN (branch));
6423 int dest_uid;
6425 if (GET_CODE (dest) == IF_THEN_ELSE)
6426 dest = XEXP (dest, 1);
6427 dest = XEXP (dest, 0);
6428 dest_uid = INSN_UID (dest);
6429 return INSN_ADDRESSES (dest_uid);
6432 /* Return nonzero if REG is not used after INSN.
6433 We assume REG is a reload reg, and therefore does
6434 not live past labels. It may live past calls or jumps though. */
6436 reg_unused_after (reg, insn)
6437 rtx reg;
6438 rtx insn;
6440 enum rtx_code code;
6441 rtx set;
6443 /* If the reg is set by this instruction, then it is safe for our
6444 case. Disregard the case where this is a store to memory, since
6445 we are checking a register used in the store address. */
6446 set = single_set (insn);
6447 if (set && GET_CODE (SET_DEST (set)) != MEM
6448 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6449 return 1;
6451 while ((insn = NEXT_INSN (insn)))
6453 code = GET_CODE (insn);
6455 #if 0
6456 /* If this is a label that existed before reload, then the register
6457 is dead here. However, if this is a label added by reorg, then
6458 the register may still be live here. We can't tell the difference,
6459 so we just ignore labels completely. */
6460 if (code == CODE_LABEL)
6461 return 1;
6462 /* else */
6463 #endif
6465 if (code == JUMP_INSN)
6466 return 0;
6468 /* If this is a sequence, we must handle them all at once.
6469 We could have for instance a call that sets the target register,
6470 and an insn in a delay slot that uses the register. In this case,
6471 we must return 0. */
6472 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
6474 int i;
6475 int retval = 0;
6477 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
6479 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
6480 rtx set = single_set (this_insn);
6482 if (GET_CODE (this_insn) == CALL_INSN)
6483 code = CALL_INSN;
6484 else if (GET_CODE (this_insn) == JUMP_INSN)
6486 if (INSN_ANNULLED_BRANCH_P (this_insn))
6487 return 0;
6488 code = JUMP_INSN;
6491 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6492 return 0;
6493 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6495 if (GET_CODE (SET_DEST (set)) != MEM)
6496 retval = 1;
6497 else
6498 return 0;
6500 if (set == 0
6501 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
6502 return 0;
6504 if (retval == 1)
6505 return 1;
6506 else if (code == JUMP_INSN)
6507 return 0;
6509 else if (GET_RTX_CLASS (code) == 'i')
6511 rtx set = single_set (insn);
6513 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6514 return 0;
6515 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6516 return GET_CODE (SET_DEST (set)) != MEM;
6517 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
6518 return 0;
6521 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
6522 return 1;
6524 return 1;
6527 #include "ggc.h"
6529 static GTY(()) rtx fpscr_rtx;
6531 get_fpscr_rtx ()
6533 if (! fpscr_rtx)
6535 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
6536 REG_USERVAR_P (fpscr_rtx) = 1;
6537 mark_user_reg (fpscr_rtx);
6539 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
6540 mark_user_reg (fpscr_rtx);
6541 return fpscr_rtx;
6544 void
6545 emit_sf_insn (pat)
6546 rtx pat;
6548 emit_insn (pat);
6551 void
6552 emit_df_insn (pat)
6553 rtx pat;
6555 emit_insn (pat);
6558 void
6559 expand_sf_unop (fun, operands)
6560 rtx (*fun) PARAMS ((rtx, rtx, rtx));
6561 rtx *operands;
6563 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
6566 void
6567 expand_sf_binop (fun, operands)
6568 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
6569 rtx *operands;
6571 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
6572 get_fpscr_rtx ()));
6575 void
6576 expand_df_unop (fun, operands)
6577 rtx (*fun) PARAMS ((rtx, rtx, rtx));
6578 rtx *operands;
6580 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
6583 void
6584 expand_df_binop (fun, operands)
6585 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
6586 rtx *operands;
6588 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
6589 get_fpscr_rtx ()));
6592 /* ??? gcc does flow analysis strictly after common subexpression
6593 elimination. As a result, common subexpression elimination fails
6594 when there are some intervening statements setting the same register.
6595 If we did nothing about this, this would hurt the precision switching
6596 for SH4 badly. There is some cse after reload, but it is unable to
6597 undo the extra register pressure from the unused instructions, and
6598 it cannot remove auto-increment loads.
6600 A C code example that shows this flow/cse weakness for (at least) SH
6601 and sparc (as of gcc ss-970706) is this:
6603 double
6604 f(double a)
6606 double d;
6607 d = 0.1;
6608 a += d;
6609 d = 1.1;
6610 d = 0.1;
6611 a *= d;
6612 return a;
6615 So we add another pass before common subexpression elimination, to
6616 remove assignments that are dead due to a following assignment in the
6617 same basic block. */
6619 static void
6620 mark_use (x, reg_set_block)
6621 rtx x, *reg_set_block;
6623 enum rtx_code code;
6625 if (! x)
6626 return;
6627 code = GET_CODE (x);
6628 switch (code)
6630 case REG:
6632 int regno = REGNO (x);
6633 int nregs = (regno < FIRST_PSEUDO_REGISTER
6634 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
6635 : 1);
6636 do
6638 reg_set_block[regno + nregs - 1] = 0;
6640 while (--nregs);
6641 break;
6643 case SET:
6645 rtx dest = SET_DEST (x);
6647 if (GET_CODE (dest) == SUBREG)
6648 dest = SUBREG_REG (dest);
6649 if (GET_CODE (dest) != REG)
6650 mark_use (dest, reg_set_block);
6651 mark_use (SET_SRC (x), reg_set_block);
6652 break;
6654 case CLOBBER:
6655 break;
6656 default:
6658 const char *fmt = GET_RTX_FORMAT (code);
6659 int i, j;
6660 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6662 if (fmt[i] == 'e')
6663 mark_use (XEXP (x, i), reg_set_block);
6664 else if (fmt[i] == 'E')
6665 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6666 mark_use (XVECEXP (x, i, j), reg_set_block);
6668 break;
6673 static rtx get_free_reg PARAMS ((HARD_REG_SET));
6675 /* This function returns a register to hold the address from which the
6676 fpscr is loaded. Currently it always returns r1 or r7, but when we are
6677 able to use pseudo registers after combine, or have a better mechanism
6678 for choosing a register, it should be done here. */
6679 /* REGS_LIVE is the liveness information for the point for which we
6680 need this allocation. In some bare-bones exit blocks, r1 is live at the
6681 start. We can even have all of r0..r3 being live:
6682 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
6683 The insn before which the new insns are placed will clobber the register
6684 we return. If a basic block consists only of setting the return value
6685 register to a pseudo and using that register, the return value is not
6686 live before or after this block, yet we'll insert our insns right in
6687 the middle. */
6689 static rtx
6690 get_free_reg (regs_live)
6691 HARD_REG_SET regs_live;
6693 if (! TEST_HARD_REG_BIT (regs_live, 1))
6694 return gen_rtx_REG (Pmode, 1);
6696 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
6697 there shouldn't be anything but a jump before the function end. */
6698 if (! TEST_HARD_REG_BIT (regs_live, 7))
6699 return gen_rtx_REG (Pmode, 7);
6701 abort ();
6704 /* This function will set the fpscr from memory.
6705 MODE is the mode we are setting it to. */
6706 void
6707 fpscr_set_from_mem (mode, regs_live)
6708 int mode;
6709 HARD_REG_SET regs_live;
6711 enum attr_fp_mode fp_mode = mode;
6712 rtx addr_reg = get_free_reg (regs_live);
6714 if (fp_mode == (enum attr_fp_mode) NORMAL_MODE (FP_MODE))
6715 emit_insn (gen_fpu_switch1 (addr_reg));
6716 else
6717 emit_insn (gen_fpu_switch0 (addr_reg));
6720 /* Is the given character a logical line separator for the assembler? */
6721 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
6722 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
6723 #endif
6726 sh_insn_length_adjustment (insn)
6727 rtx insn;
6729 /* Instructions with unfilled delay slots take up an extra two bytes for
6730 the nop in the delay slot. */
6731 if (((GET_CODE (insn) == INSN
6732 && GET_CODE (PATTERN (insn)) != USE
6733 && GET_CODE (PATTERN (insn)) != CLOBBER)
6734 || GET_CODE (insn) == CALL_INSN
6735 || (GET_CODE (insn) == JUMP_INSN
6736 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
6737 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
6738 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
6739 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
6740 return 2;
6742 /* sh-dsp parallel processing insns take four bytes instead of two. */
6744 if (GET_CODE (insn) == INSN)
6746 int sum = 0;
6747 rtx body = PATTERN (insn);
6748 const char *template;
6749 char c;
6750 int maybe_label = 1;
6752 if (GET_CODE (body) == ASM_INPUT)
6753 template = XSTR (body, 0);
6754 else if (asm_noperands (body) >= 0)
6755 template
6756 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
6757 else
6758 return 0;
6759 do
6761 int ppi_adjust = 0;
6763 do
6764 c = *template++;
6765 while (c == ' ' || c == '\t');
6766 /* all sh-dsp parallel-processing insns start with p.
6767 The only non-ppi sh insn starting with p is pref.
6768 The only ppi starting with pr is prnd. */
6769 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
6770 ppi_adjust = 2;
6771 /* The repeat pseudo-insn expands to three insns, a total of
6772 six bytes. */
6773 else if ((c == 'r' || c == 'R')
6774 && ! strncasecmp ("epeat", template, 5))
6775 ppi_adjust = 4;
6776 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
6778 /* If this is a label, it is obviously not a ppi insn. */
6779 if (c == ':' && maybe_label)
6781 ppi_adjust = 0;
6782 break;
6784 else if (c == '\'' || c == '"')
6785 maybe_label = 0;
6786 c = *template++;
6788 sum += ppi_adjust;
6789 maybe_label = c != ':';
6791 while (c);
6792 return sum;
6794 return 0;
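/* Added illustrative examples (not part of the original source;
   operand syntax is schematic):

     asm ("padd x0,y0,a0");    parallel-processing insn, adjustment +2
     asm ("pref @r1");         starts with "pre", no adjustment
     asm ("repeat ...");       repeat pseudo-insn, adjustment +4  */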
6797 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
6798 isn't protected by a PIC unspec. */
6800 nonpic_symbol_mentioned_p (x)
6801 rtx x;
6803 register const char *fmt;
6804 register int i;
6806 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
6807 || GET_CODE (x) == PC)
6808 return 1;
6810 /* We don't want to look into the possible MEM location of a
6811 CONST_DOUBLE, since we're not going to use it, in general. */
6812 if (GET_CODE (x) == CONST_DOUBLE)
6813 return 0;
6815 if (GET_CODE (x) == UNSPEC
6816 && (XINT (x, 1) == UNSPEC_PIC
6817 || XINT (x, 1) == UNSPEC_GOT
6818 || XINT (x, 1) == UNSPEC_GOTOFF
6819 || XINT (x, 1) == UNSPEC_GOTPLT
6820 || XINT (x, 1) == UNSPEC_PLT))
6821 return 0;
6823 fmt = GET_RTX_FORMAT (GET_CODE (x));
6824 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6826 if (fmt[i] == 'E')
6828 register int j;
6830 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6831 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
6832 return 1;
6834 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
6835 return 1;
6838 return 0;
6841 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
6842 @GOTOFF in `reg'. */
6844 legitimize_pic_address (orig, mode, reg)
6845 rtx orig;
6846 enum machine_mode mode ATTRIBUTE_UNUSED;
6847 rtx reg;
6849 if (GET_CODE (orig) == LABEL_REF
6850 || (GET_CODE (orig) == SYMBOL_REF
6851 && (CONSTANT_POOL_ADDRESS_P (orig)
6852 /* SYMBOL_REF_FLAG is set on static symbols. */
6853 || SYMBOL_REF_FLAG (orig))))
6855 if (reg == 0)
6856 reg = gen_reg_rtx (Pmode);
6858 emit_insn (gen_symGOTOFF2reg (reg, orig));
6859 return reg;
6861 else if (GET_CODE (orig) == SYMBOL_REF)
6863 if (reg == 0)
6864 reg = gen_reg_rtx (Pmode);
6866 emit_insn (gen_symGOT2reg (reg, orig));
6867 return reg;
6869 return orig;
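/* Added illustrative note (not part of the original source): given

     static int s;
     extern int e;

   a -fPIC access to 's' (SYMBOL_REF_FLAG set, binds locally) takes the
   symGOTOFF2reg path above and uses a @GOTOFF offset, while an access
   to 'e' takes the symGOT2reg path and is loaded through the GOT.  */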
6872 /* Mark the use of a constant in the literal table. If the constant
6873 has multiple labels, make it unique. */
6874 static rtx
6875 mark_constant_pool_use (x)
6876 rtx x;
6878 rtx insn, lab, pattern;
6880 if (x == NULL)
6881 return x;
6883 switch (GET_CODE (x))
6885 case LABEL_REF:
6886 x = XEXP (x, 0);
6887 case CODE_LABEL:
6888 break;
6889 default:
6890 return x;
6893 /* Get the first label in the list of labels for the same constant
6894 and delete the other labels in the list. */
6895 lab = x;
6896 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
6898 if (GET_CODE (insn) != CODE_LABEL
6899 || LABEL_REFS (insn) != NEXT_INSN (insn))
6900 break;
6901 lab = insn;
6904 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
6905 INSN_DELETED_P (insn) = 1;
6907 /* Mark constants in a window. */
6908 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
6910 if (GET_CODE (insn) != INSN)
6911 continue;
6913 pattern = PATTERN (insn);
6914 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
6915 continue;
6917 switch (XINT (pattern, 1))
6919 case UNSPECV_CONST2:
6920 case UNSPECV_CONST4:
6921 case UNSPECV_CONST8:
6922 XVECEXP (pattern, 0, 1) = const1_rtx;
6923 break;
6924 case UNSPECV_WINDOW_END:
6925 if (XVECEXP (pattern, 0, 0) == x)
6926 return lab;
6927 break;
6928 case UNSPECV_CONST_END:
6929 return lab;
6930 default:
6931 break;
6935 return lab;
6938 /* Return true if it's possible to redirect BRANCH1 to the destination
6939 of an unconditional jump BRANCH2. We only want to do this if the
6940 resulting branch will have a short displacement. */
6941 int
6942 sh_can_redirect_branch (branch1, branch2)
6943 rtx branch1;
6944 rtx branch2;
6946 if (flag_expensive_optimizations && simplejump_p (branch2))
6948 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
6949 rtx insn;
6950 int distance;
6952 for (distance = 0, insn = NEXT_INSN (branch1);
6953 insn && distance < 256;
6954 insn = PREV_INSN (insn))
6956 if (insn == dest)
6957 return 1;
6958 else
6959 distance += get_attr_length (insn);
6961 for (distance = 0, insn = NEXT_INSN (branch1);
6962 insn && distance < 256;
6963 insn = NEXT_INSN (insn))
6965 if (insn == dest)
6966 return 1;
6967 else
6968 distance += get_attr_length (insn);
6971 return 0;
6974 /* Return nonzero if register old_reg can be renamed to register new_reg. */
6976 sh_hard_regno_rename_ok (old_reg, new_reg)
6977 unsigned int old_reg ATTRIBUTE_UNUSED;
6978 unsigned int new_reg;
6981 /* Interrupt functions can only use registers that have already been
6982 saved by the prologue, even if they would normally be
6983 call-clobbered. */
6985 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
6986 return 0;
6988 return 1;
6991 /* A C statement (sans semicolon) to update the integer variable COST
6992 based on the relationship between INSN that is dependent on
6993 DEP_INSN through the dependence LINK. The default is to make no
6994 adjustment to COST. This can be used for example to specify to
6995 the scheduler that an output- or anti-dependence does not incur
6996 the same cost as a data-dependence. */
6997 static int
6998 sh_adjust_cost (insn, link, dep_insn, cost)
6999 rtx insn;
7000 rtx link ATTRIBUTE_UNUSED;
7001 rtx dep_insn;
7002 int cost;
7004 rtx reg, use_pat;
7006 if (TARGET_SHMEDIA)
7008 /* On SHmedia, if the dependence is an anti-dependence or
7009 output-dependence, there is no cost. */
7010 if (REG_NOTE_KIND (link) != 0)
7011 cost = 0;
7013 if (get_attr_is_mac_media (insn)
7014 && get_attr_is_mac_media (dep_insn))
7015 cost = 1;
7017 else if (REG_NOTE_KIND (link) == 0)
7019 enum attr_type dep_type, type;
7021 if (recog_memoized (insn) < 0
7022 || recog_memoized (dep_insn) < 0)
7023 return cost;
7025 dep_type = get_attr_type (dep_insn);
7026 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
7027 cost--;
7028 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
7029 && (type = get_attr_type (insn)) != TYPE_CALL
7030 && type != TYPE_SFUNC)
7031 cost--;
7033 /* The only input for a call that is timing-critical is the
7034 function's address. */
7035 if (GET_CODE(insn) == CALL_INSN)
7037 rtx call = PATTERN (insn);
7039 if (GET_CODE (call) == PARALLEL)
7040 call = XVECEXP (call, 0, 0);
7041 if (GET_CODE (call) == SET)
7042 call = SET_SRC (call);
7043 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
7044 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
7045 cost = 0;
7047 /* Likewise, the most timing critical input for an sfunc call
7048 is the function address. However, sfuncs typically start
7049 using their arguments pretty quickly.
7050 Assume a four cycle delay before they are needed. */
7051 /* All sfunc calls are parallels with at least four components.
7052 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
7053 else if (GET_CODE (PATTERN (insn)) == PARALLEL
7054 && XVECLEN (PATTERN (insn), 0) >= 4
7055 && (reg = sfunc_uses_reg (insn)))
7057 if (! reg_set_p (reg, dep_insn))
7058 cost -= 4;
7060 /* When the preceding instruction loads the shift amount of
7061 the following SHAD/SHLD, the latency of the load is increased
7062 by 1 cycle. */
7063 else if (TARGET_SH4
7064 && get_attr_type (insn) == TYPE_DYN_SHIFT
7065 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
7066 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
7067 XEXP (SET_SRC (single_set(insn)),
7068 1)))
7069 cost++;
7070 /* When an LS group instruction with a latency of less than
7071 3 cycles is followed by a double-precision floating-point
7072 instruction, FIPR, or FTRV, the latency of the first
7073 instruction is increased to 3 cycles. */
7074 else if (cost < 3
7075 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
7076 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
7077 cost = 3;
7078 /* The lsw register of a double-precision computation is ready one
7079 cycle earlier. */
7080 else if (reload_completed
7081 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
7082 && (use_pat = single_set (insn))
7083 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
7084 SET_SRC (use_pat)))
7085 cost -= 1;
7087 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
7088 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
7089 cost -= 1;
7091 /* An anti-dependence penalty of two applies if the first insn is a double
7092 precision fadd / fsub / fmul. */
7093 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7094 && recog_memoized (dep_insn) >= 0
7095 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
7096 /* A lot of alleged anti-flow dependences are fake,
7097 so check this one is real. */
7098 && flow_dependent_p (dep_insn, insn))
7099 cost = 2;
7102 return cost;
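/* Added illustrative example (not part of the original source) for the
   SH4 dynamic-shift adjustment above:

     mov.l  @r4,r1    ! any_int_load sets r1
     shad   r1,r2     ! dynamic shift uses the loaded shift amount

   sh_adjust_cost increases the cost of this dependence by one cycle.  */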
7105 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
7106 if DEP_INSN is anti-flow dependent on INSN. */
7107 static int
7108 flow_dependent_p (insn, dep_insn)
7109 rtx insn, dep_insn;
7111 rtx tmp = PATTERN (insn);
7113 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
7114 return tmp == NULL_RTX;
7117 /* A helper function for flow_dependent_p called through note_stores. */
7118 static void
7119 flow_dependent_p_1 (x, pat, data)
7120 rtx x;
7121 rtx pat ATTRIBUTE_UNUSED;
7122 void *data;
7124 rtx * pinsn = (rtx *) data;
7126 if (*pinsn && reg_referenced_p (x, *pinsn))
7127 *pinsn = NULL_RTX;
7130 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
7131 'special function' patterns (type sfunc) that clobber pr, but that
7132 do not look like function calls to leaf_function_p. Hence we must
7133 do this extra check. */
7135 sh_pr_n_sets ()
7137 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
7140 /* This function returns nonzero if the DFA-based scheduler interface
7141 is to be used. At present this is supported for the SH4 only. */
7142 static int
7143 sh_use_dfa_interface()
7145 if (TARGET_HARD_SH4)
7146 return 1;
7147 else
7148 return 0;
7151 /* This function returns "2" to indicate dual issue for the SH4
7152 processor. To be used by the DFA pipeline description. */
7153 static int
7154 sh_issue_rate()
7156 if (TARGET_SUPERSCALAR)
7157 return 2;
7158 else
7159 return 1;
7162 /* SHmedia requires registers for branches, so we can't generate new
7163 branches past reload. */
7164 static bool
7165 sh_cannot_modify_jumps_p ()
7167 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
7170 static bool
7171 sh_ms_bitfield_layout_p (record_type)
7172 tree record_type ATTRIBUTE_UNUSED;
7174 return TARGET_SH5;
7177 /* If using PIC, mark a SYMBOL_REF for a non-global symbol so that we
7178 may access it using GOTOFF instead of GOT. */
7180 static void
7181 sh_encode_section_info (decl, first)
7182 tree decl;
7183 int first;
7185 rtx rtl, symbol;
7187 if (DECL_P (decl))
7188 rtl = DECL_RTL (decl);
7189 else
7190 rtl = TREE_CST_RTL (decl);
7191 if (GET_CODE (rtl) != MEM)
7192 return;
7193 symbol = XEXP (rtl, 0);
7194 if (GET_CODE (symbol) != SYMBOL_REF)
7195 return;
7197 if (flag_pic)
7198 SYMBOL_REF_FLAG (symbol) = (*targetm.binds_local_p) (decl);
7200 if (TARGET_SH5 && first && TREE_CODE (decl) != FUNCTION_DECL)
7201 XEXP (rtl, 0) = gen_datalabel_ref (symbol);
7204 /* Undo the effects of the above. */
7206 static const char *
7207 sh_strip_name_encoding (str)
7208 const char *str;
7210 STRIP_DATALABEL_ENCODING (str, str);
7211 str += *str == '*';
7212 return str;
7217 On the SH1..SH4, the trampoline looks like
7218 2 0002 D202 mov.l l2,r2
7219 1 0000 D301 mov.l l1,r3
7220 3 0004 422B jmp @r2
7221 4 0006 0009 nop
7222 5 0008 00000000 l1: .long area
7223 6 000c 00000000 l2: .long function
7225 SH5 (compact) uses r1 instead of r3 for the static chain. */
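/* Added illustrative context (not part of the original source): a
   trampoline is initialized when the address of a nested function
   escapes its parent, e.g.

     int f (int i)
     {
       int g (void) { return i; }
       return use (g);
     }

   ('use' is a hypothetical callee.)  Here 'g' needs the static chain
   pointing at f's frame; the code below fills the l1 / l2 slots of the
   template with CXT and FNADDR.  */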
7228 /* Emit RTL insns to initialize the variable parts of a trampoline.
7229 FNADDR is an RTX for the address of the function's pure code.
7230 CXT is an RTX for the static chain value for the function. */
7232 void
7233 sh_initialize_trampoline (tramp, fnaddr, cxt)
7234 rtx tramp, fnaddr, cxt;
7236 if (TARGET_SHMEDIA64)
7238 rtx tramp_templ;
7239 int fixed_len;
7241 rtx movi1 = GEN_INT (0xcc000010);
7242 rtx shori1 = GEN_INT (0xc8000010);
7243 rtx src, dst;
7245 /* The following trampoline works within a +- 128 KB range for cxt:
7246 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
7247 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
7248 gettr tr1,r1; blink tr0,r63 */
7249 /* Address rounding makes it hard to compute the exact bounds of the
7250 offset for this trampoline, but we have a rather generous offset
7251 range, so frame_offset should do fine as an upper bound. */
7252 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
7254 /* ??? could optimize this trampoline initialization
7255 by writing DImode words with two insns each. */
7256 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
7257 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
7258 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
7259 insn = gen_rtx_AND (DImode, insn, mask);
7260 /* Or in ptb/u .,tr1 pattern */
7261 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
7262 insn = force_operand (insn, NULL_RTX);
7263 insn = gen_lowpart (SImode, insn);
7264 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
7265 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
7266 insn = gen_rtx_AND (DImode, insn, mask);
7267 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
7268 insn = gen_lowpart (SImode, insn);
7269 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
7270 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
7271 insn = gen_rtx_AND (DImode, insn, mask);
7272 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7273 insn = gen_lowpart (SImode, insn);
7274 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
7275 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
7276 insn = gen_rtx_AND (DImode, insn, mask);
7277 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7278 insn = gen_lowpart (SImode, insn);
7279 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7280 insn);
7281 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
7282 insn = gen_rtx_AND (DImode, insn, mask);
7283 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7284 insn = gen_lowpart (SImode, insn);
7285 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
7286 insn);
7287 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
7288 GEN_INT (0x6bf10600));
7289 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
7290 GEN_INT (0x4415fc10));
7291 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
7292 GEN_INT (0x4401fff0));
7293 emit_insn (gen_ic_invalidate_line (tramp));
7294 return;
7296 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
7297 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
7299 tramp_templ = gen_datalabel_ref (tramp_templ);
7300 dst = gen_rtx_MEM (BLKmode, tramp);
7301 src = gen_rtx_MEM (BLKmode, tramp_templ);
7302 set_mem_align (dst, 256);
7303 set_mem_align (src, 64);
7304 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
7306 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
7307 fnaddr);
7308 emit_move_insn (gen_rtx_MEM (Pmode,
7309 plus_constant (tramp,
7310 fixed_len
7311 + GET_MODE_SIZE (Pmode))),
7312 cxt);
7313 emit_insn (gen_ic_invalidate_line (tramp));
7314 return;
7316 else if (TARGET_SHMEDIA)
7318 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
7319 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
7320 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
7321 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
7322 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
7323 rotated right by 10, with the higher 16 bits of every 32 selected. */
7324 rtx movishori
7325 = force_reg (V2HImode, (simplify_gen_subreg
7326 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
7327 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
7328 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
7330 tramp = force_reg (Pmode, tramp);
7331 fnaddr = force_reg (SImode, fnaddr);
7332 cxt = force_reg (SImode, cxt);
7333 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
7334 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
7335 movishori));
7336 emit_insn (gen_rotldi3_mextr (quad0, quad0,
7337 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7338 emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
7339 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
7340 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
7341 gen_rtx_SUBREG (V2HImode, cxt, 0),
7342 movishori));
7343 emit_insn (gen_rotldi3_mextr (cxtload, cxtload,
7344 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7345 emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
7346 if (TARGET_LITTLE_ENDIAN)
7348 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
7349 emit_insn (gen_mextr4 (quad2, cxtload, blink));
7351 else
7353 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
7354 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
7356 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
7357 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
7358 emit_insn (gen_ic_invalidate_line (tramp));
7359 return;
7361 else if (TARGET_SHCOMPACT)
7363 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
7364 return;
7366 emit_move_insn (gen_rtx_MEM (SImode, tramp),
7367 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
7368 SImode));
7369 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7370 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
7371 SImode));
7372 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7373 cxt);
7374 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7375 fnaddr);
7376 if (TARGET_HARVARD)
7378 if (TARGET_USERMODE)
7379 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"),
7380 0, VOIDmode, 1, tramp, SImode);
7381 else
7382 emit_insn (gen_ic_invalidate_line (tramp));
7387 /* Machine specific built-in functions. */
7389 struct builtin_description
7391 const enum insn_code icode;
7392 const char *const name;
7393 int signature;
7396 /* describe number and signedness of arguments; arg[0] == result
7397 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
7398 static const char signature_args[][4] =
7400 #define SH_BLTIN_V2SI2 0
7401 { 4, 4 },
7402 #define SH_BLTIN_V4HI2 1
7403 { 4, 4 },
7404 #define SH_BLTIN_V2SI3 2
7405 { 4, 4, 4 },
7406 #define SH_BLTIN_V4HI3 3
7407 { 4, 4, 4 },
7408 #define SH_BLTIN_V8QI3 4
7409 { 4, 4, 4 },
7410 #define SH_BLTIN_MAC_HISI 5
7411 { 1, 4, 4, 1 },
7412 #define SH_BLTIN_SH_HI 6
7413 { 4, 4, 1 },
7414 #define SH_BLTIN_SH_SI 7
7415 { 4, 4, 1 },
7416 #define SH_BLTIN_V4HI2V2SI 8
7417 { 4, 4, 4 },
7418 #define SH_BLTIN_V4HI2V8QI 9
7419 { 4, 4, 4 },
7420 #define SH_BLTIN_SISF 10
7421 { 4, 2 },
7422 #define SH_BLTIN_LDUA_L 11
7423 { 2, 8 },
7424 #define SH_BLTIN_LDUA_Q 12
7425 { 1, 8 },
7426 #define SH_BLTIN_STUA_L 13
7427 { 0, 8, 2 },
7428 #define SH_BLTIN_STUA_Q 14
7429 { 0, 8, 1 },
7430 #define SH_BLTIN_UDI 15
7431 { 0, 8, 1 },
7432 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
7433 #define SH_BLTIN_2 16
7434 #define SH_BLTIN_SU 16
7435 { 1, 2 },
7436 #define SH_BLTIN_3 17
7437 #define SH_BLTIN_SUS 17
7438 { 2, 2, 1 },
7439 #define SH_BLTIN_PSSV 18
7440 { 0, 8, 2, 2 },
7441 #define SH_BLTIN_XXUU 19
7442 #define SH_BLTIN_UUUU 19
7443 { 1, 1, 1, 1 },
7444 #define SH_BLTIN_PV 20
7445 { 0, 8 },
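/* Added illustrative decoding (not part of the original source):
   SH_BLTIN_MAC_HISI is { 1, 4, 4, 1 }: an unsigned result, two
   arguments whose signedness is taken from the insn operand mode,
   and a final unsigned argument.  SH_BLTIN_PV is { 0, 8 }: no result
   (void) and a single pointer argument.  */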
7447 /* mcmv: operands considered unsigned. */
7448 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
7449 /* mperm: control value considered unsigned int. */
7450 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
7451 /* mshards_q: returns signed short. */
7452 /* nsb: takes long long arg, returns unsigned char. */
7453 static const struct builtin_description bdesc[] =
7455 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
7456 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
7457 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
7458 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
7459 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
7460 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
7461 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
7462 #if 0
7463 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7464 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7465 #endif
7466 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
7467 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
7468 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
7469 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
7470 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
7471 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
7472 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
7473 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
7474 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
7475 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
7476 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
7477 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
7478 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
7479 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
7480 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
7481 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
7482 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
7483 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
7484 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
7485 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
7486 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
7487 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
7488 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
7489 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
7490 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
7491 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
7492 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
7493 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
7494 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
7495 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
7496 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
7497 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
7498 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
7499 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
7500 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
7501 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
7502 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
7503 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
7504 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
7505 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
7506 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
7507 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
7508 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
7509 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
7510 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
7511 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
7512 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
7513 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
7514 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
7515 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
7516 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
7517 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
7518 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
7519 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
7520 #if 0
7521 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7522 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7523 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7524 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7525 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7526 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7527 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7528 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7529 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7530 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7531 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7532 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7533 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7534 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7535 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7536 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7537 #endif
7538 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
7539 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
7540 #if 0
7541 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
7542 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
7543 #endif
7546 static void
7547 sh_media_init_builtins ()
7549 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
7550 const struct builtin_description *d;
7552 memset (shared, 0, sizeof shared);
7553 for (d = bdesc; d - bdesc < (int) (sizeof bdesc / sizeof bdesc[0]); d++)
7555 tree type, arg_type;
7556 int signature = d->signature;
7557 int i;
7559 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
7560 type = shared[signature];
7561 else
7563 int has_result = signature_args[signature][0] != 0;
7565 if (signature_args[signature][1] == 8
7566 && (insn_data[d->icode].operand[has_result].mode != Pmode))
7567 continue;
7568 if (! TARGET_FPU_ANY
7569 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
7570 continue;
7571 type = void_list_node;
7572 for (i = 3; ; i--)
7574 int arg = signature_args[signature][i];
7575 int opno = i - 1 + has_result;
7577 if (arg == 8)
7578 arg_type = ptr_type_node;
7579 else if (arg)
7580 arg_type = ((*lang_hooks.types.type_for_mode)
7581 (insn_data[d->icode].operand[opno].mode,
7582 (arg & 1)));
7583 else if (i)
7584 continue;
7585 else
7586 arg_type = void_type_node;
7587 if (i == 0)
7588 break;
7589 type = tree_cons (NULL_TREE, arg_type, type);
7591 type = build_function_type (arg_type, type);
7592 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
7593 shared[signature] = type;
7595 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
7596 NULL, NULL_TREE);
7600 static void
7601 sh_init_builtins ()
7603 if (TARGET_SHMEDIA)
7604 sh_media_init_builtins ();
7607 /* Expand an expression EXP that calls a built-in function,
7608 with result going to TARGET if that's convenient
7609 (and in mode MODE if that's convenient).
7610 SUBTARGET may be used as the target for computing one of EXP's operands.
7611 IGNORE is nonzero if the value is to be ignored. */
7613 static rtx
7614 sh_expand_builtin (exp, target, subtarget, mode, ignore)
7615 tree exp;
7616 rtx target;
7617 rtx subtarget ATTRIBUTE_UNUSED;
7618 enum machine_mode mode ATTRIBUTE_UNUSED;
7619 int ignore;
7621 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7622 tree arglist = TREE_OPERAND (exp, 1);
7623 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7624 const struct builtin_description *d = &bdesc[fcode];
7625 enum insn_code icode = d->icode;
7626 int signature = d->signature;
7627 enum machine_mode tmode = VOIDmode;
7628 int nop = 0, i;
7629 rtx op[4];
7630 rtx pat;
7632 if (signature_args[signature][0])
7634 if (ignore)
7635 return 0;
7637 tmode = insn_data[icode].operand[0].mode;
7638 if (! target
7639 || GET_MODE (target) != tmode
7640 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7641 target = gen_reg_rtx (tmode);
7642 op[nop++] = target;
7644 else
7645 target = 0;
7647 for (i = 1; i <= 3; i++, nop++)
7649 tree arg;
7650 enum machine_mode opmode, argmode;
7652 if (! signature_args[signature][i])
7653 break;
7654 arg = TREE_VALUE (arglist);
7655 if (arg == error_mark_node)
7656 return const0_rtx;
7657 arglist = TREE_CHAIN (arglist);
7658 opmode = insn_data[icode].operand[nop].mode;
7659 argmode = TYPE_MODE (TREE_TYPE (arg));
7660 if (argmode != opmode)
7661 arg = build1 (NOP_EXPR,
7662 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
7663 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
7664 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
7665 op[nop] = copy_to_mode_reg (opmode, op[nop]);
7668 switch (nop)
7670 case 1:
7671 pat = (*insn_data[d->icode].genfun) (op[0]);
7672 break;
7673 case 2:
7674 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
7675 break;
7676 case 3:
7677 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
7678 break;
7679 case 4:
7680 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
7681 break;
7683 if (! pat)
7684 return 0;
7685 emit_insn (pat);
7686 return target;
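/* Added illustrative usage (not part of the original source):

     typedef int v2si __attribute__ ((mode (V2SI)));
     v2si f (v2si a, v2si b) { return __builtin_addv2si3 (a, b); }

   DECL_FUNCTION_CODE of __builtin_addv2si3 indexes bdesc, and the
   expansion above then emits the CODE_FOR_addv2si3 pattern.  */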
7689 void
7690 sh_expand_unop_v2sf (code, op0, op1)
7691 enum rtx_code code;
7692 rtx op0, op1;
7694 rtx sel0 = const0_rtx;
7695 rtx sel1 = const1_rtx;
7696 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx)) = gen_unary_sf_op;
7697 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
7699 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
7700 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
7703 void
7704 sh_expand_binop_v2sf (code, op0, op1, op2)
7705 enum rtx_code code;
7706 rtx op0, op1, op2;
7708 rtx sel0 = const0_rtx;
7709 rtx sel1 = const1_rtx;
7710 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx, rtx, rtx)) = gen_binary_sf_op;
7711 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
7713 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0));
7714 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1));
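/* Added illustrative note (not part of the original source):
   sh_expand_binop_v2sf (PLUS, op0, op1, op2) emits two lane-wise
   single-precision additions through gen_binary_sf_op:

     op0[0] = op1[0] + op2[0];    selectors sel0, sel0, sel0
     op0[1] = op1[1] + op2[1];    selectors sel1, sel1, sel1  */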
7717 #include "gt-sh.h"