/* Helper routines for memory move and comparison insns.
   Copyright (C) 2013-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "machmode.h"
#include "rtl.h"
#include "hash-set.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "hashtab.h"
#include "hard-reg-set.h"
#include "function.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tm_p.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
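/* This is the SH-side worker for the block-move expansion; the standard
   "movmemsi" pattern in sh.md is assumed to forward its operands here
   (an assumption based on the standard pattern name, not something
   established in this file).  */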
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return true;
	}
      else if (! optimize_size)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

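	  /* BYTES is a multiple of 4 here; the "_odd"/"_even" helper entry
	     points presumably differ in whether a trailing 4-byte word is
	     left over after the 8-byte copy loop (an assumption based on
	     the helper names and the double-word count placed in r6
	     below).  */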
	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return true;
	}
      else
	return false;
    }

  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */
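      /* Working that example through the formulas below: 72 bytes gives
	 bytes / 4 = 18, so final_switch = 16 - (18 % 16) = 14 and
	 while_loop = (18 / 16 - 1) * 16 = 0, hence r6 = 14; one 64-byte
	 iteration leaves 14 - 16 = -2, the switch index for the last
	 8 bytes.  */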

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return true;
    }

  return false;
}

static const int prob_unlikely = REG_BR_PROB_BASE / 10;
static const int prob_likely = REG_BR_PROB_BASE / 4;
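/* These two values are attached as REG_BR_PROB notes to the conditional
   branches emitted below; they are fractions of REG_BR_PROB_BASE (one
   tenth and one quarter, respectively).  */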

/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (tmp0, const0_rtx);

  if (align < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* Start long loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte?  */
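  /* The cmpstr_t pattern is SH's cmp/str instruction: it sets the T bit
     when any of the four byte positions of its operands compare equal.
     With tmp0 = 0 this detects a zero byte in tmp3 = tmp1 & tmp2, which
     catches a NUL in either word; a spurious hit merely exits the word
     loop early and is re-checked byte by byte.  */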
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End loop.  */

  /* Fell through: the words differ, so subtract them.  */
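  /* On little-endian targets the rotates below byte-reverse both words, so
     the lowest-addressed (first) string byte ends up most significant and
     the word subtraction at L_return is ordered by the first differing
     byte.  */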
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* Start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}

/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = force_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);
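
  /* Two strategies: for a constant length, compare word by word with an
     unrolled byte tail for the remainder; otherwise fall back to a single
     byte loop that counts the length register down to zero.  */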
  /* Loop on a register count.  */
  if (constp)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int align = INTVAL (operands[4]);
      int bytes = INTVAL (operands[3]);
      int witers = bytes / 4;

      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  emit_move_insn (tmp0, const0_rtx);

	  if (align < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* Word count.  Do we have iterations?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* Start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte?  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;

	  /* End loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found last word.  Restart it byte per byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* Fall through.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      emit_label (L_loop_byte);

      emit_insn (gen_extendqisi2 (tmp2, addr2));
      emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

      emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

      emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
      if (flag_delayed_branch)
	emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
      jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

      if (TARGET_SH2)
	emit_insn (gen_dect (len, len));
      else
	{
	  emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
	  emit_insn (gen_tstsi_t (len, len));
	}

      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
      /* End byte loop.  */
    }

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}

/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (operands[0], GEN_INT (-1));

  /* Remember the start of the string.  */
  emit_move_insn (start_addr, current_addr);

  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* Start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte?  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End loop.  */

  emit_label (L_end_loop_long);

  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* Unroll the remaining bytes.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_barrier_after (jump);

  /* Start byte loop.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* End loop.  */

  emit_label (L_return);
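
  /* current_addr now points one byte past the terminating NUL, so the
     length is current_addr - (start_addr + 1).  */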
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}

/* Emit code to perform a memset.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the size.
   OPERANDS[2] is the value to fill with.
   OPERANDS[3] is the alignment.  */
void
sh_expand_setmem (rtx *operands)
{
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_word = gen_label_rtx ();
  rtx_code_label *L_return = gen_label_rtx ();
  rtx jump;
  rtx dest = copy_rtx (operands[0]);
  rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  rtx val = force_reg (SImode, operands[2]);
  int align = INTVAL (operands[3]);
  rtx len = force_reg (SImode, operands[1]);

  if (! CONST_INT_P (operands[1]))
    return;

  int count = INTVAL (operands[1]);
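
  /* The word-store fast path below is only used for a constant fill value
     of 0 or -1: those are the only values for which every byte of the
     SImode register already equals the memset byte, so word stores and
     byte stores write the same pattern.  */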
  if (CONST_INT_P (operands[2])
      && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1)
      && count > 8)
    {
      rtx lenw = gen_reg_rtx (SImode);

      if (align < 4)
	{
	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	}

      /* Word count.  Do we have iterations?  */
      emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

      dest = adjust_automodify_address (dest, SImode, dest_addr, 0);

      /* Start word loop.  */
      emit_label (L_loop_word);

      if (TARGET_SH2)
	emit_insn (gen_dect (lenw, lenw));
      else
	{
	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	  emit_insn (gen_tstsi_t (lenw, lenw));
	}

      emit_move_insn (dest, val);
      emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						GET_MODE_SIZE (SImode)));

      jump = emit_jump_insn (gen_branch_false (L_loop_word));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);

      count = count % 4;

      dest = adjust_address (dest, QImode, 0);

      val = gen_lowpart (QImode, val);

      while (count--)
	{
	  emit_move_insn (dest, val);
	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						    GET_MODE_SIZE (QImode)));
	}

      jump = emit_jump_insn (gen_jump_compact (L_return));
      emit_barrier_after (jump);
    }

  dest = adjust_automodify_address (dest, QImode, dest_addr, 0);

  /* Start byte loop.  */
  emit_label (L_loop_byte);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  val = gen_lowpart (QImode, val);
  emit_move_insn (dest, val);
  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
					    GET_MODE_SIZE (QImode)));

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  emit_label (L_return);
}