/* Helper routines for memory move and comparison insns.
   Copyright (C) 2013-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "emit-rtl.h"
#include "explow.h"
#include "expr.h"
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return false;
  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
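  /* Per copied word, the loop below emits roughly this sequence
     (register choice illustrative only; movua can only load into r0):

	movua.l	@r1,r0		! unaligned load
	add	#4,r1
	mov.l	r0,@(ofs,r4)	! aligned store  */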
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }
  /* If the move doesn't have 4 byte alignment, or if the size isn't a
     multiple of 4 bytes, then fail.  (A non-constant size was already
     rejected above.)  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else if (! optimize_size)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
				     ? "__movmem_i4_odd"
				     : "__movmem_i4_even",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  int dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else
	return false;
    }
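  /* For the __movmem_i4 helpers above: bytes is a multiple of 4 here,
     so bytes & 4 distinguishes an odd from an even number of words,
     and r6 is loaded with the number of 8-byte chunks minus one,
     presumably serving as the helper's loop count.  */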
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx, lab));
      return true;
    }
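  /* Since bytes is a multiple of 4 below 64 at this point, the entry
     name formed above is one of __movmemSI4 ... __movmemSI60,
     presumably matching the block-move entry points provided by
     libgcc.  */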
  /* This is the same number of bytes as a memcpy call, but to a different,
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  E.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
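      /* Checking the 72 byte example above: bytes / 4 = 18, so
	 final_switch = 16 - (18 % 16) = 14 and
	 while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 14 as the
	 comment describes.  */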
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
      return true;
    }

  return false;
}
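/* Note: REG_BR_PROB_BASE is 10000, so the REG_BR_PROB notes attached
   below mark branches as taken with roughly 10% (prob_unlikely) and
   25% (prob_likely) probability.  */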
static const int prob_unlikely = REG_BR_PROB_BASE / 10;
static const int prob_likely = REG_BR_PROB_BASE / 4;
/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;
  if (addr1_alignment < 4 && addr2_alignment < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment < 4 && addr2_alignment >= 4)
    {
      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment >= 4 && addr2_alignment < 4)
    {
      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
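  /* In the first case, (s1_addr | s2_addr) & 3 == 0 exactly when both
     pointers are 4-byte aligned, so a single tst covers both; whenever
     runtime alignment cannot be proven, control falls back to the byte
     loop.  */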
  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* Start long loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte?  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
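  /* Since 0 & x == 0, the AND above leaves a zero byte wherever either
     word has one.  cmp/str sets T if any of the four byte positions of
     its two operands compare equal, so checking tmp3 against tmp0
     (serving as the zero word) flags a potential NUL; false positives
     such as 0x0f & 0xf0 are sorted out later by the byte loop.  */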
  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End loop.  */

  /* Fall through: the words differ, so subtract them.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }
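  /* The rotate sequence above (per word: swap the low halfword, rotate
     by 16, swap the new low halfword) is a full byte reversal: a word
     B3.B2.B1.B0 (B0 least significant) becomes B3.B2.B0.B1, then
     B0.B1.B3.B2, then B0.B1.B2.B3.  With the bytes reversed, the word
     subtraction at L_return takes its sign from the first differing
     string byte, as strcmp requires.  */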
  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* Start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}
/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = force_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;
  /* Loop on a register count.  */
  if (constp)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int bytes = INTVAL (operands[3]);
      int witers = bytes / 4;
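      /* E.g. for bytes = 10: witers = 2, so the word loop below
	 compares two 4-byte words, and the remaining
	 sbytes = 10 % 4 = 2 bytes are checked individually
	 afterwards.  */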
      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  emit_move_insn (tmp0, const0_rtx);

	  if (addr1_alignment < 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment < 4 && addr2_alignment >= 4)
	    {
	      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment >= 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  /* Word count.  Do we have iterations?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* Start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte?  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }
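	  /* dt (gen_dect, SH2 and up) decrements its operand and sets T
	     when the result reaches zero; on SH1 the same effect needs
	     an explicit add #-1 plus tst pair.  */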
	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;

	  /* End loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  */
	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
	      emit_barrier_after (jump);
	    }
	  emit_label (L_end_loop_long);

	  /* Found the last word.  Recheck it byte by byte.  */
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* Fall through.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End byte loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}
/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search for.
   OPERANDS[3] is the alignment.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (operands[0], GEN_INT (-1));

  /* Remember the start of the string.  */
  emit_move_insn (start_addr, current_addr);

  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* Start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte?  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End loop.  */

  emit_label (L_end_loop_long);

  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* Unroll remaining bytes.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_barrier_after (jump);
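  /* The long loop only exits once the word just examined contains the
     sought byte (a NUL for strlen), so one of the four branches above
     must be taken; the fall-through after the last one is unreachable,
     hence the barrier.  */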
  /* Start byte loop.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* End loop.  */

  emit_label (L_return);

  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));
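  /* current_addr was incremented past the NUL, so the length excluding
     the terminator is (current_addr - 1) - start_addr, computed above
     as current_addr - (start_addr + 1).  */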
  return true;
}

/* Emit code to perform a memset.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the size.
   OPERANDS[2] is the value to fill with.
   OPERANDS[3] is the alignment.  */
void
sh_expand_setmem (rtx *operands)
{
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_word = gen_label_rtx ();
  rtx_code_label *L_return = gen_label_rtx ();
  rtx jump;
  rtx dest = copy_rtx (operands[0]);
  rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  rtx val = force_reg (SImode, operands[2]);
  int align = INTVAL (operands[3]);
  rtx len = force_reg (SImode, operands[1]);

  if (! CONST_INT_P (operands[1]))
    return;

  int count = INTVAL (operands[1]);
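  /* Fill values of 0 and -1 are special: for a fill byte c, val holds
     c as a 32-bit value (00 00 00 c), and storing that whole word only
     fills all four bytes with c when c is 0 or -1 (0xff).  Hence the
     word loop below is restricted to those two values, avoiding any
     byte replication step.  */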
  if (CONST_INT_P (operands[2])
      && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1)
      && count > 8)
    {
      rtx lenw = gen_reg_rtx (SImode);

      if (align < 4)
	{
	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	}

      /* Word count.  Do we have iterations?  */
      emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

      dest = adjust_automodify_address (dest, SImode, dest_addr, 0);

      /* Start loop.  */
      emit_label (L_loop_word);

      if (TARGET_SH2)
	emit_insn (gen_dect (lenw, lenw));
      else
	{
	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	  emit_insn (gen_tstsi_t (lenw, lenw));
	}

      emit_move_insn (dest, val);
      emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						GET_MODE_SIZE (SImode)));

      jump = emit_jump_insn (gen_branch_false (L_loop_word));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);

      count = count % 4;

      dest = adjust_address (dest, QImode, 0);

      val = gen_lowpart (QImode, val);

      while (count--)
	{
	  emit_move_insn (dest, val);
	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						    GET_MODE_SIZE (QImode)));
	}

      jump = emit_jump_insn (gen_jump_compact (L_return));
      emit_barrier_after (jump);
    }
  dest = adjust_automodify_address (dest, QImode, dest_addr, 0);

  /* Start loop.  */
  emit_label (L_loop_byte);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  val = gen_lowpart (QImode, val);
  emit_move_insn (dest, val);
  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
					    GET_MODE_SIZE (QImode)));

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  emit_label (L_return);
}