2014-03-25 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / config / sh / sh-mem.cc
blob45af23acb48d124c1fad261ca6d09df40c3edead
1 /* Helper routines for memory move and comparison insns.
2 Copyright (C) 2013-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "machmode.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "expr.h"
28 #include "tm_p.h"
29 #include "basic-block.h"
31 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
32 static void
33 force_into (rtx value, rtx target)
35 value = force_operand (value, target);
36 if (! rtx_equal_p (value, target))
37 emit_insn (gen_move_insn (target, value));
40 /* Emit code to perform a block move. Choose the best method.
42 OPERANDS[0] is the destination.
43 OPERANDS[1] is the source.
44 OPERANDS[2] is the size.
45 OPERANDS[3] is the alignment safe to use. */
46 bool
47 expand_block_move (rtx *operands)
49 int align = INTVAL (operands[3]);
50 int constp = (CONST_INT_P (operands[2]));
51 int bytes = (constp ? INTVAL (operands[2]) : 0);
53 if (! constp)
54 return false;
56 /* If we could use mov.l to move words and dest is word-aligned, we
57 can use movua.l for loads and still generate a relatively short
58 and efficient sequence. */
59 if (TARGET_SH4A_ARCH && align < 4
60 && MEM_ALIGN (operands[0]) >= 32
61 && can_move_by_pieces (bytes, 32))
63 rtx dest = copy_rtx (operands[0]);
64 rtx src = copy_rtx (operands[1]);
65 /* We could use different pseudos for each copied word, but
66 since movua can only load into r0, it's kind of
67 pointless. */
68 rtx temp = gen_reg_rtx (SImode);
69 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
70 int copied = 0;
72 while (copied + 4 <= bytes)
74 rtx to = adjust_address (dest, SImode, copied);
75 rtx from = adjust_automodify_address (src, BLKmode,
76 src_addr, copied);
78 set_mem_size (from, 4);
79 emit_insn (gen_movua (temp, from));
80 emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
81 emit_move_insn (to, temp);
82 copied += 4;
85 if (copied < bytes)
86 move_by_pieces (adjust_address (dest, BLKmode, copied),
87 adjust_automodify_address (src, BLKmode,
88 src_addr, copied),
89 bytes - copied, align, 0);
91 return true;
94 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
95 alignment, or if it isn't a multiple of 4 bytes, then fail. */
96 if (align < 4 || (bytes % 4 != 0))
97 return false;
99 if (TARGET_HARD_SH4)
101 if (bytes < 12)
102 return false;
103 else if (bytes == 12)
105 rtx func_addr_rtx = gen_reg_rtx (Pmode);
106 rtx r4 = gen_rtx_REG (SImode, 4);
107 rtx r5 = gen_rtx_REG (SImode, 5);
109 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
110 force_into (XEXP (operands[0], 0), r4);
111 force_into (XEXP (operands[1], 0), r5);
112 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
113 return true;
115 else if (! optimize_size)
117 const char *entry_name;
118 rtx func_addr_rtx = gen_reg_rtx (Pmode);
119 int dwords;
120 rtx r4 = gen_rtx_REG (SImode, 4);
121 rtx r5 = gen_rtx_REG (SImode, 5);
122 rtx r6 = gen_rtx_REG (SImode, 6);
124 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
125 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
126 force_into (XEXP (operands[0], 0), r4);
127 force_into (XEXP (operands[1], 0), r5);
129 dwords = bytes >> 3;
130 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
131 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
132 return true;
134 else
135 return false;
137 if (bytes < 64)
139 char entry[30];
140 rtx func_addr_rtx = gen_reg_rtx (Pmode);
141 rtx r4 = gen_rtx_REG (SImode, 4);
142 rtx r5 = gen_rtx_REG (SImode, 5);
144 sprintf (entry, "__movmemSI%d", bytes);
145 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
146 force_into (XEXP (operands[0], 0), r4);
147 force_into (XEXP (operands[1], 0), r5);
148 emit_insn (gen_block_move_real (func_addr_rtx));
149 return true;
152 /* This is the same number of bytes as a memcpy call, but to a different
153 less common function name, so this will occasionally use more space. */
154 if (! optimize_size)
156 rtx func_addr_rtx = gen_reg_rtx (Pmode);
157 int final_switch, while_loop;
158 rtx r4 = gen_rtx_REG (SImode, 4);
159 rtx r5 = gen_rtx_REG (SImode, 5);
160 rtx r6 = gen_rtx_REG (SImode, 6);
162 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
163 force_into (XEXP (operands[0], 0), r4);
164 force_into (XEXP (operands[1], 0), r5);
166 /* r6 controls the size of the move. 16 is decremented from it
167 for each 64 bytes moved. Then the negative bit left over is used
168 as an index into a list of move instructions. e.g., a 72 byte move
169 would be set up with size(r6) = 14, for one iteration through the
170 big while loop, and a switch of -2 for the last part. */
172 final_switch = 16 - ((bytes / 4) % 16);
173 while_loop = ((bytes / 4) / 16 - 1) * 16;
174 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
175 emit_insn (gen_block_lump_real (func_addr_rtx));
176 return true;
179 return false;
182 static int prob_unlikely = REG_BR_PROB_BASE / 10;
183 static int prob_likely = REG_BR_PROB_BASE / 4;
185 /* Emit code to perform a strcmp.
187 OPERANDS[0] is the destination.
188 OPERANDS[1] is the first string.
189 OPERANDS[2] is the second string.
190 OPERANDS[3] is the known alignment. */
191 bool
192 sh_expand_cmpstr (rtx *operands)
194 rtx addr1 = operands[1];
195 rtx addr2 = operands[2];
196 rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
197 rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
198 rtx tmp0 = gen_reg_rtx (SImode);
199 rtx tmp1 = gen_reg_rtx (SImode);
200 rtx tmp2 = gen_reg_rtx (SImode);
201 rtx tmp3 = gen_reg_rtx (SImode);
203 rtx jump;
204 rtx L_return = gen_label_rtx ();
205 rtx L_loop_byte = gen_label_rtx ();
206 rtx L_end_loop_byte = gen_label_rtx ();
207 rtx L_loop_long = gen_label_rtx ();
208 rtx L_end_loop_long = gen_label_rtx ();
210 int align = INTVAL (operands[3]);
212 emit_move_insn (tmp0, const0_rtx);
214 if (align < 4)
216 emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
217 emit_insn (gen_tstsi_t (GEN_INT (3), tmp1));
218 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
219 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
222 addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
223 addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
225 /* tmp2 is aligned, OK to load. */
226 emit_move_insn (tmp3, addr2);
227 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
229 /*start long loop. */
230 emit_label (L_loop_long);
232 emit_move_insn (tmp2, tmp3);
234 /* tmp1 is aligned, OK to load. */
235 emit_move_insn (tmp1, addr1);
236 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));
238 /* Is there a 0 byte ? */
239 emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));
241 emit_insn (gen_cmpstr_t (tmp0, tmp3));
242 jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
243 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
245 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
247 /* tmp2 is aligned, OK to load. */
248 emit_move_insn (tmp3, addr2);
249 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
251 jump = emit_jump_insn (gen_branch_true (L_loop_long));
252 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
253 /* end loop. */
255 /* Fallthu, substract words. */
256 if (TARGET_LITTLE_ENDIAN)
258 rtx low_1 = gen_lowpart (HImode, tmp1);
259 rtx low_2 = gen_lowpart (HImode, tmp2);
261 emit_insn (gen_rotlhi3_8 (low_1, low_1));
262 emit_insn (gen_rotlhi3_8 (low_2, low_2));
263 emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
264 emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
265 emit_insn (gen_rotlhi3_8 (low_1, low_1));
266 emit_insn (gen_rotlhi3_8 (low_2, low_2));
269 jump = emit_jump_insn (gen_jump_compact (L_return));
270 emit_barrier_after (jump);
272 emit_label (L_end_loop_long);
274 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
275 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
277 /* start byte loop. */
278 addr1 = adjust_address (addr1, QImode, 0);
279 addr2 = adjust_address (addr2, QImode, 0);
281 emit_label (L_loop_byte);
283 emit_insn (gen_extendqisi2 (tmp2, addr2));
284 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
286 emit_insn (gen_extendqisi2 (tmp1, addr1));
287 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
289 emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
290 jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
291 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
293 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
294 if (flag_delayed_branch)
295 emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
296 jump = emit_jump_insn (gen_branch_true (L_loop_byte));
297 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
298 /* end loop. */
300 emit_label (L_end_loop_byte);
302 if (! flag_delayed_branch)
303 emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
304 emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
306 emit_label (L_return);
308 emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
310 return true;
313 /* Emit code to perform a strncmp.
315 OPERANDS[0] is the destination.
316 OPERANDS[1] is the first string.
317 OPERANDS[2] is the second string.
318 OPERANDS[3] is the length.
319 OPERANDS[4] is the known alignment. */
320 bool
321 sh_expand_cmpnstr (rtx *operands)
323 rtx addr1 = operands[1];
324 rtx addr2 = operands[2];
325 rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
326 rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
327 rtx tmp1 = gen_reg_rtx (SImode);
328 rtx tmp2 = gen_reg_rtx (SImode);
330 rtx jump;
331 rtx L_return = gen_label_rtx ();
332 rtx L_loop_byte = gen_label_rtx ();
333 rtx L_end_loop_byte = gen_label_rtx ();
335 rtx len = force_reg (SImode, operands[3]);
336 int constp = CONST_INT_P (operands[3]);
338 /* Loop on a register count. */
339 if (constp)
341 rtx tmp0 = gen_reg_rtx (SImode);
342 rtx tmp3 = gen_reg_rtx (SImode);
343 rtx lenw = gen_reg_rtx (SImode);
345 rtx L_loop_long = gen_label_rtx ();
346 rtx L_end_loop_long = gen_label_rtx ();
348 int align = INTVAL (operands[4]);
349 int bytes = INTVAL (operands[3]);
350 int witers = bytes / 4;
352 if (witers > 1)
354 addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
355 addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
357 emit_move_insn (tmp0, const0_rtx);
359 if (align < 4)
361 emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
362 emit_insn (gen_tstsi_t (GEN_INT (3), tmp1));
363 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
364 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
367 /* word count. Do we have iterations ? */
368 emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
370 /*start long loop. */
371 emit_label (L_loop_long);
373 /* tmp2 is aligned, OK to load. */
374 emit_move_insn (tmp2, addr2);
375 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
376 GET_MODE_SIZE (SImode)));
378 /* tmp1 is aligned, OK to load. */
379 emit_move_insn (tmp1, addr1);
380 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
381 GET_MODE_SIZE (SImode)));
383 /* Is there a 0 byte ? */
384 emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));
386 emit_insn (gen_cmpstr_t (tmp0, tmp3));
387 jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
388 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
390 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
391 jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
392 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
394 if (TARGET_SH2)
395 emit_insn (gen_dect (lenw, lenw));
396 else
398 emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
399 emit_insn (gen_tstsi_t (lenw, lenw));
402 jump = emit_jump_insn (gen_branch_false (L_loop_long));
403 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
405 int sbytes = bytes % 4;
407 /* end loop. Reached max iterations. */
408 if (! sbytes)
410 jump = emit_jump_insn (gen_jump_compact (L_return));
411 emit_barrier_after (jump);
413 else
415 /* Remaining bytes to check. */
417 addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
418 addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
420 while (sbytes--)
422 emit_insn (gen_extendqisi2 (tmp1, addr1));
423 emit_insn (gen_extendqisi2 (tmp2, addr2));
425 emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
426 jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
427 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
429 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
430 if (flag_delayed_branch)
431 emit_insn (gen_zero_extendqisi2 (tmp2,
432 gen_lowpart (QImode,
433 tmp2)));
434 jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
435 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
437 addr1 = adjust_address (addr1, QImode,
438 GET_MODE_SIZE (QImode));
439 addr2 = adjust_address (addr2, QImode,
440 GET_MODE_SIZE (QImode));
443 jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
444 emit_barrier_after (jump);
447 emit_label (L_end_loop_long);
449 /* Found last word. Restart it byte per byte. */
451 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
452 -GET_MODE_SIZE (SImode)));
453 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
454 -GET_MODE_SIZE (SImode)));
456 /* fall thru. */
459 addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
460 addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
462 while (bytes--)
464 emit_insn (gen_extendqisi2 (tmp1, addr1));
465 emit_insn (gen_extendqisi2 (tmp2, addr2));
467 emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
468 jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
469 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
471 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
472 if (flag_delayed_branch)
473 emit_insn (gen_zero_extendqisi2 (tmp2,
474 gen_lowpart (QImode, tmp2)));
475 jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
476 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
478 addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
479 addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
482 jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
483 emit_barrier_after (jump);
486 addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
487 addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
489 emit_label (L_loop_byte);
491 emit_insn (gen_extendqisi2 (tmp2, addr2));
492 emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
494 emit_insn (gen_extendqisi2 (tmp1, addr1));
495 emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
497 emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
498 jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
499 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
501 emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
502 if (flag_delayed_branch)
503 emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
504 jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
505 add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
507 if (TARGET_SH2)
508 emit_insn (gen_dect (len, len));
509 else
511 emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
512 emit_insn (gen_tstsi_t (len, len));
515 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
516 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
517 /* end byte loop. */
519 emit_label (L_end_loop_byte);
521 if (! flag_delayed_branch)
522 emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
523 emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
525 emit_label (L_return);
527 emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
529 return true;
532 /* Emit code to perform a strlen
534 OPERANDS[0] is the destination.
535 OPERANDS[1] is the string.
536 OPERANDS[2] is the char to search.
537 OPERANDS[3] is the alignment. */
538 bool
539 sh_expand_strlen (rtx *operands)
541 rtx addr1 = operands[1];
542 rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
543 rtx start_addr = gen_reg_rtx (Pmode);
544 rtx tmp0 = gen_reg_rtx (SImode);
545 rtx tmp1 = gen_reg_rtx (SImode);
546 rtx L_return = gen_label_rtx ();
547 rtx L_loop_byte = gen_label_rtx ();
549 rtx jump;
550 rtx L_loop_long = gen_label_rtx ();
551 rtx L_end_loop_long = gen_label_rtx ();
553 int align = INTVAL (operands[3]);
555 emit_move_insn (operands[0], GEN_INT (-1));
557 /* remember start of string. */
558 emit_move_insn (start_addr, current_addr);
560 if (align < 4)
562 emit_insn (gen_tstsi_t (GEN_INT (3), current_addr));
563 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
564 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
567 emit_move_insn (tmp0, operands[2]);
569 addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);
571 /*start long loop. */
572 emit_label (L_loop_long);
574 /* tmp1 is aligned, OK to load. */
575 emit_move_insn (tmp1, addr1);
576 emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));
578 /* Is there a 0 byte ? */
579 emit_insn (gen_cmpstr_t (tmp0, tmp1));
581 jump = emit_jump_insn (gen_branch_false (L_loop_long));
582 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
583 /* end loop. */
585 emit_label (L_end_loop_long);
587 emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));
589 /* start byte loop. */
590 addr1 = adjust_address (addr1, QImode, 0);
592 emit_label (L_loop_byte);
594 emit_insn (gen_extendqisi2 (tmp1, addr1));
595 emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
597 emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
598 jump = emit_jump_insn (gen_branch_false (L_loop_byte));
599 add_int_reg_note (jump, REG_BR_PROB, prob_likely);
601 /* end loop. */
603 emit_label (L_return);
605 emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
607 emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));
609 return true;