/* Helper routines for memory move and comparison insns.
   Copyright (C) 2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "machmode.h"
#include "rtl.h"
#include "tree.h"
#include "expr.h"
#include "tm_p.h"
#include "basic-block.h"
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return false;
  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }
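
  /* For illustration (example added, not from the original sources):
     with bytes == 10, an unaligned source and a 32-bit aligned
     destination, the loop above emits two movua.l loads into TEMP and
     two aligned word stores, and move_by_pieces then copies the
     trailing 2 bytes.  */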
  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;
  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return true;
	}
      else if (! optimize_size)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
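	  /* For illustration (example added, not in the original
	     sources): bytes == 20 gives dwords == 2, so r6 is loaded
	     with 1 and the "__movmem_i4_odd" entry is used, since
	     20 & 4 is nonzero -- one word is left over after the
	     double-word moves.  */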
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return true;
    }
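
  /* For example (illustration added, not in the original sources),
     bytes == 20 emits a call to "__movmemSI20", with the destination
     address forced into r4 and the source address into r5.  */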
  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */
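      /* Worked out explicitly (example added, not in the original
	 sources): for bytes == 72, bytes / 4 == 18 words, so
	 final_switch == 16 - (18 % 16) == 14 and
	 while_loop == (18 / 16 - 1) * 16 == 0.  r6 starts at 14; one
	 pass of the loop subtracts 16, leaving -2 as the final switch
	 index for the remaining 8 bytes.  */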
      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return true;
    }

  return false;
}
/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx s1 = copy_rtx (operands[1]);
  rtx s2 = copy_rtx (operands[2]);
  rtx s1_addr = copy_addr_to_reg (XEXP (s1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (s2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx L_return = gen_label_rtx ();
  rtx L_loop_byte = gen_label_rtx ();
  rtx L_end_loop_byte = gen_label_rtx ();
  rtx L_loop_long = gen_label_rtx ();
  rtx L_end_loop_long = gen_label_rtx ();

  rtx jump, addr1, addr2;
  int prob_unlikely = REG_BR_PROB_BASE / 10;
  int prob_likely = REG_BR_PROB_BASE / 4;
  emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
  emit_move_insn (tmp0, GEN_INT (3));

  emit_insn (gen_tstsi_t (tmp0, tmp1));
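  /* tst sets T when ((s1_addr | s2_addr) & 3) == 0, i.e. when both
     strings are word-aligned; the branch below jumps to the byte loop
     when either string is not.  */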
  emit_move_insn (tmp0, const0_rtx);

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  addr1 = adjust_automodify_address (s1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (s2, SImode, s2_addr, 0);

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
  /* Start long loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a zero byte?  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End loop.  */
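
  /* Note that the next s2 word is loaded into tmp3 before the loop-back
     branch, so each iteration compares the word fetched on the previous
     pass; tmp2 keeps a copy of that s2 word for the comparison after
     the loop exits.  */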
  /* Fall through: check if one of the words is greater.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }
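
  /* The three rotates byte-swap each word, so the byte that comes first
     in memory becomes the most significant byte.  For illustration:
     0xDDCCBBAA -> 0xDDCCAABB (low HImode rotate) -> 0xAABBDDCC (SImode
     rotate by 16) -> 0xAABBCCDD (low HImode rotate again).  */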
  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);
  /* Start byte loop.  */
  addr1 = adjust_automodify_address (s1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (s2, QImode, s2_addr, 0);

  emit_label (L_end_loop_long);

  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
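
  /* A possible zero byte was detected somewhere in the last word, and
     both pointers have already advanced past it, so back them up by one
     word and re-scan that word byte by byte.  */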
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End loop.  */
  emit_label (L_end_loop_byte);

  emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}
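
/* A minimal usage sketch (an assumption about the caller, not from this
   file: the cmpstrsi expander in sh.md is presumed to drive this
   routine roughly as follows; the exact expander body may differ):

     if (sh_expand_cmpstr (operands))
       DONE;
     else
       FAIL;

   where operands[1] and operands[2] are MEMs for the two strings and
   operands[0] receives a value whose sign gives the strcmp result, as
   computed by the gen_subsi3 above.  */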