/* Helper routines for memory move and comparison insns.
   Copyright (C) 2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "machmode.h"
#include "rtl.h"
#include "tree.h"
#include "expr.h"
#include "tm_p.h"
#include "basic-block.h"
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
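/* In this file force_into is used to load the arguments of the library
   sfunc calls below into the fixed argument registers r4-r6.  */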
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
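/* A minimal sketch of the expected use (an assumption based on the
   movmemsi expander in sh.md, not something stated in this file):

     if (expand_block_move (operands))
       DONE;
     else
       FAIL;

   i.e. a false return value lets the expander fall back to the generic
   block move code.  */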
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return false;
  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
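      /* movua.l is the SH4A unaligned-load instruction; its destination
         is architecturally fixed to r0, which is why a single TEMP
         pseudo is reused for every copied word (see the comment
         below).  */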
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
         since movua can only load into r0, it's kind of pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
        {
          rtx to = adjust_address (dest, SImode, copied);
          rtx from = adjust_automodify_address (src, BLKmode,
                                                src_addr, copied);

          set_mem_size (from, 4);
          emit_insn (gen_movua (temp, from));
          emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
          emit_move_insn (to, temp);
          copied += 4;
        }
      if (copied < bytes)
        move_by_pieces (adjust_address (dest, BLKmode, copied),
                        adjust_automodify_address (src, BLKmode,
                                                   src_addr, copied),
                        bytes - copied, align, 0);

      return true;
    }
  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
        return false;
      else if (bytes == 12)
        {
          rtx func_addr_rtx = gen_reg_rtx (Pmode);
          rtx r4 = gen_rtx_REG (SImode, 4);
          rtx r5 = gen_rtx_REG (SImode, 5);

          function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);
          emit_insn (gen_block_move_real_i4 (func_addr_rtx));
          return true;
        }
      else if (! optimize_size)
        {
          const char *entry_name;
          rtx func_addr_rtx = gen_reg_rtx (Pmode);
          int dwords;
          rtx r4 = gen_rtx_REG (SImode, 4);
          rtx r5 = gen_rtx_REG (SImode, 5);
          rtx r6 = gen_rtx_REG (SImode, 6);

          entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
          function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);

          dwords = bytes >> 3;
          emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
          emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
          return true;
        }
      else
        return false;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return true;
    }
  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      /* r6 controls the size of the move.  16 is decremented from it
         for each 64 bytes moved.  Then the negative bit left over is used
         as an index into a list of move instructions.  e.g., a 72 byte move
         would be set up with size(r6) = 14, for one iteration through the
         big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
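      /* Checking the 72 byte example from the comment above:
         bytes / 4 = 18, so final_switch = 16 - (18 % 16) = 14 and
         while_loop = (18 / 16 - 1) * 16 = 0; r6 starts at 14, and after
         the single loop iteration 14 - 16 = -2 selects the last part.  */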
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return true;
    }

  return false;
}
/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the align.  */
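/* Overview of the generated code: while both addresses are longword
   aligned, the strings are compared four bytes at a time; the byte loop
   below handles unaligned strings, as well as the final word once a
   zero byte or a difference has been seen.  */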
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx s1 = copy_rtx (operands[1]);
  rtx s2 = copy_rtx (operands[2]);
  rtx s1_addr = copy_addr_to_reg (XEXP (s1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (s2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx L_return = gen_label_rtx ();
  rtx L_loop_byte = gen_label_rtx ();
  rtx L_end_loop_byte = gen_label_rtx ();
  rtx L_loop_long = gen_label_rtx ();
  rtx L_end_loop_long = gen_label_rtx ();
  rtx jump, addr1, addr2;
  int prob_unlikely = REG_BR_PROB_BASE / 10;
  int prob_likely = REG_BR_PROB_BASE / 4;
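  /* Jump to the byte loop if either address has its low two bits set,
     i.e. if one of the strings is not longword aligned (the tst below
     sets T when ((s1_addr | s2_addr) & 3) == 0).  */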
  emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
  emit_move_insn (tmp0, GEN_INT (3));

  emit_insn (gen_tstsi_t (tmp0, tmp1));

  emit_move_insn (tmp0, const0_rtx);

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  addr1 = adjust_automodify_address (s1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (s2, SImode, s2_addr, 0);

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
  /* Start long loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));
  /* Is there a zero byte?  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
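  /* cmp/str sets T if any of the four byte positions of its operands
     compare equal; with tmp0 == 0 this detects a zero byte somewhere in
     tmp3 (the AND of the two words).  A spurious hit merely drops into
     the byte loop below, which re-checks one byte at a time.  */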
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* Fall through; check which of the words is greater.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
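      /* The rotate sequence byte-swaps each word: for a word B3:B2:B1:B0,
         swapping the bytes of the low half gives B3:B2:B0:B1, rotating
         by 16 gives B0:B1:B3:B2, and swapping the low half again gives
         B0:B1:B2:B3.  After that, an unsigned word comparison orders
         the strings by byte position, as on big endian.  */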
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);
  /* Start byte loop.  */
  addr1 = adjust_automodify_address (s1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (s2, QImode, s2_addr, 0);

  emit_label (L_end_loop_long);
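  /* Back up both pointers so the word that ended the long loop is
     re-examined one byte at a time.  */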
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  emit_label (L_end_loop_byte);

  emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
  emit_label (L_return);
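  /* The result is simply the difference of the two (zero-extended or
     byte-swapped) values, which has the sign strcmp expects.  */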
  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}