Fix 31018 -- move TARGET_xxx in i386.md to tuning options
[official-gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
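/* Editor's sketch (not part of the original source): MODE_INDEX simply maps a
   machine mode onto a row of the per-mode cost arrays below.  The field name
   `mult_init' is an assumption used for illustration only; the real member
   names of struct processor_costs are declared in i386.h.  */
#if 0
static int
example_mult_start_cost (enum machine_mode mode,
                         const struct processor_costs *costs)
{
  /* QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything else -> 4.  */
  return costs->mult_init[MODE_INDEX (mode)];
}
#endif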
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
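/* Editor's note (not part of the original source): given the assumption above
   that COSTS_N_INSNS (N) expands to (N)*4 and that an add is 2 bytes,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so on the size-cost scale a
   2-byte add is worth exactly one instruction unit.  */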
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
 353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
 354 the alignment). For small blocks the inline loop is still a noticeable win; for bigger
 355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
 356 more expensive startup time in the CPU, but after 4K the difference is down in the noise. */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
 363 DUMMY_STRINGOP_ALGS}
 364 };
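/* Editor's sketch (not part of the original source): each memcpy/memset entry
   above is a stringop_algs descriptor; the code below shows roughly how such a
   descriptor is read.  The member names `unknown_size', `size', `max' and
   `alg' are assumptions for illustration; the real definition of
   struct stringop_algs lives in i386.h.  */
#if 0
static enum stringop_alg
example_pick_stringop_alg (const struct stringop_algs *d, int count)
{
  int i;
  if (count < 0)
    return d->unknown_size;		/* block size unknown at compile time */
  for (i = 0; d->size[i].max != -1; i++)
    if (count <= d->size[i].max)
      return d->size[i].alg;		/* first threshold the block fits under */
  return d->size[i].alg;		/* the {-1, alg} entry catches the rest */
}
#endif
/* Read this way, the PentiumPro memcpy row above says: known blocks up to 128
   bytes use an inline loop, up to 1024 an unrolled loop, up to 8192 rep movsl,
   larger blocks rep movsb, and unknown sizes default to rep movsl.  */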
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 532 /* For some reason, the Athlon deals better with the REP prefix (relative to loops)
 533 than the K8 does. Alignment becomes important after 8 bytes for memcpy and
 534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
 584 /* New AMD processors never drop prefetches; if they cannot be performed
 585 immediately, they are queued. We set the number of simultaneous prefetches
 586 to a large constant to reflect this (it is probably not a good idea to leave
 587 the number of prefetches entirely unlimited, as their execution also takes
 588 some time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
 598 blocks it is better to use a loop. For large blocks, a libcall can do
 599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
 648 /* MOVD latencies.  On K8:
 649      MOVD reg64, xmmreg   Double   FSTORE   4
 650      MOVD reg32, xmmreg   Double   FSTORE   4
 651    On AMDFAM10:
 652      MOVD reg64, xmmreg   Double   FADD     3   1/1  1/1
 654      MOVD reg32, xmmreg   Double   FADD     3   1/1  1/1  */
656 64, /* size of prefetch block */
 657 /* New AMD processors never drop prefetches; if they cannot be performed
 658 immediately, they are queued. We set the number of simultaneous prefetches
 659 to a large constant to reflect this (it is probably not a good idea to leave
 660 the number of prefetches entirely unlimited, as their execution also takes
 661 some time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 672 very small blocks it is better to use a loop. For large blocks, a libcall can
 673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
 828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
 862 /* On all chips taken into consideration, lea takes 2 cycles or more. With
 863 this cost, however, our current implementation of synth_mult results in
 864 the use of unnecessary temporary registers, causing regressions on several
 865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
 907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
 908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
980 const struct processor_costs *ix86_cost = &pentium_cost;
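/* Editor's sketch (not part of the original source): ix86_cost is re-pointed
   at one of the tables above when the -mtune option is processed, and cost
   queries elsewhere in the backend go through this pointer.  The selection
   below is a simplified, hypothetical illustration, not the actual option
   handling code.  */
#if 0
static void
example_select_cost_table (void)
{
  if (ix86_tune == PROCESSOR_K8)
    ix86_cost = &k8_cost;
  else if (ix86_tune == PROCESSOR_CORE2)
    ix86_cost = &core2_cost;
  else
    ix86_cost = &generic32_cost;
}
#endif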
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
 1003 /* The generic instruction choice should be the common subset of supported CPUs
 1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
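/* Editor's sketch (not part of the original source): each entry of
   ix86_tune_features below is a bitmask over the m_* processor bits defined
   above, so testing whether the current -mtune CPU has a given feature is a
   single AND.  The helper below is a hypothetical illustration; the actual
   TARGET_* wrappers are defined in i386.h.  */
#if 0
static int
example_tune_has_feature (int feature_index)
{
  unsigned int tune_mask = 1u << ix86_tune;	/* bit of the -mtune CPU */
  return (ix86_tune_features[feature_index] & tune_mask) != 0;
}
#endif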
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
 1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
 1010 negatively, so enabling it for Generic64 seems like a good code-size
 1011 tradeoff. We can't enable it for 32-bit generic because it does not
 1012 work well with PPro based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1030 | m_NOCONA | m_CORE2 | m_GENERIC,
 1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
 1033 on simulation results. But after P4 was made, no performance benefit
 1034 was observed with branch hints. They also increase the code size.
 1035 As a result, icc never generates branch hints. */
 1036 0,
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1039 ~m_386,
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1043 | m_NOCONA | m_CORE2 | m_GENERIC,
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
 1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
 1051 register stalls on the Generic32 compilation setting as well. However,
 1052 in the current implementation the partial register stalls are not eliminated
 1053 very well - they can be introduced via subregs synthesized by combine
 1054 and can happen in caller/callee saving sequences. Because this option
 1055 pays back little on PPro based chips and conflicts with the partial reg
 1056 dependencies used by Athlon/P4 based chips, it is better to leave it off
 1057 for generic32 for now. */
1058 m_PPRO,
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1069 /* X86_TUNE_USE_MOV0 */
1070 m_K6,
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1076 m_PENT4,
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1079 m_PPRO,
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1082 ~m_PENT,
1084 /* X86_TUNE_READ_MODIFY */
1085 ~(m_PENT | m_PPRO),
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
 1097 /* X86_TUNE_QIMODE_MATH */
 1098 ~0,
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1104 ~m_PPRO,
 1106 /* X86_TUNE_PROMOTE_QI_REGS */
 1107 0,
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1110 m_PPRO,
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
 1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
 1135 conflict here between PPro/Pentium4 based chips that treat 128-bit
 1136 SSE registers as single units and K8 based chips that divide SSE
 1137 registers into two 64-bit halves. This knob promotes all store destinations
 1138 to be 128-bit to allow register renaming on 128-bit SSE units, but usually
 1139 results in one extra microop on 64-bit SSE units. Experimental results
 1140 show that disabling this option on P4 brings over 20% SPECfp regression,
 1141 while enabling it on K8 brings roughly 2.4% regression that can be partly
 1142 masked by careful scheduling of moves. */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1146 m_AMDFAM10,
 1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
 1149 are resolved on SSE register parts instead of whole registers, so we may
 1150 maintain just the lower part of scalar values in the proper format, leaving the
 1151 upper part undefined. */
1152 m_ATHLON_K8,
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1169 /* X86_TUNE_SHIFT1 */
1170 ~m_486,
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1198 ~m_K8,
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1201 m_K8 | m_GENERIC64,
1203 /* X86_TUNE_SLOW_IMUL_IMM32_MEM (imul of 32-bit constant and memory is vector
1204 path on AMD machines) */
1205 m_K8 | m_GENERIC64 | m_AMDFAM10,
1207 /* X86_TUNE_SLOW_IMUL_IMM8 (imul of 8-bit constant is vector path on AMD
1208 machines) */
1209 m_K8 | m_GENERIC64 | m_AMDFAM10,
1211 /* X86_TUNE_MOVE_M1_VIA_OR (on pentiums, it is faster to load -1 via OR than
1212 a MOV) */
1213 m_PENT,
1215 /* X86_TUNE_NOT_UNPAIRABLE (NOT is not pairable on Pentium, while XOR is, but
1216 one byte longer). */
1217 m_PENT,
1219 /* X86_TUNE_NOT_VECTORMODE (On AMD K6, NOT is vector decoded with memory
1220 operand that cannot be represented using a modRM byte. The XOR
1221 replacement is long decoded, so this split helps here as well). */
1222 m_K6,
1225 /* Feature tests against the various architecture variations. */
1226 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1227 /* X86_ARCH_CMOVE */
1228 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1230 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1231 ~m_386,
1233 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1234 ~(m_386 | m_486),
1236 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1237 ~m_386,
1239 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
 1240 ~m_386,
 1241 };
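/* Editor's sketch (not part of the original source): ix86_arch_features above
   is read the same way as ix86_tune_features, only against the -march CPU;
   e.g. the X86_ARCH_CMPXCHG entry (~m_386) says every processor except the
   386 provides cmpxchg.  The helper below is a hypothetical illustration.  */
#if 0
static int
example_arch_has_cmpxchg (void)
{
  unsigned int arch_mask = 1u << ix86_arch;	/* bit of the -march CPU */
  return (ix86_arch_features[X86_ARCH_CMPXCHG] & arch_mask) != 0;
}
#endif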
1243 static const unsigned int x86_accumulate_outgoing_args
1244 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1246 static const unsigned int x86_arch_always_fancy_math_387
1247 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1248 | m_NOCONA | m_CORE2 | m_GENERIC;
1250 static enum stringop_alg stringop_alg = no_stringop;
1252 /* In case the average insn count for single function invocation is
1253 lower than this constant, emit fast (but longer) prologue and
1254 epilogue code. */
1255 #define FAST_PROLOGUE_INSN_COUNT 20
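/* Editor's sketch (not part of the original source): how the constant above is
   meant to be used, per its comment.  The function name and the way the insn
   count is obtained are assumptions for illustration.  */
#if 0
static bool
example_use_fast_prologue_epilogue (int function_insn_count)
{
  /* Small functions get the fast (but longer) prologue/epilogue sequences.  */
  return function_insn_count < FAST_PROLOGUE_INSN_COUNT;
}
#endif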
1257 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1258 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1259 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1260 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1262 /* Array of the smallest class containing reg number REGNO, indexed by
1263 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1265 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1267 /* ax, dx, cx, bx */
1268 AREG, DREG, CREG, BREG,
1269 /* si, di, bp, sp */
1270 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1271 /* FP registers */
1272 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1273 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1274 /* arg pointer */
1275 NON_Q_REGS,
1276 /* flags, fpsr, fpcr, frame */
1277 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1278 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1279 SSE_REGS, SSE_REGS,
1280 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1281 MMX_REGS, MMX_REGS,
1282 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1283 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1284 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
 1285 SSE_REGS, SSE_REGS,
 1286 };
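/* Editor's note (not part of the original source): i386.h consults this table
   essentially as below, so e.g. REGNO_REG_CLASS (0) is AREG for %eax and
   REGNO_REG_CLASS (7) is NON_Q_REGS for %esp.  The exact macro text is an
   assumption for illustration.  */
#if 0
#define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])
#endif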
1288 /* The "default" register map used in 32bit mode. */
1290 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1292 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1293 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1294 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1295 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1296 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1297 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1298 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1301 static int const x86_64_int_parameter_registers[6] =
1303 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1304 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1307 static int const x86_64_int_return_registers[4] =
 1309 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1312 /* The "default" register map used in 64bit mode. */
1313 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1315 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1316 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1317 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1318 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1319 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1320 8,9,10,11,12,13,14,15, /* extended integer registers */
1321 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1324 /* Define the register numbers to be used in Dwarf debugging information.
1325 The SVR4 reference port C compiler uses the following register numbers
1326 in its Dwarf output code:
1327 0 for %eax (gcc regno = 0)
1328 1 for %ecx (gcc regno = 2)
1329 2 for %edx (gcc regno = 1)
1330 3 for %ebx (gcc regno = 3)
1331 4 for %esp (gcc regno = 7)
1332 5 for %ebp (gcc regno = 6)
1333 6 for %esi (gcc regno = 4)
1334 7 for %edi (gcc regno = 5)
1335 The following three DWARF register numbers are never generated by
1336 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1337 believes these numbers have these meanings.
1338 8 for %eip (no gcc equivalent)
1339 9 for %eflags (gcc regno = 17)
1340 10 for %trapno (no gcc equivalent)
1341 It is not at all clear how we should number the FP stack registers
1342 for the x86 architecture. If the version of SDB on x86/svr4 were
1343 a bit less brain dead with respect to floating-point then we would
1344 have a precedent to follow with respect to DWARF register numbers
1345 for x86 FP registers, but the SDB on x86/svr4 is so completely
1346 broken with respect to FP registers that it is hardly worth thinking
1347 of it as something to strive for compatibility with.
1348 The version of x86/svr4 SDB I have at the moment does (partially)
1349 seem to believe that DWARF register number 11 is associated with
1350 the x86 register %st(0), but that's about all. Higher DWARF
1351 register numbers don't seem to be associated with anything in
1352 particular, and even for DWARF regno 11, SDB only seems to under-
1353 stand that it should say that a variable lives in %st(0) (when
1354 asked via an `=' command) if we said it was in DWARF regno 11,
1355 but SDB still prints garbage when asked for the value of the
1356 variable in question (via a `/' command).
1357 (Also note that the labels SDB prints for various FP stack regs
1358 when doing an `x' command are all wrong.)
1359 Note that these problems generally don't affect the native SVR4
1360 C compiler because it doesn't allow the use of -O with -g and
1361 because when it is *not* optimizing, it allocates a memory
1362 location for each floating-point variable, and the memory
1363 location is what gets described in the DWARF AT_location
1364 attribute for the variable in question.
1365 Regardless of the severe mental illness of the x86/svr4 SDB, we
1366 do something sensible here and we use the following DWARF
1367 register numbers. Note that these are all stack-top-relative
1368 numbers.
1369 11 for %st(0) (gcc regno = 8)
1370 12 for %st(1) (gcc regno = 9)
1371 13 for %st(2) (gcc regno = 10)
1372 14 for %st(3) (gcc regno = 11)
1373 15 for %st(4) (gcc regno = 12)
1374 16 for %st(5) (gcc regno = 13)
1375 17 for %st(6) (gcc regno = 14)
1376 18 for %st(7) (gcc regno = 15)
1378 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1380 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1381 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1382 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1383 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1384 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1385 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
 1386 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
 1387 };
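/* Editor's note (not part of the original source): the table above matches the
   numbering described in the comment; for example svr4_dbx_register_map[2] == 1
   (gcc regno 2, %ecx, is emitted as DWARF register 1) and
   svr4_dbx_register_map[8] == 11 (%st(0) is DWARF register 11).  */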
1389 /* Test and compare insns in i386.md store the information needed to
1390 generate branch and scc insns here. */
1392 rtx ix86_compare_op0 = NULL_RTX;
1393 rtx ix86_compare_op1 = NULL_RTX;
1394 rtx ix86_compare_emitted = NULL_RTX;
1396 /* Size of the register save area. */
1397 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
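/* Editor's note (not part of the original source): with the usual 64-bit
   values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8
   (assumed here; they are defined in i386.h), this evaluates to
   6*8 + 8*16 = 176 bytes, the register save area required for varargs by the
   x86-64 ABI.  */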
1399 /* Define the structure for the machine field in struct function. */
1401 struct stack_local_entry GTY(())
1403 unsigned short mode;
1404 unsigned short n;
1405 rtx rtl;
1406 struct stack_local_entry *next;
1409 /* Structure describing stack frame layout.
1410 Stack grows downward:
1412 [arguments]
1413 <- ARG_POINTER
1414 saved pc
1416 saved frame pointer if frame_pointer_needed
1417 <- HARD_FRAME_POINTER
1418 [saved regs]
 1420 [padding1]          \
 1422 [va_arg registers]   \
 1423                       > to_allocate   <- FRAME_POINTER
 1424 [frame]              /
 1426 [padding2]          /
1428 struct ix86_frame
1430 int nregs;
1431 int padding1;
1432 int va_arg_size;
1433 HOST_WIDE_INT frame;
1434 int padding2;
1435 int outgoing_arguments_size;
1436 int red_zone_size;
1438 HOST_WIDE_INT to_allocate;
1439 /* The offsets relative to ARG_POINTER. */
1440 HOST_WIDE_INT frame_pointer_offset;
1441 HOST_WIDE_INT hard_frame_pointer_offset;
1442 HOST_WIDE_INT stack_pointer_offset;
1444 /* When save_regs_using_mov is set, emit prologue using
1445 move instead of push instructions. */
1446 bool save_regs_using_mov;
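/* A minimal usage sketch (illustrative only; the real prologue code is more
   involved):

     struct ix86_frame frame;
     ix86_compute_frame_layout (&frame);
     ... allocate frame.to_allocate bytes and save frame.nregs registers,
     using moves rather than pushes when frame.save_regs_using_mov ...  */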
1449 /* Code model option. */
1450 enum cmodel ix86_cmodel;
1451 /* Asm dialect. */
1452 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1453 /* TLS dialects. */
1454 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1456 /* Which unit we are generating floating point math for. */
1457 enum fpmath_unit ix86_fpmath;
1459 /* Which cpu are we scheduling for. */
1460 enum processor_type ix86_tune;
1462 /* Which instruction set architecture to use. */
1463 enum processor_type ix86_arch;
1465 /* True if the SSE prefetch instruction is not a NOP. */
1466 int x86_prefetch_sse;
1468 /* ix86_regparm_string as a number */
1469 static int ix86_regparm;
1471 /* -mstackrealign option */
1472 extern int ix86_force_align_arg_pointer;
1473 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1475 /* Preferred alignment for stack boundary in bits. */
1476 unsigned int ix86_preferred_stack_boundary;
1478 /* Values 1-5: see jump.c */
1479 int ix86_branch_cost;
1481 /* Variables which are this size or smaller are put in the data/bss
1482 or ldata/lbss sections. */
1484 int ix86_section_threshold = 65536;
1486 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1487 char internal_label_prefix[16];
1488 int internal_label_prefix_len;
1490 static bool ix86_handle_option (size_t, const char *, int);
1491 static void output_pic_addr_const (FILE *, rtx, int);
1492 static void put_condition_code (enum rtx_code, enum machine_mode,
1493 int, int, FILE *);
1494 static const char *get_some_local_dynamic_name (void);
1495 static int get_some_local_dynamic_name_1 (rtx *, void *);
1496 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1497 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1498 rtx *);
1499 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1500 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1501 enum machine_mode);
1502 static rtx get_thread_pointer (int);
1503 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1504 static void get_pc_thunk_name (char [32], unsigned int);
1505 static rtx gen_push (rtx);
1506 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1507 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1508 static struct machine_function * ix86_init_machine_status (void);
1509 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1510 static int ix86_nsaved_regs (void);
1511 static void ix86_emit_save_regs (void);
1512 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1513 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1514 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1515 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1516 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1517 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1518 static int ix86_issue_rate (void);
1519 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1520 static int ia32_multipass_dfa_lookahead (void);
1521 static void ix86_init_mmx_sse_builtins (void);
1522 static rtx x86_this_parameter (tree);
1523 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1524 HOST_WIDE_INT, tree);
1525 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1526 static void x86_file_start (void);
1527 static void ix86_reorg (void);
1528 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1529 static tree ix86_build_builtin_va_list (void);
1530 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1531 tree, int *, int);
1532 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1533 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1534 static bool ix86_vector_mode_supported_p (enum machine_mode);
1536 static int ix86_address_cost (rtx);
1537 static bool ix86_cannot_force_const_mem (rtx);
1538 static rtx ix86_delegitimize_address (rtx);
1540 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1542 struct builtin_description;
1543 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1544 tree, rtx);
1545 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1546 tree, rtx);
1547 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1548 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1549 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1550 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1551 static rtx safe_vector_operand (rtx, enum machine_mode);
1552 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1553 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1554 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1555 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1556 static int ix86_fp_comparison_cost (enum rtx_code code);
1557 static unsigned int ix86_select_alt_pic_regnum (void);
1558 static int ix86_save_reg (unsigned int, int);
1559 static void ix86_compute_frame_layout (struct ix86_frame *);
1560 static int ix86_comp_type_attributes (tree, tree);
1561 static int ix86_function_regparm (tree, tree);
1562 const struct attribute_spec ix86_attribute_table[];
1563 static bool ix86_function_ok_for_sibcall (tree, tree);
1564 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1565 static int ix86_value_regno (enum machine_mode, tree, tree);
1566 static bool contains_128bit_aligned_vector_p (tree);
1567 static rtx ix86_struct_value_rtx (tree, int);
1568 static bool ix86_ms_bitfield_layout_p (tree);
1569 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1570 static int extended_reg_mentioned_1 (rtx *, void *);
1571 static bool ix86_rtx_costs (rtx, int, int, int *);
1572 static int min_insn_size (rtx);
1573 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1574 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1575 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1576 tree, bool);
1577 static void ix86_init_builtins (void);
1578 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1579 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1580 static tree ix86_builtin_conversion (enum tree_code, tree);
1581 static const char *ix86_mangle_fundamental_type (tree);
1582 static tree ix86_stack_protect_fail (void);
1583 static rtx ix86_internal_arg_pointer (void);
1584 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1585 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1586 rtx, rtx, int);
1588 /* This function is only used on Solaris. */
1589 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1590 ATTRIBUTE_UNUSED;
1592 /* Register class used for passing a given 64-bit part of the argument.
1593 These represent classes as documented by the PS ABI, with the exception
1594 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1595 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1597 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1598 whenever possible (the upper half does contain padding).
1600 enum x86_64_reg_class
1602 X86_64_NO_CLASS,
1603 X86_64_INTEGER_CLASS,
1604 X86_64_INTEGERSI_CLASS,
1605 X86_64_SSE_CLASS,
1606 X86_64_SSESF_CLASS,
1607 X86_64_SSEDF_CLASS,
1608 X86_64_SSEUP_CLASS,
1609 X86_64_X87_CLASS,
1610 X86_64_X87UP_CLASS,
1611 X86_64_COMPLEX_X87_CLASS,
1612 X86_64_MEMORY_CLASS
1614 static const char * const x86_64_reg_class_name[] = {
1615 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1616 "sseup", "x87", "x87up", "cplx87", "no"
1619 #define MAX_CLASSES 4
1621 /* Table of constants used by fldpi, fldln2, etc.... */
1622 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1623 static bool ext_80387_constants_init = 0;
1624 static void init_ext_80387_constants (void);
1625 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1626 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1627 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1628 static section *x86_64_elf_select_section (tree decl, int reloc,
1629 unsigned HOST_WIDE_INT align)
1630 ATTRIBUTE_UNUSED;
1632 /* Initialize the GCC target structure. */
1633 #undef TARGET_ATTRIBUTE_TABLE
1634 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1635 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1636 # undef TARGET_MERGE_DECL_ATTRIBUTES
1637 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1638 #endif
1640 #undef TARGET_COMP_TYPE_ATTRIBUTES
1641 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1643 #undef TARGET_INIT_BUILTINS
1644 #define TARGET_INIT_BUILTINS ix86_init_builtins
1645 #undef TARGET_EXPAND_BUILTIN
1646 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1648 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1649 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1650 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1651 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1653 #undef TARGET_ASM_FUNCTION_EPILOGUE
1654 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1656 #undef TARGET_ENCODE_SECTION_INFO
1657 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1658 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1659 #else
1660 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1661 #endif
1663 #undef TARGET_ASM_OPEN_PAREN
1664 #define TARGET_ASM_OPEN_PAREN ""
1665 #undef TARGET_ASM_CLOSE_PAREN
1666 #define TARGET_ASM_CLOSE_PAREN ""
1668 #undef TARGET_ASM_ALIGNED_HI_OP
1669 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1670 #undef TARGET_ASM_ALIGNED_SI_OP
1671 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1672 #ifdef ASM_QUAD
1673 #undef TARGET_ASM_ALIGNED_DI_OP
1674 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1675 #endif
1677 #undef TARGET_ASM_UNALIGNED_HI_OP
1678 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1679 #undef TARGET_ASM_UNALIGNED_SI_OP
1680 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1681 #undef TARGET_ASM_UNALIGNED_DI_OP
1682 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1684 #undef TARGET_SCHED_ADJUST_COST
1685 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1686 #undef TARGET_SCHED_ISSUE_RATE
1687 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1688 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1689 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1690 ia32_multipass_dfa_lookahead
1692 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1693 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1695 #ifdef HAVE_AS_TLS
1696 #undef TARGET_HAVE_TLS
1697 #define TARGET_HAVE_TLS true
1698 #endif
1699 #undef TARGET_CANNOT_FORCE_CONST_MEM
1700 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1701 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1702 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1704 #undef TARGET_DELEGITIMIZE_ADDRESS
1705 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1707 #undef TARGET_MS_BITFIELD_LAYOUT_P
1708 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1710 #if TARGET_MACHO
1711 #undef TARGET_BINDS_LOCAL_P
1712 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1713 #endif
1715 #undef TARGET_ASM_OUTPUT_MI_THUNK
1716 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1717 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1718 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1720 #undef TARGET_ASM_FILE_START
1721 #define TARGET_ASM_FILE_START x86_file_start
1723 #undef TARGET_DEFAULT_TARGET_FLAGS
1724 #define TARGET_DEFAULT_TARGET_FLAGS \
1725 (TARGET_DEFAULT \
1726 | TARGET_64BIT_DEFAULT \
1727 | TARGET_SUBTARGET_DEFAULT \
1728 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1730 #undef TARGET_HANDLE_OPTION
1731 #define TARGET_HANDLE_OPTION ix86_handle_option
1733 #undef TARGET_RTX_COSTS
1734 #define TARGET_RTX_COSTS ix86_rtx_costs
1735 #undef TARGET_ADDRESS_COST
1736 #define TARGET_ADDRESS_COST ix86_address_cost
1738 #undef TARGET_FIXED_CONDITION_CODE_REGS
1739 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1740 #undef TARGET_CC_MODES_COMPATIBLE
1741 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1743 #undef TARGET_MACHINE_DEPENDENT_REORG
1744 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1746 #undef TARGET_BUILD_BUILTIN_VA_LIST
1747 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1749 #undef TARGET_MD_ASM_CLOBBERS
1750 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1752 #undef TARGET_PROMOTE_PROTOTYPES
1753 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1754 #undef TARGET_STRUCT_VALUE_RTX
1755 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1756 #undef TARGET_SETUP_INCOMING_VARARGS
1757 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1758 #undef TARGET_MUST_PASS_IN_STACK
1759 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1760 #undef TARGET_PASS_BY_REFERENCE
1761 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1762 #undef TARGET_INTERNAL_ARG_POINTER
1763 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1764 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1765 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1767 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1768 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1770 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1771 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1773 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1774 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1776 #ifdef HAVE_AS_TLS
1777 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1778 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1779 #endif
1781 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1782 #undef TARGET_INSERT_ATTRIBUTES
1783 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1784 #endif
1786 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1787 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1789 #undef TARGET_STACK_PROTECT_FAIL
1790 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1792 #undef TARGET_FUNCTION_VALUE
1793 #define TARGET_FUNCTION_VALUE ix86_function_value
1795 struct gcc_target targetm = TARGET_INITIALIZER;
1798 /* The svr4 ABI for the i386 says that records and unions are returned
1799 in memory. */
1800 #ifndef DEFAULT_PCC_STRUCT_RETURN
1801 #define DEFAULT_PCC_STRUCT_RETURN 1
1802 #endif
1804 /* Implement TARGET_HANDLE_OPTION. */
1806 static bool
1807 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1809 switch (code)
1811 case OPT_m3dnow:
1812 if (!value)
1814 target_flags &= ~MASK_3DNOW_A;
1815 target_flags_explicit |= MASK_3DNOW_A;
1817 return true;
1819 case OPT_mmmx:
1820 if (!value)
1822 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1823 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1825 return true;
1827 case OPT_msse:
1828 if (!value)
1830 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1831 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1833 return true;
1835 case OPT_msse2:
1836 if (!value)
1838 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1839 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1841 return true;
1843 case OPT_msse3:
1844 if (!value)
1846 target_flags &= ~MASK_SSE4A;
1847 target_flags_explicit |= MASK_SSE4A;
1849 return true;
1851 default:
1852 return true;
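/* Note the cascade above: explicitly disabling a lower ISA level also
   disables the extensions built on top of it.  For example (illustrative),
   compiling with "-msse3 -mno-sse" leaves SSE3 disabled, because the
   OPT_msse handler with value == 0 clears MASK_SSE2, MASK_SSE3 and
   MASK_SSE4A as well.  */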
1856 /* Sometimes certain combinations of command options do not make
1857 sense on a particular target machine. You can define a macro
1858 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1859 defined, is executed once just after all the command options have
1860 been parsed.
1862 Don't use this macro to turn on various extra optimizations for
1863 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1865 void
1866 override_options (void)
1868 int i;
1869 int ix86_tune_defaulted = 0;
1870 unsigned int ix86_arch_mask, ix86_tune_mask;
1872 /* Comes from final.c -- no real reason to change it. */
1873 #define MAX_CODE_ALIGN 16
1875 static struct ptt
1877 const struct processor_costs *cost; /* Processor costs */
1878 const int target_enable; /* Target flags to enable. */
1879 const int target_disable; /* Target flags to disable. */
1880 const int align_loop; /* Default alignments. */
1881 const int align_loop_max_skip;
1882 const int align_jump;
1883 const int align_jump_max_skip;
1884 const int align_func;
1886 const processor_target_table[PROCESSOR_max] =
1888 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1889 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1890 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1891 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1892 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1893 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1894 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1895 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1896 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1897 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1898 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1899 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1900 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1901 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1904 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1905 static struct pta
1907 const char *const name; /* processor name or nickname. */
1908 const enum processor_type processor;
1909 const enum pta_flags
1911 PTA_SSE = 1 << 0,
1912 PTA_SSE2 = 1 << 1,
1913 PTA_SSE3 = 1 << 2,
1914 PTA_MMX = 1 << 3,
1915 PTA_PREFETCH_SSE = 1 << 4,
1916 PTA_3DNOW = 1 << 5,
1917 PTA_3DNOW_A = 1 << 6,
1918 PTA_64BIT = 1 << 7,
1919 PTA_SSSE3 = 1 << 8,
1920 PTA_CX16 = 1 << 9,
1921 PTA_POPCNT = 1 << 10,
1922 PTA_ABM = 1 << 11,
1923 PTA_SSE4A = 1 << 12,
1924 PTA_NO_SAHF = 1 << 13
1925 } flags;
1927 const processor_alias_table[] =
1929 {"i386", PROCESSOR_I386, 0},
1930 {"i486", PROCESSOR_I486, 0},
1931 {"i586", PROCESSOR_PENTIUM, 0},
1932 {"pentium", PROCESSOR_PENTIUM, 0},
1933 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1934 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1935 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1936 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1937 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1938 {"i686", PROCESSOR_PENTIUMPRO, 0},
1939 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1940 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1941 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1942 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1943 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1944 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1945 | PTA_MMX | PTA_PREFETCH_SSE},
1946 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1947 | PTA_MMX | PTA_PREFETCH_SSE},
1948 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1949 | PTA_MMX | PTA_PREFETCH_SSE},
1950 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1951 | PTA_MMX | PTA_PREFETCH_SSE
1952 | PTA_CX16 | PTA_NO_SAHF},
1953 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1954 | PTA_64BIT | PTA_MMX
1955 | PTA_PREFETCH_SSE | PTA_CX16},
1956 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1957 | PTA_3DNOW_A},
1958 {"k6", PROCESSOR_K6, PTA_MMX},
1959 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1960 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1961 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1962 | PTA_3DNOW_A},
1963 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1964 | PTA_3DNOW | PTA_3DNOW_A},
1965 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1966 | PTA_3DNOW_A | PTA_SSE},
1967 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1968 | PTA_3DNOW_A | PTA_SSE},
1969 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1970 | PTA_3DNOW_A | PTA_SSE},
1971 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1972 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1973 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1974 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1975 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1976 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1977 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1978 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1979 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1980 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1981 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1982 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1983 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1984 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1985 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1986 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1989 int const pta_size = ARRAY_SIZE (processor_alias_table);
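/* Illustrative example of how the table above is consumed by the loops
   further down: "-march=k8" selects PROCESSOR_K8 and, because the k8 entry
   carries PTA_MMX, PTA_3DNOW, PTA_3DNOW_A, PTA_SSE and PTA_SSE2, also turns
   on the corresponding MASK_* target flags unless the user already set them
   explicitly on the command line.  */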
1991 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1992 SUBTARGET_OVERRIDE_OPTIONS;
1993 #endif
1995 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1996 SUBSUBTARGET_OVERRIDE_OPTIONS;
1997 #endif
1999 /* -fPIC is the default for x86_64 Darwin (Mach-O). */
2000 if (TARGET_MACHO && TARGET_64BIT)
2001 flag_pic = 2;
2003 /* Set the default values for switches whose default depends on TARGET_64BIT
2004 in case they weren't overwritten by command line options. */
2005 if (TARGET_64BIT)
2007 /* Mach-O doesn't support omitting the frame pointer for now. */
2008 if (flag_omit_frame_pointer == 2)
2009 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2010 if (flag_asynchronous_unwind_tables == 2)
2011 flag_asynchronous_unwind_tables = 1;
2012 if (flag_pcc_struct_return == 2)
2013 flag_pcc_struct_return = 0;
2015 else
2017 if (flag_omit_frame_pointer == 2)
2018 flag_omit_frame_pointer = 0;
2019 if (flag_asynchronous_unwind_tables == 2)
2020 flag_asynchronous_unwind_tables = 0;
2021 if (flag_pcc_struct_return == 2)
2022 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2025 /* Need to check -mtune=generic first. */
2026 if (ix86_tune_string)
2028 if (!strcmp (ix86_tune_string, "generic")
2029 || !strcmp (ix86_tune_string, "i686")
2030 /* As special support for cross compilers we read -mtune=native
2031 as -mtune=generic. With native compilers we won't see the
2032 -mtune=native, as it was changed by the driver. */
2033 || !strcmp (ix86_tune_string, "native"))
2035 if (TARGET_64BIT)
2036 ix86_tune_string = "generic64";
2037 else
2038 ix86_tune_string = "generic32";
2040 else if (!strncmp (ix86_tune_string, "generic", 7))
2041 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2043 else
2045 if (ix86_arch_string)
2046 ix86_tune_string = ix86_arch_string;
2047 if (!ix86_tune_string)
2049 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
2050 ix86_tune_defaulted = 1;
2053 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2054 need to use a sensible tune option. */
2055 if (!strcmp (ix86_tune_string, "generic")
2056 || !strcmp (ix86_tune_string, "x86-64")
2057 || !strcmp (ix86_tune_string, "i686"))
2059 if (TARGET_64BIT)
2060 ix86_tune_string = "generic64";
2061 else
2062 ix86_tune_string = "generic32";
2065 if (ix86_stringop_string)
2067 if (!strcmp (ix86_stringop_string, "rep_byte"))
2068 stringop_alg = rep_prefix_1_byte;
2069 else if (!strcmp (ix86_stringop_string, "libcall"))
2070 stringop_alg = libcall;
2071 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2072 stringop_alg = rep_prefix_4_byte;
2073 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2074 stringop_alg = rep_prefix_8_byte;
2075 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2076 stringop_alg = loop_1_byte;
2077 else if (!strcmp (ix86_stringop_string, "loop"))
2078 stringop_alg = loop;
2079 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2080 stringop_alg = unrolled_loop;
2081 else
2082 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2084 if (!strcmp (ix86_tune_string, "x86-64"))
2085 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2086 "-mtune=generic instead as appropriate.");
2088 if (!ix86_arch_string)
2089 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2090 if (!strcmp (ix86_arch_string, "generic"))
2091 error ("generic CPU can be used only for -mtune= switch");
2092 if (!strncmp (ix86_arch_string, "generic", 7))
2093 error ("bad value (%s) for -march= switch", ix86_arch_string);
2095 if (ix86_cmodel_string != 0)
2097 if (!strcmp (ix86_cmodel_string, "small"))
2098 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2099 else if (!strcmp (ix86_cmodel_string, "medium"))
2100 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2101 else if (!strcmp (ix86_cmodel_string, "large"))
2102 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2103 else if (flag_pic)
2104 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2105 else if (!strcmp (ix86_cmodel_string, "32"))
2106 ix86_cmodel = CM_32;
2107 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2108 ix86_cmodel = CM_KERNEL;
2109 else
2110 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2112 else
2114 ix86_cmodel = CM_32;
2115 if (TARGET_64BIT)
2116 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
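/* So, for example (illustrative): "-mcmodel=medium -fpic" yields
   CM_MEDIUM_PIC, plain 64-bit compilation defaults to CM_SMALL (or
   CM_SMALL_PIC under -fpic), and 32-bit compilation always uses CM_32.  */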
2118 if (ix86_asm_string != 0)
2120 if (! TARGET_MACHO
2121 && !strcmp (ix86_asm_string, "intel"))
2122 ix86_asm_dialect = ASM_INTEL;
2123 else if (!strcmp (ix86_asm_string, "att"))
2124 ix86_asm_dialect = ASM_ATT;
2125 else
2126 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2128 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2129 error ("code model %qs not supported in the %s bit mode",
2130 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2131 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2132 sorry ("%i-bit mode not compiled in",
2133 (target_flags & MASK_64BIT) ? 64 : 32);
2135 for (i = 0; i < pta_size; i++)
2136 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2138 ix86_arch = processor_alias_table[i].processor;
2139 /* Default cpu tuning to the architecture. */
2140 ix86_tune = ix86_arch;
2141 if (processor_alias_table[i].flags & PTA_MMX
2142 && !(target_flags_explicit & MASK_MMX))
2143 target_flags |= MASK_MMX;
2144 if (processor_alias_table[i].flags & PTA_3DNOW
2145 && !(target_flags_explicit & MASK_3DNOW))
2146 target_flags |= MASK_3DNOW;
2147 if (processor_alias_table[i].flags & PTA_3DNOW_A
2148 && !(target_flags_explicit & MASK_3DNOW_A))
2149 target_flags |= MASK_3DNOW_A;
2150 if (processor_alias_table[i].flags & PTA_SSE
2151 && !(target_flags_explicit & MASK_SSE))
2152 target_flags |= MASK_SSE;
2153 if (processor_alias_table[i].flags & PTA_SSE2
2154 && !(target_flags_explicit & MASK_SSE2))
2155 target_flags |= MASK_SSE2;
2156 if (processor_alias_table[i].flags & PTA_SSE3
2157 && !(target_flags_explicit & MASK_SSE3))
2158 target_flags |= MASK_SSE3;
2159 if (processor_alias_table[i].flags & PTA_SSSE3
2160 && !(target_flags_explicit & MASK_SSSE3))
2161 target_flags |= MASK_SSSE3;
2162 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2163 x86_prefetch_sse = true;
2164 if (processor_alias_table[i].flags & PTA_CX16)
2165 x86_cmpxchg16b = true;
2166 if (processor_alias_table[i].flags & PTA_POPCNT
2167 && !(target_flags_explicit & MASK_POPCNT))
2168 target_flags |= MASK_POPCNT;
2169 if (processor_alias_table[i].flags & PTA_ABM
2170 && !(target_flags_explicit & MASK_ABM))
2171 target_flags |= MASK_ABM;
2172 if (processor_alias_table[i].flags & PTA_SSE4A
2173 && !(target_flags_explicit & MASK_SSE4A))
2174 target_flags |= MASK_SSE4A;
2175 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2176 x86_sahf = true;
2177 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2178 error ("CPU you selected does not support x86-64 "
2179 "instruction set");
2180 break;
2183 if (i == pta_size)
2184 error ("bad value (%s) for -march= switch", ix86_arch_string);
2186 ix86_arch_mask = 1u << ix86_arch;
2187 for (i = 0; i < X86_ARCH_LAST; ++i)
2188 ix86_arch_features[i] &= ix86_arch_mask;
2190 for (i = 0; i < pta_size; i++)
2191 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2193 ix86_tune = processor_alias_table[i].processor;
2194 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2196 if (ix86_tune_defaulted)
2198 ix86_tune_string = "x86-64";
2199 for (i = 0; i < pta_size; i++)
2200 if (! strcmp (ix86_tune_string,
2201 processor_alias_table[i].name))
2202 break;
2203 ix86_tune = processor_alias_table[i].processor;
2205 else
2206 error ("CPU you selected does not support x86-64 "
2207 "instruction set");
2209 /* Intel CPUs have always interpreted SSE prefetch instructions as
2210 NOPs; so, we can enable SSE prefetch instructions even when
2211 -mtune (rather than -march) points us to a processor that has them.
2212 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2213 higher processors. */
2214 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2215 x86_prefetch_sse = true;
2216 break;
2218 if (i == pta_size)
2219 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2221 ix86_tune_mask = 1u << ix86_tune;
2222 for (i = 0; i < X86_TUNE_LAST; ++i)
2223 ix86_tune_features[i] &= ix86_tune_mask;
2225 if (optimize_size)
2226 ix86_cost = &size_cost;
2227 else
2228 ix86_cost = processor_target_table[ix86_tune].cost;
2229 target_flags |= processor_target_table[ix86_tune].target_enable;
2230 target_flags &= ~processor_target_table[ix86_tune].target_disable;
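/* Example of the -march/-mtune split handled above (illustrative):
   "gcc -march=i686 -mtune=core2" sets ix86_arch to PROCESSOR_PENTIUMPRO but
   ix86_tune to PROCESSOR_CORE2, so generated code is restricted to the i686
   instruction set while costs, alignments and tuning flags are taken from
   the Core 2 entries.  */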
2232 /* Arrange to set up i386_stack_locals for all functions. */
2233 init_machine_status = ix86_init_machine_status;
2235 /* Validate -mregparm= value. */
2236 if (ix86_regparm_string)
2238 i = atoi (ix86_regparm_string);
2239 if (i < 0 || i > REGPARM_MAX)
2240 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2241 else
2242 ix86_regparm = i;
2244 else
2245 if (TARGET_64BIT)
2246 ix86_regparm = REGPARM_MAX;
2248 /* If the user has provided any of the -malign-* options,
2249 warn and use that value only if -falign-* is not set.
2250 Remove this code in GCC 3.2 or later. */
2251 if (ix86_align_loops_string)
2253 warning (0, "-malign-loops is obsolete, use -falign-loops");
2254 if (align_loops == 0)
2256 i = atoi (ix86_align_loops_string);
2257 if (i < 0 || i > MAX_CODE_ALIGN)
2258 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2259 else
2260 align_loops = 1 << i;
2264 if (ix86_align_jumps_string)
2266 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2267 if (align_jumps == 0)
2269 i = atoi (ix86_align_jumps_string);
2270 if (i < 0 || i > MAX_CODE_ALIGN)
2271 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2272 else
2273 align_jumps = 1 << i;
2277 if (ix86_align_funcs_string)
2279 warning (0, "-malign-functions is obsolete, use -falign-functions");
2280 if (align_functions == 0)
2282 i = atoi (ix86_align_funcs_string);
2283 if (i < 0 || i > MAX_CODE_ALIGN)
2284 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2285 else
2286 align_functions = 1 << i;
2290 /* Default align_* from the processor table. */
2291 if (align_loops == 0)
2293 align_loops = processor_target_table[ix86_tune].align_loop;
2294 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2296 if (align_jumps == 0)
2298 align_jumps = processor_target_table[ix86_tune].align_jump;
2299 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2301 if (align_functions == 0)
2303 align_functions = processor_target_table[ix86_tune].align_func;
2306 /* Validate -mbranch-cost= value, or provide default. */
2307 ix86_branch_cost = ix86_cost->branch_cost;
2308 if (ix86_branch_cost_string)
2310 i = atoi (ix86_branch_cost_string);
2311 if (i < 0 || i > 5)
2312 error ("-mbranch-cost=%d is not between 0 and 5", i);
2313 else
2314 ix86_branch_cost = i;
2316 if (ix86_section_threshold_string)
2318 i = atoi (ix86_section_threshold_string);
2319 if (i < 0)
2320 error ("-mlarge-data-threshold=%d is negative", i);
2321 else
2322 ix86_section_threshold = i;
2325 if (ix86_tls_dialect_string)
2327 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2328 ix86_tls_dialect = TLS_DIALECT_GNU;
2329 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2330 ix86_tls_dialect = TLS_DIALECT_GNU2;
2331 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2332 ix86_tls_dialect = TLS_DIALECT_SUN;
2333 else
2334 error ("bad value (%s) for -mtls-dialect= switch",
2335 ix86_tls_dialect_string);
2338 /* Keep nonleaf frame pointers. */
2339 if (flag_omit_frame_pointer)
2340 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2341 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2342 flag_omit_frame_pointer = 1;
2344 /* If we're doing fast math, we don't care about comparison order
2345 wrt NaNs. This lets us use a shorter comparison sequence. */
2346 if (flag_finite_math_only)
2347 target_flags &= ~MASK_IEEE_FP;
2349 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2350 since the insns won't need emulation. */
2351 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2352 target_flags &= ~MASK_NO_FANCY_MATH_387;
2354 /* Likewise, if the target doesn't have a 387, or we've specified
2355 software floating point, don't use 387 inline intrinsics. */
2356 if (!TARGET_80387)
2357 target_flags |= MASK_NO_FANCY_MATH_387;
2359 /* Turn on SSE3 builtins for -mssse3. */
2360 if (TARGET_SSSE3)
2361 target_flags |= MASK_SSE3;
2363 /* Turn on SSE3 builtins for -msse4a. */
2364 if (TARGET_SSE4A)
2365 target_flags |= MASK_SSE3;
2367 /* Turn on SSE2 builtins for -msse3. */
2368 if (TARGET_SSE3)
2369 target_flags |= MASK_SSE2;
2371 /* Turn on SSE builtins for -msse2. */
2372 if (TARGET_SSE2)
2373 target_flags |= MASK_SSE;
2375 /* Turn on MMX builtins for -msse. */
2376 if (TARGET_SSE)
2378 target_flags |= MASK_MMX & ~target_flags_explicit;
2379 x86_prefetch_sse = true;
2382 /* Turn on MMX builtins for 3Dnow. */
2383 if (TARGET_3DNOW)
2384 target_flags |= MASK_MMX;
2386 /* Turn on POPCNT builtins for -mabm. */
2387 if (TARGET_ABM)
2388 target_flags |= MASK_POPCNT;
2390 if (TARGET_64BIT)
2392 if (TARGET_ALIGN_DOUBLE)
2393 error ("-malign-double makes no sense in the 64bit mode");
2394 if (TARGET_RTD)
2395 error ("-mrtd calling convention not supported in the 64bit mode");
2397 /* Enable by default the SSE and MMX builtins. Do allow the user to
2398 explicitly disable any of these. In particular, disabling SSE and
2399 MMX for kernel code is extremely useful. */
2400 target_flags
2401 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2402 & ~target_flags_explicit);
2404 else
2406 /* The i386 ABI does not specify a red zone. It still makes sense to use one
2407 when the programmer takes care to keep the stack from being destroyed. */
2408 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2409 target_flags |= MASK_NO_RED_ZONE;
2412 /* Validate -mpreferred-stack-boundary= value, or provide default.
2413 The default of 128 bits is for Pentium III's SSE __m128. We can't
2414 change it because of optimize_size. Otherwise, we can't mix object
2415 files compiled with -Os and -On. */
2416 ix86_preferred_stack_boundary = 128;
2417 if (ix86_preferred_stack_boundary_string)
2419 i = atoi (ix86_preferred_stack_boundary_string);
2420 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2421 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2422 TARGET_64BIT ? 4 : 2);
2423 else
2424 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
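/* Worked example (illustrative): "-mpreferred-stack-boundary=4" gives
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
   stack, which is also the default chosen above.  */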
2427 /* Accept -msseregparm only if at least SSE support is enabled. */
2428 if (TARGET_SSEREGPARM
2429 && ! TARGET_SSE)
2430 error ("-msseregparm used without SSE enabled");
2432 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2433 if (ix86_fpmath_string != 0)
2435 if (! strcmp (ix86_fpmath_string, "387"))
2436 ix86_fpmath = FPMATH_387;
2437 else if (! strcmp (ix86_fpmath_string, "sse"))
2439 if (!TARGET_SSE)
2441 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2442 ix86_fpmath = FPMATH_387;
2444 else
2445 ix86_fpmath = FPMATH_SSE;
2447 else if (! strcmp (ix86_fpmath_string, "387,sse")
2448 || ! strcmp (ix86_fpmath_string, "sse,387"))
2450 if (!TARGET_SSE)
2452 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2453 ix86_fpmath = FPMATH_387;
2455 else if (!TARGET_80387)
2457 warning (0, "387 instruction set disabled, using SSE arithmetics");
2458 ix86_fpmath = FPMATH_SSE;
2460 else
2461 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2463 else
2464 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
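/* For instance (illustrative): "-mfpmath=sse,387" enables both FPMATH_SSE
   and FPMATH_387 when both units are available, while "-mfpmath=sse"
   without SSE enabled falls back to FPMATH_387 with a warning, as handled
   above.  */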
2467 /* If the i387 is disabled, then do not return values in it. */
2468 if (!TARGET_80387)
2469 target_flags &= ~MASK_FLOAT_RETURNS;
2471 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2472 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2473 && !optimize_size)
2474 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2476 /* ??? Unwind info is not correct around the CFG unless either a frame
2477 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2478 unwind info generation to be aware of the CFG and propagating states
2479 around edges. */
2480 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2481 || flag_exceptions || flag_non_call_exceptions)
2482 && flag_omit_frame_pointer
2483 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2485 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2486 warning (0, "unwind tables currently require either a frame pointer "
2487 "or -maccumulate-outgoing-args for correctness");
2488 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2491 /* For sane SSE instruction set generation we need fcomi instruction.
2492 It is safe to enable all CMOVE instructions. */
2493 if (TARGET_SSE)
2494 TARGET_CMOVE = 1;
2496 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2498 char *p;
2499 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2500 p = strchr (internal_label_prefix, 'X');
2501 internal_label_prefix_len = p - internal_label_prefix;
2502 *p = '\0';
2505 /* When the scheduling description is not available, disable the scheduler pass
2506 so it won't slow down compilation and make x87 code slower. */
2507 if (!TARGET_SCHEDULE)
2508 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2510 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2511 set_param_value ("simultaneous-prefetches",
2512 ix86_cost->simultaneous_prefetches);
2513 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2514 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2517 /* Switch to the appropriate section for output of DECL.
2518 DECL is either a `VAR_DECL' node or a constant of some sort.
2519 RELOC indicates whether forming the initial value of DECL requires
2520 link-time relocations. */
2522 static section *
2523 x86_64_elf_select_section (tree decl, int reloc,
2524 unsigned HOST_WIDE_INT align)
2526 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2527 && ix86_in_large_data_p (decl))
2529 const char *sname = NULL;
2530 unsigned int flags = SECTION_WRITE;
2531 switch (categorize_decl_for_section (decl, reloc))
2533 case SECCAT_DATA:
2534 sname = ".ldata";
2535 break;
2536 case SECCAT_DATA_REL:
2537 sname = ".ldata.rel";
2538 break;
2539 case SECCAT_DATA_REL_LOCAL:
2540 sname = ".ldata.rel.local";
2541 break;
2542 case SECCAT_DATA_REL_RO:
2543 sname = ".ldata.rel.ro";
2544 break;
2545 case SECCAT_DATA_REL_RO_LOCAL:
2546 sname = ".ldata.rel.ro.local";
2547 break;
2548 case SECCAT_BSS:
2549 sname = ".lbss";
2550 flags |= SECTION_BSS;
2551 break;
2552 case SECCAT_RODATA:
2553 case SECCAT_RODATA_MERGE_STR:
2554 case SECCAT_RODATA_MERGE_STR_INIT:
2555 case SECCAT_RODATA_MERGE_CONST:
2556 sname = ".lrodata";
2557 flags = 0;
2558 break;
2559 case SECCAT_SRODATA:
2560 case SECCAT_SDATA:
2561 case SECCAT_SBSS:
2562 gcc_unreachable ();
2563 case SECCAT_TEXT:
2564 case SECCAT_TDATA:
2565 case SECCAT_TBSS:
2566 /* We don't split these for medium model. Place them into
2567 default sections and hope for the best. */
2568 break;
2570 if (sname)
2572 /* We might get called with string constants, but get_named_section
2573 doesn't like them as they are not DECLs. Also, we need to set
2574 flags in that case. */
2575 if (!DECL_P (decl))
2576 return get_section (sname, flags, NULL);
2577 return get_named_section (decl, sname, reloc);
2580 return default_elf_select_section (decl, reloc, align);
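/* Illustrative example: when compiling with "-mcmodel=medium", a writable
   global array larger than -mlarge-data-threshold is placed in .ldata (or
   one of its .rel variants), read-only data of that size goes to .lrodata,
   and everything else falls through to default_elf_select_section as
   usual.  */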
2583 /* Build up a unique section name, expressed as a
2584 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2585 RELOC indicates whether the initial value of EXP requires
2586 link-time relocations. */
2588 static void
2589 x86_64_elf_unique_section (tree decl, int reloc)
2591 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2592 && ix86_in_large_data_p (decl))
2594 const char *prefix = NULL;
2595 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2596 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2598 switch (categorize_decl_for_section (decl, reloc))
2600 case SECCAT_DATA:
2601 case SECCAT_DATA_REL:
2602 case SECCAT_DATA_REL_LOCAL:
2603 case SECCAT_DATA_REL_RO:
2604 case SECCAT_DATA_REL_RO_LOCAL:
2605 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2606 break;
2607 case SECCAT_BSS:
2608 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2609 break;
2610 case SECCAT_RODATA:
2611 case SECCAT_RODATA_MERGE_STR:
2612 case SECCAT_RODATA_MERGE_STR_INIT:
2613 case SECCAT_RODATA_MERGE_CONST:
2614 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2615 break;
2616 case SECCAT_SRODATA:
2617 case SECCAT_SDATA:
2618 case SECCAT_SBSS:
2619 gcc_unreachable ();
2620 case SECCAT_TEXT:
2621 case SECCAT_TDATA:
2622 case SECCAT_TBSS:
2623 /* We don't split these for medium model. Place them into
2624 default sections and hope for the best. */
2625 break;
2627 if (prefix)
2629 const char *name;
2630 size_t nlen, plen;
2631 char *string;
2632 plen = strlen (prefix);
2634 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2635 name = targetm.strip_name_encoding (name);
2636 nlen = strlen (name);
2638 string = alloca (nlen + plen + 1);
2639 memcpy (string, prefix, plen);
2640 memcpy (string + plen, name, nlen + 1);
2642 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2643 return;
2646 default_unique_section (decl, reloc);
2649 #ifdef COMMON_ASM_OP
2650 /* This says how to output assembler code to declare an
2651 uninitialized external linkage data object.
2653 For medium model x86-64 we need to use the .largecomm directive for
2654 large objects. */
2655 void
2656 x86_elf_aligned_common (FILE *file,
2657 const char *name, unsigned HOST_WIDE_INT size,
2658 int align)
2660 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2661 && size > (unsigned int)ix86_section_threshold)
2662 fprintf (file, ".largecomm\t");
2663 else
2664 fprintf (file, "%s", COMMON_ASM_OP);
2665 assemble_name (file, name);
2666 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2667 size, align / BITS_PER_UNIT);
2669 #endif
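/* For a medium-model object above the threshold, this emits something like
   (illustrative; the name and numbers are made up):

       .largecomm  big_buffer,1048576,32

   i.e. name, size in bytes and alignment in bytes (align / BITS_PER_UNIT);
   smaller objects use the regular COMMON_ASM_OP directive instead.  */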
2670 /* Utility function for targets to use in implementing
2671 ASM_OUTPUT_ALIGNED_BSS. */
2673 void
2674 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2675 const char *name, unsigned HOST_WIDE_INT size,
2676 int align)
2678 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2679 && size > (unsigned int)ix86_section_threshold)
2680 switch_to_section (get_named_section (decl, ".lbss", 0));
2681 else
2682 switch_to_section (bss_section);
2683 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2684 #ifdef ASM_DECLARE_OBJECT_NAME
2685 last_assemble_variable_decl = decl;
2686 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2687 #else
2688 /* The standard thing is just to output a label for the object. */
2689 ASM_OUTPUT_LABEL (file, name);
2690 #endif /* ASM_DECLARE_OBJECT_NAME */
2691 ASM_OUTPUT_SKIP (file, size ? size : 1);
2694 void
2695 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2697 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2698 make the problem with not enough registers even worse. */
2699 #ifdef INSN_SCHEDULING
2700 if (level > 1)
2701 flag_schedule_insns = 0;
2702 #endif
2704 if (TARGET_MACHO)
2705 /* The Darwin libraries never set errno, so we might as well
2706 avoid calling them when that's the only reason we would. */
2707 flag_errno_math = 0;
2709 /* The default values of these switches depend on TARGET_64BIT,
2710 which is not known at this point. Mark these values with 2 and
2711 let the user override them. In case there is no command line option
2712 specifying them, we will set the defaults in override_options. */
2713 if (optimize >= 1)
2714 flag_omit_frame_pointer = 2;
2715 flag_pcc_struct_return = 2;
2716 flag_asynchronous_unwind_tables = 2;
2717 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2718 SUBTARGET_OPTIMIZATION_OPTIONS;
2719 #endif
2722 /* Table of valid machine attributes. */
2723 const struct attribute_spec ix86_attribute_table[] =
2725 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2726 /* Stdcall attribute says callee is responsible for popping arguments
2727 if they are not variable. */
2728 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2729 /* Fastcall attribute says callee is responsible for popping arguments
2730 if they are not variable. */
2731 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2732 /* Cdecl attribute says the callee is a normal C declaration */
2733 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2734 /* Regparm attribute specifies how many integer arguments are to be
2735 passed in registers. */
2736 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2737 /* Sseregparm attribute says we are using x86_64 calling conventions
2738 for FP arguments. */
2739 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2740 /* force_align_arg_pointer says this function realigns the stack at entry. */
2741 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2742 false, true, true, ix86_handle_cconv_attribute },
2743 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2744 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2745 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2746 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2747 #endif
2748 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2749 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2750 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2751 SUBTARGET_ATTRIBUTE_TABLE,
2752 #endif
2753 { NULL, 0, 0, false, false, false, NULL }
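/* Typical uses of these attributes in user code look like this
   (illustrative only):

     int __attribute__ ((stdcall)) f (int a, int b);
     int __attribute__ ((regparm (3))) g (int a, int b, int c);

   The first makes the callee pop its arguments, the second passes up to
   three integer arguments in registers; both are validated by
   ix86_handle_cconv_attribute, defined below.  */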
2756 /* Decide whether we can make a sibling call to a function. DECL is the
2757 declaration of the function being targeted by the call and EXP is the
2758 CALL_EXPR representing the call. */
2760 static bool
2761 ix86_function_ok_for_sibcall (tree decl, tree exp)
2763 tree func;
2764 rtx a, b;
2766 /* If we are generating position-independent code, we cannot sibcall
2767 optimize any indirect call, or a direct call to a global function,
2768 as the PLT requires %ebx be live. */
2769 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2770 return false;
2772 if (decl)
2773 func = decl;
2774 else
2776 func = TREE_TYPE (CALL_EXPR_FN (exp));
2777 if (POINTER_TYPE_P (func))
2778 func = TREE_TYPE (func);
2781 /* Check that the return value locations are the same. Like
2782 if we are returning floats on the 80387 register stack, we cannot
2783 make a sibcall from a function that doesn't return a float to a
2784 function that does or, conversely, from a function that does return
2785 a float to a function that doesn't; the necessary stack adjustment
2786 would not be executed. This is also the place we notice
2787 differences in the return value ABI. Note that it is ok for one
2788 of the functions to have void return type as long as the return
2789 value of the other is passed in a register. */
2790 a = ix86_function_value (TREE_TYPE (exp), func, false);
2791 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2792 cfun->decl, false);
2793 if (STACK_REG_P (a) || STACK_REG_P (b))
2795 if (!rtx_equal_p (a, b))
2796 return false;
2798 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2800 else if (!rtx_equal_p (a, b))
2801 return false;
2803 /* If this call is indirect, we'll need to be able to use a call-clobbered
2804 register for the address of the target function. Make sure that all
2805 such registers are not used for passing parameters. */
2806 if (!decl && !TARGET_64BIT)
2808 tree type;
2810 /* We're looking at the CALL_EXPR, we need the type of the function. */
2811 type = CALL_EXPR_FN (exp); /* pointer expression */
2812 type = TREE_TYPE (type); /* pointer type */
2813 type = TREE_TYPE (type); /* function type */
2815 if (ix86_function_regparm (type, NULL) >= 3)
2817 /* ??? Need to count the actual number of registers to be used,
2818 not the possible number of registers. Fix later. */
2819 return false;
2823 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2824 /* Dllimport'd functions are also called indirectly. */
2825 if (decl && DECL_DLLIMPORT_P (decl)
2826 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2827 return false;
2828 #endif
2830 /* If we forced aligned the stack, then sibcalling would unalign the
2831 stack, which may break the called function. */
2832 if (cfun->machine->force_align_arg_pointer)
2833 return false;
2835 /* Otherwise okay. That also includes certain types of indirect calls. */
2836 return true;
2839 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2840 calling convention attributes;
2841 arguments as in struct attribute_spec.handler. */
2843 static tree
2844 ix86_handle_cconv_attribute (tree *node, tree name,
2845 tree args,
2846 int flags ATTRIBUTE_UNUSED,
2847 bool *no_add_attrs)
2849 if (TREE_CODE (*node) != FUNCTION_TYPE
2850 && TREE_CODE (*node) != METHOD_TYPE
2851 && TREE_CODE (*node) != FIELD_DECL
2852 && TREE_CODE (*node) != TYPE_DECL)
2854 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2855 IDENTIFIER_POINTER (name));
2856 *no_add_attrs = true;
2857 return NULL_TREE;
2860 /* Can combine regparm with all attributes but fastcall. */
2861 if (is_attribute_p ("regparm", name))
2863 tree cst;
2865 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2867 error ("fastcall and regparm attributes are not compatible");
2870 cst = TREE_VALUE (args);
2871 if (TREE_CODE (cst) != INTEGER_CST)
2873 warning (OPT_Wattributes,
2874 "%qs attribute requires an integer constant argument",
2875 IDENTIFIER_POINTER (name));
2876 *no_add_attrs = true;
2878 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2880 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2881 IDENTIFIER_POINTER (name), REGPARM_MAX);
2882 *no_add_attrs = true;
2885 if (!TARGET_64BIT
2886 && lookup_attribute (ix86_force_align_arg_pointer_string,
2887 TYPE_ATTRIBUTES (*node))
2888 && compare_tree_int (cst, REGPARM_MAX-1))
2890 error ("%s functions limited to %d register parameters",
2891 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2894 return NULL_TREE;
2897 if (TARGET_64BIT)
2899 warning (OPT_Wattributes, "%qs attribute ignored",
2900 IDENTIFIER_POINTER (name));
2901 *no_add_attrs = true;
2902 return NULL_TREE;
2905 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2906 if (is_attribute_p ("fastcall", name))
2908 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2910 error ("fastcall and cdecl attributes are not compatible");
2912 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2914 error ("fastcall and stdcall attributes are not compatible");
2916 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2918 error ("fastcall and regparm attributes are not compatible");
2922 /* Can combine stdcall with fastcall (redundant), regparm and
2923 sseregparm. */
2924 else if (is_attribute_p ("stdcall", name))
2926 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2928 error ("stdcall and cdecl attributes are not compatible");
2930 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2932 error ("stdcall and fastcall attributes are not compatible");
2936 /* Can combine cdecl with regparm and sseregparm. */
2937 else if (is_attribute_p ("cdecl", name))
2939 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2941 error ("stdcall and cdecl attributes are not compatible");
2943 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2945 error ("fastcall and cdecl attributes are not compatible");
2949 /* Can combine sseregparm with all attributes. */
2951 return NULL_TREE;
2954 /* Return 0 if the attributes for two types are incompatible, 1 if they
2955 are compatible, and 2 if they are nearly compatible (which causes a
2956 warning to be generated). */
2958 static int
2959 ix86_comp_type_attributes (tree type1, tree type2)
2961 /* Check for mismatch of non-default calling convention. */
2962 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2964 if (TREE_CODE (type1) != FUNCTION_TYPE)
2965 return 1;
2967 /* Check for mismatched fastcall/regparm types. */
2968 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2969 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2970 || (ix86_function_regparm (type1, NULL)
2971 != ix86_function_regparm (type2, NULL)))
2972 return 0;
2974 /* Check for mismatched sseregparm types. */
2975 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2976 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2977 return 0;
2979 /* Check for mismatched return types (cdecl vs stdcall). */
2980 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2981 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2982 return 0;
2984 return 1;
2987 /* Return the regparm value for a function with the indicated TYPE and DECL.
2988 DECL may be NULL when calling function indirectly
2989 or considering a libcall. */
2991 static int
2992 ix86_function_regparm (tree type, tree decl)
2994 tree attr;
2995 int regparm = ix86_regparm;
2996 bool user_convention = false;
2998 if (!TARGET_64BIT)
3000 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3001 if (attr)
3003 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3004 user_convention = true;
3007 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3009 regparm = 2;
3010 user_convention = true;
3013 /* Use register calling convention for local functions when possible. */
3014 if (!TARGET_64BIT && !user_convention && decl
3015 && flag_unit_at_a_time && !profile_flag)
3017 struct cgraph_local_info *i = cgraph_local_info (decl);
3018 if (i && i->local)
3020 int local_regparm, globals = 0, regno;
3022 /* Make sure no regparm register is taken by a global register
3023 variable. */
3024 for (local_regparm = 0; local_regparm < 3; local_regparm++)
3025 if (global_regs[local_regparm])
3026 break;
3027 /* We can't use regparm(3) for nested functions as these pass the
3028 static chain pointer in the third argument. */
3029 if (local_regparm == 3
3030 && decl_function_context (decl)
3031 && !DECL_NO_STATIC_CHAIN (decl))
3032 local_regparm = 2;
3033 /* If the function realigns its stack pointer, the
3034 prologue will clobber %ecx. If we've already
3035 generated code for the callee, the callee
3036 DECL_STRUCT_FUNCTION is gone, so we fall back to
3037 scanning the attributes for the self-realigning
3038 property. */
3039 if ((DECL_STRUCT_FUNCTION (decl)
3040 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
3041 || (!DECL_STRUCT_FUNCTION (decl)
3042 && lookup_attribute (ix86_force_align_arg_pointer_string,
3043 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3044 local_regparm = 2;
3045 /* Each global register variable increases register pressure,
3046 so the more global register variables there are, the less the regparm
3047 optimization helps, unless requested by the user explicitly. */
3048 for (regno = 0; regno < 6; regno++)
3049 if (global_regs[regno])
3050 globals++;
3051 local_regparm
3052 = globals < local_regparm ? local_regparm - globals : 0;
3054 if (local_regparm > regparm)
3055 regparm = local_regparm;
3059 return regparm;
3062 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3063 DFmode (2) arguments in SSE registers for a function with the
3064 indicated TYPE and DECL. DECL may be NULL when calling function
3065 indirectly or considering a libcall. Otherwise return 0. */
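/* An informal sketch (assuming SSE2 is available): for

     double __attribute__ ((sseregparm)) f (double x, double y);

   this returns 2, so X and Y travel in %xmm0 and %xmm1 rather than on
   the 32-bit stack; without SSE the attribute is rejected with an error.  */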
3067 static int
3068 ix86_function_sseregparm (tree type, tree decl)
3070 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3071 by the sseregparm attribute. */
3072 if (TARGET_SSEREGPARM
3073 || (type
3074 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3076 if (!TARGET_SSE)
3078 if (decl)
3079 error ("Calling %qD with attribute sseregparm without "
3080 "SSE/SSE2 enabled", decl);
3081 else
3082 error ("Calling %qT with attribute sseregparm without "
3083 "SSE/SSE2 enabled", type);
3084 return 0;
3087 return 2;
3090 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3091 (and DFmode for SSE2) arguments in SSE registers,
3092 even for 32-bit targets. */
3093 if (!TARGET_64BIT && decl
3094 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3096 struct cgraph_local_info *i = cgraph_local_info (decl);
3097 if (i && i->local)
3098 return TARGET_SSE2 ? 2 : 1;
3101 return 0;
3104 /* Return true if EAX is live at the start of the function. Used by
3105 ix86_expand_prologue to determine if we need special help before
3106 calling allocate_stack_worker. */
3108 static bool
3109 ix86_eax_live_at_start_p (void)
3111 /* Cheat. Don't bother working forward from ix86_function_regparm
3112 to the function type to whether an actual argument is located in
3113 eax. Instead just look at cfg info, which is still close enough
3114 to correct at this point. This gives false positives for broken
3115 functions that might use uninitialized data that happens to be
3116 allocated in eax, but who cares? */
3117 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3120 /* Value is the number of bytes of arguments automatically
3121 popped when returning from a subroutine call.
3122 FUNDECL is the declaration node of the function (as a tree),
3123 FUNTYPE is the data type of the function (as a tree),
3124 or for a library call it is an identifier node for the subroutine name.
3125 SIZE is the number of bytes of arguments passed on the stack.
3127 On the 80386, the RTD insn may be used to pop them if the number
3128 of args is fixed, but if the number is variable then the caller
3129 must pop them all. RTD can't be used for library calls now
3130 because the library is compiled with the Unix compiler.
3131 Use of RTD is a selectable option, since it is incompatible with
3132 standard Unix calling sequences. If the option is not selected,
3133 the caller must always pop the args.
3135 The attribute stdcall is equivalent to RTD on a per module basis. */
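/* An informal sketch: for

     void __attribute__ ((stdcall)) f (int a, int b);

   this returns 8 and the callee ends in "ret $8"; the same prototype
   with the cdecl attribute (or without -mrtd) returns 0 and the caller
   pops the 8 bytes itself.  */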
3138 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3140 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3142 /* Cdecl functions override -mrtd, and never pop the stack. */
3143 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3145 /* Stdcall and fastcall functions will pop the stack if they do not
3146 take variable args. */
3147 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3148 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3149 rtd = 1;
3151 if (rtd
3152 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
3153 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3154 == void_type_node)))
3155 return size;
3158 /* Lose any fake structure return argument if it is passed on the stack. */
3159 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3160 && !TARGET_64BIT
3161 && !KEEP_AGGREGATE_RETURN_POINTER)
3163 int nregs = ix86_function_regparm (funtype, fundecl);
3165 if (!nregs)
3166 return GET_MODE_SIZE (Pmode);
3169 return 0;
3172 /* Argument support functions. */
3174 /* Return true when register may be used to pass function parameters. */
3175 bool
3176 ix86_function_arg_regno_p (int regno)
3178 int i;
3179 if (!TARGET_64BIT)
3181 if (TARGET_MACHO)
3182 return (regno < REGPARM_MAX
3183 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3184 else
3185 return (regno < REGPARM_MAX
3186 || (TARGET_MMX && MMX_REGNO_P (regno)
3187 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3188 || (TARGET_SSE && SSE_REGNO_P (regno)
3189 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3192 if (TARGET_MACHO)
3194 if (SSE_REGNO_P (regno) && TARGET_SSE)
3195 return true;
3197 else
3199 if (TARGET_SSE && SSE_REGNO_P (regno)
3200 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3201 return true;
3203 /* RAX is used as hidden argument to va_arg functions. */
3204 if (!regno)
3205 return true;
3206 for (i = 0; i < REGPARM_MAX; i++)
3207 if (regno == x86_64_int_parameter_registers[i])
3208 return true;
3209 return false;
3212 /* Return true if we do not know how to pass TYPE solely in registers. */
3214 static bool
3215 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3217 if (must_pass_in_stack_var_size_or_pad (mode, type))
3218 return true;
3220 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3221 The layout_type routine is crafty and tries to trick us into passing
3222 currently unsupported vector types on the stack by using TImode. */
3223 return (!TARGET_64BIT && mode == TImode
3224 && type && TREE_CODE (type) != VECTOR_TYPE);
3227 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3228 for a call to a function whose data type is FNTYPE.
3229 For a library call, FNTYPE is 0. */
3231 void
3232 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3233 tree fntype, /* tree ptr for function decl */
3234 rtx libname, /* SYMBOL_REF of library name or 0 */
3235 tree fndecl)
3237 static CUMULATIVE_ARGS zero_cum;
3238 tree param, next_param;
3240 if (TARGET_DEBUG_ARG)
3242 fprintf (stderr, "\ninit_cumulative_args (");
3243 if (fntype)
3244 fprintf (stderr, "fntype code = %s, ret code = %s",
3245 tree_code_name[(int) TREE_CODE (fntype)],
3246 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3247 else
3248 fprintf (stderr, "no fntype");
3250 if (libname)
3251 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3254 *cum = zero_cum;
3256 /* Set up the number of registers to use for passing arguments. */
3257 cum->nregs = ix86_regparm;
3258 if (TARGET_SSE)
3259 cum->sse_nregs = SSE_REGPARM_MAX;
3260 if (TARGET_MMX)
3261 cum->mmx_nregs = MMX_REGPARM_MAX;
3262 cum->warn_sse = true;
3263 cum->warn_mmx = true;
3264 cum->maybe_vaarg = false;
3266 /* Use ecx and edx registers if function has fastcall attribute,
3267 else look for regparm information. */
3268 if (fntype && !TARGET_64BIT)
3270 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3272 cum->nregs = 2;
3273 cum->fastcall = 1;
3275 else
3276 cum->nregs = ix86_function_regparm (fntype, fndecl);
3279 /* Set up the number of SSE registers used for passing SFmode
3280 and DFmode arguments. Warn for mismatching ABI. */
3281 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3283 /* Determine if this function has variable arguments. This is
3284 indicated by the last argument being 'void_type_node' if there
3285 are no variable arguments. If there are variable arguments, then
3286 we won't pass anything in registers in 32-bit mode. */
3288 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3290 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3291 param != 0; param = next_param)
3293 next_param = TREE_CHAIN (param);
3294 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3296 if (!TARGET_64BIT)
3298 cum->nregs = 0;
3299 cum->sse_nregs = 0;
3300 cum->mmx_nregs = 0;
3301 cum->warn_sse = 0;
3302 cum->warn_mmx = 0;
3303 cum->fastcall = 0;
3304 cum->float_in_sse = 0;
3306 cum->maybe_vaarg = true;
3310 if ((!fntype && !libname)
3311 || (fntype && !TYPE_ARG_TYPES (fntype)))
3312 cum->maybe_vaarg = true;
3314 if (TARGET_DEBUG_ARG)
3315 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3317 return;
3320 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3321 But in the case of vector types, it is some vector mode.
3323 When we have only some of our vector isa extensions enabled, then there
3324 are some modes for which vector_mode_supported_p is false. For these
3325 modes, the generic vector support in gcc will choose some non-vector mode
3326 in order to implement the type. By computing the natural mode, we'll
3327 select the proper ABI location for the operand and not depend on whatever
3328 the middle-end decides to do with these vector types. */
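/* An informal example: for

     typedef int v4si __attribute__ ((vector_size (16)));

   the natural mode is V4SImode even when SSE is disabled and the
   middle-end would otherwise lower the type to a non-vector mode.  */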
3330 static enum machine_mode
3331 type_natural_mode (tree type)
3333 enum machine_mode mode = TYPE_MODE (type);
3335 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3337 HOST_WIDE_INT size = int_size_in_bytes (type);
3338 if ((size == 8 || size == 16)
3339 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3340 && TYPE_VECTOR_SUBPARTS (type) > 1)
3342 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3344 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3345 mode = MIN_MODE_VECTOR_FLOAT;
3346 else
3347 mode = MIN_MODE_VECTOR_INT;
3349 /* Get the mode which has this inner mode and number of units. */
3350 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3351 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3352 && GET_MODE_INNER (mode) == innermode)
3353 return mode;
3355 gcc_unreachable ();
3359 return mode;
3362 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3363 this may not agree with the mode that the type system has chosen for the
3364 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3365 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3367 static rtx
3368 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3369 unsigned int regno)
3371 rtx tmp;
3373 if (orig_mode != BLKmode)
3374 tmp = gen_rtx_REG (orig_mode, regno);
3375 else
3377 tmp = gen_rtx_REG (mode, regno);
3378 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3379 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3382 return tmp;
3385 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3386 of this code is to classify each 8bytes of incoming argument by the register
3387 class and assign registers accordingly. */
3389 /* Return the union class of CLASS1 and CLASS2.
3390 See the x86-64 PS ABI for details. */
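/* For instance, merging X86_64_INTEGER_CLASS with X86_64_SSE_CLASS yields
   X86_64_INTEGER_CLASS (rule #4), while merging anything with
   X86_64_X87_CLASS yields X86_64_MEMORY_CLASS (rule #5).  */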
3392 static enum x86_64_reg_class
3393 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3395 /* Rule #1: If both classes are equal, this is the resulting class. */
3396 if (class1 == class2)
3397 return class1;
3399 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3400 the other class. */
3401 if (class1 == X86_64_NO_CLASS)
3402 return class2;
3403 if (class2 == X86_64_NO_CLASS)
3404 return class1;
3406 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3407 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3408 return X86_64_MEMORY_CLASS;
3410 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3411 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3412 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3413 return X86_64_INTEGERSI_CLASS;
3414 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3415 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3416 return X86_64_INTEGER_CLASS;
3418 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3419 MEMORY is used. */
3420 if (class1 == X86_64_X87_CLASS
3421 || class1 == X86_64_X87UP_CLASS
3422 || class1 == X86_64_COMPLEX_X87_CLASS
3423 || class2 == X86_64_X87_CLASS
3424 || class2 == X86_64_X87UP_CLASS
3425 || class2 == X86_64_COMPLEX_X87_CLASS)
3426 return X86_64_MEMORY_CLASS;
3428 /* Rule #6: Otherwise class SSE is used. */
3429 return X86_64_SSE_CLASS;
3432 /* Classify the argument of type TYPE and mode MODE.
3433 CLASSES will be filled by the register class used to pass each word
3434 of the operand. The number of words is returned. In case the parameter
3435 should be passed in memory, 0 is returned. As a special case for zero
3436 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3438 BIT_OFFSET is used internally for handling records and specifies the
3439 offset in bits, modulo 256, to avoid overflow cases.
3441 See the x86-64 PS ABI for details. */
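/* A worked example: struct { double d; long l; } is 16 bytes, so this
   returns 2 with classes[0] == X86_64_SSEDF_CLASS and
   classes[1] == X86_64_INTEGER_CLASS, i.e. one SSE and one integer word.  */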
3444 static int
3445 classify_argument (enum machine_mode mode, tree type,
3446 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3448 HOST_WIDE_INT bytes =
3449 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3450 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3452 /* Variable sized entities are always passed/returned in memory. */
3453 if (bytes < 0)
3454 return 0;
3456 if (mode != VOIDmode
3457 && targetm.calls.must_pass_in_stack (mode, type))
3458 return 0;
3460 if (type && AGGREGATE_TYPE_P (type))
3462 int i;
3463 tree field;
3464 enum x86_64_reg_class subclasses[MAX_CLASSES];
3466 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3467 if (bytes > 16)
3468 return 0;
3470 for (i = 0; i < words; i++)
3471 classes[i] = X86_64_NO_CLASS;
3473 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3474 signal the memory class, so handle it as a special case. */
3475 if (!words)
3477 classes[0] = X86_64_NO_CLASS;
3478 return 1;
3481 /* Classify each field of record and merge classes. */
3482 switch (TREE_CODE (type))
3484 case RECORD_TYPE:
3485 /* And now merge the fields of structure. */
3486 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3488 if (TREE_CODE (field) == FIELD_DECL)
3490 int num;
3492 if (TREE_TYPE (field) == error_mark_node)
3493 continue;
3495 /* Bitfields are always classified as integer. Handle them
3496 early, since later code would consider them to be
3497 misaligned integers. */
3498 if (DECL_BIT_FIELD (field))
3500 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3501 i < ((int_bit_position (field) + (bit_offset % 64))
3502 + tree_low_cst (DECL_SIZE (field), 0)
3503 + 63) / 8 / 8; i++)
3504 classes[i] =
3505 merge_classes (X86_64_INTEGER_CLASS,
3506 classes[i]);
3508 else
3510 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3511 TREE_TYPE (field), subclasses,
3512 (int_bit_position (field)
3513 + bit_offset) % 256);
3514 if (!num)
3515 return 0;
3516 for (i = 0; i < num; i++)
3518 int pos =
3519 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3520 classes[i + pos] =
3521 merge_classes (subclasses[i], classes[i + pos]);
3526 break;
3528 case ARRAY_TYPE:
3529 /* Arrays are handled as small records. */
3531 int num;
3532 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3533 TREE_TYPE (type), subclasses, bit_offset);
3534 if (!num)
3535 return 0;
3537 /* The partial classes are now full classes. */
3538 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3539 subclasses[0] = X86_64_SSE_CLASS;
3540 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3541 subclasses[0] = X86_64_INTEGER_CLASS;
3543 for (i = 0; i < words; i++)
3544 classes[i] = subclasses[i % num];
3546 break;
3548 case UNION_TYPE:
3549 case QUAL_UNION_TYPE:
3550 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3552 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3554 if (TREE_CODE (field) == FIELD_DECL)
3556 int num;
3558 if (TREE_TYPE (field) == error_mark_node)
3559 continue;
3561 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3562 TREE_TYPE (field), subclasses,
3563 bit_offset);
3564 if (!num)
3565 return 0;
3566 for (i = 0; i < num; i++)
3567 classes[i] = merge_classes (subclasses[i], classes[i]);
3570 break;
3572 default:
3573 gcc_unreachable ();
3576 /* Final merger cleanup. */
3577 for (i = 0; i < words; i++)
3579 /* If one class is MEMORY, everything should be passed in
3580 memory. */
3581 if (classes[i] == X86_64_MEMORY_CLASS)
3582 return 0;
3584 /* The X86_64_SSEUP_CLASS should be always preceded by
3585 X86_64_SSE_CLASS. */
3586 if (classes[i] == X86_64_SSEUP_CLASS
3587 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3588 classes[i] = X86_64_SSE_CLASS;
3590 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3591 if (classes[i] == X86_64_X87UP_CLASS
3592 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3593 classes[i] = X86_64_SSE_CLASS;
3595 return words;
3598 /* Compute alignment needed. We align all types to natural boundaries with
3599 exception of XFmode that is aligned to 128 bits. */
3600 if (mode != VOIDmode && mode != BLKmode)
3602 int mode_alignment = GET_MODE_BITSIZE (mode);
3604 if (mode == XFmode)
3605 mode_alignment = 128;
3606 else if (mode == XCmode)
3607 mode_alignment = 256;
3608 if (COMPLEX_MODE_P (mode))
3609 mode_alignment /= 2;
3610 /* Misaligned fields are always returned in memory. */
3611 if (bit_offset % mode_alignment)
3612 return 0;
3615 /* for V1xx modes, just use the base mode */
3616 if (VECTOR_MODE_P (mode)
3617 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3618 mode = GET_MODE_INNER (mode);
3620 /* Classification of atomic types. */
3621 switch (mode)
3623 case SDmode:
3624 case DDmode:
3625 classes[0] = X86_64_SSE_CLASS;
3626 return 1;
3627 case TDmode:
3628 classes[0] = X86_64_SSE_CLASS;
3629 classes[1] = X86_64_SSEUP_CLASS;
3630 return 2;
3631 case DImode:
3632 case SImode:
3633 case HImode:
3634 case QImode:
3635 case CSImode:
3636 case CHImode:
3637 case CQImode:
3638 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3639 classes[0] = X86_64_INTEGERSI_CLASS;
3640 else
3641 classes[0] = X86_64_INTEGER_CLASS;
3642 return 1;
3643 case CDImode:
3644 case TImode:
3645 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3646 return 2;
3647 case CTImode:
3648 return 0;
3649 case SFmode:
3650 if (!(bit_offset % 64))
3651 classes[0] = X86_64_SSESF_CLASS;
3652 else
3653 classes[0] = X86_64_SSE_CLASS;
3654 return 1;
3655 case DFmode:
3656 classes[0] = X86_64_SSEDF_CLASS;
3657 return 1;
3658 case XFmode:
3659 classes[0] = X86_64_X87_CLASS;
3660 classes[1] = X86_64_X87UP_CLASS;
3661 return 2;
3662 case TFmode:
3663 classes[0] = X86_64_SSE_CLASS;
3664 classes[1] = X86_64_SSEUP_CLASS;
3665 return 2;
3666 case SCmode:
3667 classes[0] = X86_64_SSE_CLASS;
3668 return 1;
3669 case DCmode:
3670 classes[0] = X86_64_SSEDF_CLASS;
3671 classes[1] = X86_64_SSEDF_CLASS;
3672 return 2;
3673 case XCmode:
3674 classes[0] = X86_64_COMPLEX_X87_CLASS;
3675 return 1;
3676 case TCmode:
3677 /* This mode is larger than 16 bytes. */
3678 return 0;
3679 case V4SFmode:
3680 case V4SImode:
3681 case V16QImode:
3682 case V8HImode:
3683 case V2DFmode:
3684 case V2DImode:
3685 classes[0] = X86_64_SSE_CLASS;
3686 classes[1] = X86_64_SSEUP_CLASS;
3687 return 2;
3688 case V2SFmode:
3689 case V2SImode:
3690 case V4HImode:
3691 case V8QImode:
3692 classes[0] = X86_64_SSE_CLASS;
3693 return 1;
3694 case BLKmode:
3695 case VOIDmode:
3696 return 0;
3697 default:
3698 gcc_assert (VECTOR_MODE_P (mode));
3700 if (bytes > 16)
3701 return 0;
3703 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3705 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3706 classes[0] = X86_64_INTEGERSI_CLASS;
3707 else
3708 classes[0] = X86_64_INTEGER_CLASS;
3709 classes[1] = X86_64_INTEGER_CLASS;
3710 return 1 + (bytes > 8);
3714 /* Examine the argument and set the number of registers required in each
3715 class. Return 0 iff the parameter should be passed in memory. */
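/* Continuing the example above, struct { double d; long l; } gives
   *int_nregs == 1 and *sse_nregs == 1, whereas a 32-byte aggregate is
   classified as memory and this returns 0.  */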
3716 static int
3717 examine_argument (enum machine_mode mode, tree type, int in_return,
3718 int *int_nregs, int *sse_nregs)
3720 enum x86_64_reg_class class[MAX_CLASSES];
3721 int n = classify_argument (mode, type, class, 0);
3723 *int_nregs = 0;
3724 *sse_nregs = 0;
3725 if (!n)
3726 return 0;
3727 for (n--; n >= 0; n--)
3728 switch (class[n])
3730 case X86_64_INTEGER_CLASS:
3731 case X86_64_INTEGERSI_CLASS:
3732 (*int_nregs)++;
3733 break;
3734 case X86_64_SSE_CLASS:
3735 case X86_64_SSESF_CLASS:
3736 case X86_64_SSEDF_CLASS:
3737 (*sse_nregs)++;
3738 break;
3739 case X86_64_NO_CLASS:
3740 case X86_64_SSEUP_CLASS:
3741 break;
3742 case X86_64_X87_CLASS:
3743 case X86_64_X87UP_CLASS:
3744 if (!in_return)
3745 return 0;
3746 break;
3747 case X86_64_COMPLEX_X87_CLASS:
3748 return in_return ? 2 : 0;
3749 case X86_64_MEMORY_CLASS:
3750 gcc_unreachable ();
3752 return 1;
3755 /* Construct container for the argument used by GCC interface. See
3756 FUNCTION_ARG for the detailed description. */
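/* Sketch of the result for struct { double d; long l; } at the start of
   the argument list: a PARALLEL holding (reg:DF xmm0) at offset 0 and
   (reg:DI di) at offset 8; the exact registers depend on how many are
   still free in each class.  */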
3758 static rtx
3759 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3760 tree type, int in_return, int nintregs, int nsseregs,
3761 const int *intreg, int sse_regno)
3763 /* The following variables hold the static issued_error state. */
3764 static bool issued_sse_arg_error;
3765 static bool issued_sse_ret_error;
3766 static bool issued_x87_ret_error;
3768 enum machine_mode tmpmode;
3769 int bytes =
3770 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3771 enum x86_64_reg_class class[MAX_CLASSES];
3772 int n;
3773 int i;
3774 int nexps = 0;
3775 int needed_sseregs, needed_intregs;
3776 rtx exp[MAX_CLASSES];
3777 rtx ret;
3779 n = classify_argument (mode, type, class, 0);
3780 if (TARGET_DEBUG_ARG)
3782 if (!n)
3783 fprintf (stderr, "Memory class\n");
3784 else
3786 fprintf (stderr, "Classes:");
3787 for (i = 0; i < n; i++)
3789 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3791 fprintf (stderr, "\n");
3794 if (!n)
3795 return NULL;
3796 if (!examine_argument (mode, type, in_return, &needed_intregs,
3797 &needed_sseregs))
3798 return NULL;
3799 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3800 return NULL;
3802 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3803 some less clueful developer tries to use floating-point anyway. */
3804 if (needed_sseregs && !TARGET_SSE)
3806 if (in_return)
3808 if (!issued_sse_ret_error)
3810 error ("SSE register return with SSE disabled");
3811 issued_sse_ret_error = true;
3814 else if (!issued_sse_arg_error)
3816 error ("SSE register argument with SSE disabled");
3817 issued_sse_arg_error = true;
3819 return NULL;
3822 /* Likewise, error if the ABI requires us to return values in the
3823 x87 registers and the user specified -mno-80387. */
3824 if (!TARGET_80387 && in_return)
3825 for (i = 0; i < n; i++)
3826 if (class[i] == X86_64_X87_CLASS
3827 || class[i] == X86_64_X87UP_CLASS
3828 || class[i] == X86_64_COMPLEX_X87_CLASS)
3830 if (!issued_x87_ret_error)
3832 error ("x87 register return with x87 disabled");
3833 issued_x87_ret_error = true;
3835 return NULL;
3838 /* First construct simple cases. Avoid SCmode, since we want to use
3839 a single register to pass this type. */
3840 if (n == 1 && mode != SCmode)
3841 switch (class[0])
3843 case X86_64_INTEGER_CLASS:
3844 case X86_64_INTEGERSI_CLASS:
3845 return gen_rtx_REG (mode, intreg[0]);
3846 case X86_64_SSE_CLASS:
3847 case X86_64_SSESF_CLASS:
3848 case X86_64_SSEDF_CLASS:
3849 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3850 case X86_64_X87_CLASS:
3851 case X86_64_COMPLEX_X87_CLASS:
3852 return gen_rtx_REG (mode, FIRST_STACK_REG);
3853 case X86_64_NO_CLASS:
3854 /* Zero sized array, struct or class. */
3855 return NULL;
3856 default:
3857 gcc_unreachable ();
3859 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3860 && mode != BLKmode)
3861 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3862 if (n == 2
3863 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3864 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3865 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3866 && class[1] == X86_64_INTEGER_CLASS
3867 && (mode == CDImode || mode == TImode || mode == TFmode)
3868 && intreg[0] + 1 == intreg[1])
3869 return gen_rtx_REG (mode, intreg[0]);
3871 /* Otherwise figure out the entries of the PARALLEL. */
3872 for (i = 0; i < n; i++)
3874 switch (class[i])
3876 case X86_64_NO_CLASS:
3877 break;
3878 case X86_64_INTEGER_CLASS:
3879 case X86_64_INTEGERSI_CLASS:
3880 /* Merge TImodes on aligned occasions here too. */
3881 if (i * 8 + 8 > bytes)
3882 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3883 else if (class[i] == X86_64_INTEGERSI_CLASS)
3884 tmpmode = SImode;
3885 else
3886 tmpmode = DImode;
3887 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3888 if (tmpmode == BLKmode)
3889 tmpmode = DImode;
3890 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3891 gen_rtx_REG (tmpmode, *intreg),
3892 GEN_INT (i*8));
3893 intreg++;
3894 break;
3895 case X86_64_SSESF_CLASS:
3896 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3897 gen_rtx_REG (SFmode,
3898 SSE_REGNO (sse_regno)),
3899 GEN_INT (i*8));
3900 sse_regno++;
3901 break;
3902 case X86_64_SSEDF_CLASS:
3903 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3904 gen_rtx_REG (DFmode,
3905 SSE_REGNO (sse_regno)),
3906 GEN_INT (i*8));
3907 sse_regno++;
3908 break;
3909 case X86_64_SSE_CLASS:
3910 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3911 tmpmode = TImode;
3912 else
3913 tmpmode = DImode;
3914 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3915 gen_rtx_REG (tmpmode,
3916 SSE_REGNO (sse_regno)),
3917 GEN_INT (i*8));
3918 if (tmpmode == TImode)
3919 i++;
3920 sse_regno++;
3921 break;
3922 default:
3923 gcc_unreachable ();
3927 /* Empty aligned struct, union or class. */
3928 if (nexps == 0)
3929 return NULL;
3931 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3932 for (i = 0; i < nexps; i++)
3933 XVECEXP (ret, 0, i) = exp [i];
3934 return ret;
3937 /* Update the data in CUM to advance over an argument
3938 of mode MODE and data type TYPE.
3939 (TYPE is null for libcalls where that information may not be available.) */
3941 void
3942 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3943 tree type, int named)
3945 int bytes =
3946 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3947 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3949 if (type)
3950 mode = type_natural_mode (type);
3952 if (TARGET_DEBUG_ARG)
3953 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3954 "mode=%s, named=%d)\n\n",
3955 words, cum->words, cum->nregs, cum->sse_nregs,
3956 GET_MODE_NAME (mode), named);
3958 if (TARGET_64BIT)
3960 int int_nregs, sse_nregs;
3961 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3962 cum->words += words;
3963 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3965 cum->nregs -= int_nregs;
3966 cum->sse_nregs -= sse_nregs;
3967 cum->regno += int_nregs;
3968 cum->sse_regno += sse_nregs;
3970 else
3971 cum->words += words;
3973 else
3975 switch (mode)
3977 default:
3978 break;
3980 case BLKmode:
3981 if (bytes < 0)
3982 break;
3983 /* FALLTHRU */
3985 case DImode:
3986 case SImode:
3987 case HImode:
3988 case QImode:
3989 cum->words += words;
3990 cum->nregs -= words;
3991 cum->regno += words;
3993 if (cum->nregs <= 0)
3995 cum->nregs = 0;
3996 cum->regno = 0;
3998 break;
4000 case DFmode:
4001 if (cum->float_in_sse < 2)
4002 break;
4003 case SFmode:
4004 if (cum->float_in_sse < 1)
4005 break;
4006 /* FALLTHRU */
4008 case TImode:
4009 case V16QImode:
4010 case V8HImode:
4011 case V4SImode:
4012 case V2DImode:
4013 case V4SFmode:
4014 case V2DFmode:
4015 if (!type || !AGGREGATE_TYPE_P (type))
4017 cum->sse_words += words;
4018 cum->sse_nregs -= 1;
4019 cum->sse_regno += 1;
4020 if (cum->sse_nregs <= 0)
4022 cum->sse_nregs = 0;
4023 cum->sse_regno = 0;
4026 break;
4028 case V8QImode:
4029 case V4HImode:
4030 case V2SImode:
4031 case V2SFmode:
4032 if (!type || !AGGREGATE_TYPE_P (type))
4034 cum->mmx_words += words;
4035 cum->mmx_nregs -= 1;
4036 cum->mmx_regno += 1;
4037 if (cum->mmx_nregs <= 0)
4039 cum->mmx_nregs = 0;
4040 cum->mmx_regno = 0;
4043 break;
4048 /* Define where to put the arguments to a function.
4049 Value is zero to push the argument on the stack,
4050 or a hard register in which to store the argument.
4052 MODE is the argument's machine mode.
4053 TYPE is the data type of the argument (as a tree).
4054 This is null for libcalls where that information may
4055 not be available.
4056 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4057 the preceding args and about the function being called.
4058 NAMED is nonzero if this argument is a named parameter
4059 (otherwise it is an extra parameter matching an ellipsis). */
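/* Sketch of the 32-bit behaviour: with no attributes and -mregparm=0 every
   argument is passed on the stack (NULL is returned); with the fastcall
   attribute the first two integer arguments come back as (reg:SI cx) and
   (reg:SI dx).  */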
4062 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
4063 tree type, int named)
4065 enum machine_mode mode = orig_mode;
4066 rtx ret = NULL_RTX;
4067 int bytes =
4068 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4069 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4070 static bool warnedsse, warnedmmx;
4072 /* To simplify the code below, represent vector types with a vector mode
4073 even if MMX/SSE are not active. */
4074 if (type && TREE_CODE (type) == VECTOR_TYPE)
4075 mode = type_natural_mode (type);
4077 /* Handle a hidden AL argument containing number of registers for varargs
4078 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4079 any AL settings. */
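/* E.g. for a 64-bit varargs call such as printf ("%f", 1.0) the caller
   loads the count of SSE registers actually used (here 1) into %al just
   before the call; 32-bit code simply gets constm1_rtx back.  */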
4080 if (mode == VOIDmode)
4082 if (TARGET_64BIT)
4083 return GEN_INT (cum->maybe_vaarg
4084 ? (cum->sse_nregs < 0
4085 ? SSE_REGPARM_MAX
4086 : cum->sse_regno)
4087 : -1);
4088 else
4089 return constm1_rtx;
4091 if (TARGET_64BIT)
4092 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
4093 cum->sse_nregs,
4094 &x86_64_int_parameter_registers [cum->regno],
4095 cum->sse_regno);
4096 else
4097 switch (mode)
4099 /* For now, pass fp/complex values on the stack. */
4100 default:
4101 break;
4103 case BLKmode:
4104 if (bytes < 0)
4105 break;
4106 /* FALLTHRU */
4107 case DImode:
4108 case SImode:
4109 case HImode:
4110 case QImode:
4111 if (words <= cum->nregs)
4113 int regno = cum->regno;
4115 /* Fastcall allocates the first two DWORD (SImode) or
4116 smaller arguments to ECX and EDX. */
4117 if (cum->fastcall)
4119 if (mode == BLKmode || mode == DImode)
4120 break;
4122 /* ECX, not EAX, is the first allocated register. */
4123 if (regno == 0)
4124 regno = 2;
4126 ret = gen_rtx_REG (mode, regno);
4128 break;
4129 case DFmode:
4130 if (cum->float_in_sse < 2)
4131 break;
4132 case SFmode:
4133 if (cum->float_in_sse < 1)
4134 break;
4135 /* FALLTHRU */
4136 case TImode:
4137 case V16QImode:
4138 case V8HImode:
4139 case V4SImode:
4140 case V2DImode:
4141 case V4SFmode:
4142 case V2DFmode:
4143 if (!type || !AGGREGATE_TYPE_P (type))
4145 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4147 warnedsse = true;
4148 warning (0, "SSE vector argument without SSE enabled "
4149 "changes the ABI");
4151 if (cum->sse_nregs)
4152 ret = gen_reg_or_parallel (mode, orig_mode,
4153 cum->sse_regno + FIRST_SSE_REG);
4155 break;
4156 case V8QImode:
4157 case V4HImode:
4158 case V2SImode:
4159 case V2SFmode:
4160 if (!type || !AGGREGATE_TYPE_P (type))
4162 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4164 warnedmmx = true;
4165 warning (0, "MMX vector argument without MMX enabled "
4166 "changes the ABI");
4168 if (cum->mmx_nregs)
4169 ret = gen_reg_or_parallel (mode, orig_mode,
4170 cum->mmx_regno + FIRST_MMX_REG);
4172 break;
4175 if (TARGET_DEBUG_ARG)
4177 fprintf (stderr,
4178 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4179 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4181 if (ret)
4182 print_simple_rtl (stderr, ret);
4183 else
4184 fprintf (stderr, ", stack");
4186 fprintf (stderr, " )\n");
4189 return ret;
4192 /* A C expression that indicates when an argument must be passed by
4193 reference. If nonzero for an argument, a copy of that argument is
4194 made in memory and a pointer to the argument is passed instead of
4195 the argument itself. The pointer is passed in whatever way is
4196 appropriate for passing a pointer to that type. */
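/* In practice this fires only for 64-bit code and only for types whose
   size is not a compile-time constant (int_size_in_bytes returns -1),
   which are then passed via a hidden pointer; everything else is passed
   by value as usual.  */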
4198 static bool
4199 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4200 enum machine_mode mode ATTRIBUTE_UNUSED,
4201 tree type, bool named ATTRIBUTE_UNUSED)
4203 if (!TARGET_64BIT)
4204 return 0;
4206 if (type && int_size_in_bytes (type) == -1)
4208 if (TARGET_DEBUG_ARG)
4209 fprintf (stderr, "function_arg_pass_by_reference\n");
4210 return 1;
4213 return 0;
4216 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4217 ABI. Only called if TARGET_SSE. */
4218 static bool
4219 contains_128bit_aligned_vector_p (tree type)
4221 enum machine_mode mode = TYPE_MODE (type);
4222 if (SSE_REG_MODE_P (mode)
4223 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4224 return true;
4225 if (TYPE_ALIGN (type) < 128)
4226 return false;
4228 if (AGGREGATE_TYPE_P (type))
4230 /* Walk the aggregates recursively. */
4231 switch (TREE_CODE (type))
4233 case RECORD_TYPE:
4234 case UNION_TYPE:
4235 case QUAL_UNION_TYPE:
4237 tree field;
4239 /* Walk all the structure fields. */
4240 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4242 if (TREE_CODE (field) == FIELD_DECL
4243 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4244 return true;
4246 break;
4249 case ARRAY_TYPE:
4250 /* Just for use if some language passes arrays by value. */
4251 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4252 return true;
4253 break;
4255 default:
4256 gcc_unreachable ();
4259 return false;
4262 /* Gives the alignment boundary, in bits, of an argument with the
4263 specified mode and type. */
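/* E.g. in 32-bit code a plain int or double argument reports PARM_BOUNDARY,
   while a 16-byte vector such as __m128 reports 128 when SSE is enabled;
   the result is capped at 128 bits.  */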
4266 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4268 int align;
4269 if (type)
4270 align = TYPE_ALIGN (type);
4271 else
4272 align = GET_MODE_ALIGNMENT (mode);
4273 if (align < PARM_BOUNDARY)
4274 align = PARM_BOUNDARY;
4275 if (!TARGET_64BIT)
4277 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4278 make an exception for SSE modes since these require 128bit
4279 alignment.
4281 The handling here differs from field_alignment. ICC aligns MMX
4282 arguments to 4 byte boundaries, while structure fields are aligned
4283 to 8 byte boundaries. */
4284 if (!TARGET_SSE)
4285 align = PARM_BOUNDARY;
4286 else if (!type)
4288 if (!SSE_REG_MODE_P (mode))
4289 align = PARM_BOUNDARY;
4291 else
4293 if (!contains_128bit_aligned_vector_p (type))
4294 align = PARM_BOUNDARY;
4297 if (align > 128)
4298 align = 128;
4299 return align;
4302 /* Return true if N is a possible register number of function value. */
4303 bool
4304 ix86_function_value_regno_p (int regno)
4306 if (TARGET_MACHO)
4308 if (!TARGET_64BIT)
4310 return ((regno) == 0
4311 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4312 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4314 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4315 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4316 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4318 else
4320 if (regno == 0
4321 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4322 || (regno == FIRST_SSE_REG && TARGET_SSE))
4323 return true;
4325 if (!TARGET_64BIT
4326 && (regno == FIRST_MMX_REG && TARGET_MMX))
4327 return true;
4329 return false;
4333 /* Define how to find the value returned by a function.
4334 VALTYPE is the data type of the value (as a tree).
4335 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4336 otherwise, FUNC is 0. */
4338 ix86_function_value (tree valtype, tree fntype_or_decl,
4339 bool outgoing ATTRIBUTE_UNUSED)
4341 enum machine_mode natmode = type_natural_mode (valtype);
4343 if (TARGET_64BIT)
4345 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4346 1, REGPARM_MAX, SSE_REGPARM_MAX,
4347 x86_64_int_return_registers, 0);
4348 /* For zero sized structures, construct_container returns NULL, but we
4349 need to keep the rest of the compiler happy by returning a meaningful value. */
4350 if (!ret)
4351 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4352 return ret;
4354 else
4356 tree fn = NULL_TREE, fntype;
4357 if (fntype_or_decl
4358 && DECL_P (fntype_or_decl))
4359 fn = fntype_or_decl;
4360 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4361 return gen_rtx_REG (TYPE_MODE (valtype),
4362 ix86_value_regno (natmode, fn, fntype));
4366 /* Return true iff type is returned in memory. */
4368 ix86_return_in_memory (tree type)
4370 int needed_intregs, needed_sseregs, size;
4371 enum machine_mode mode = type_natural_mode (type);
4373 if (TARGET_64BIT)
4374 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4376 if (mode == BLKmode)
4377 return 1;
4379 size = int_size_in_bytes (type);
4381 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4382 return 0;
4384 if (VECTOR_MODE_P (mode) || mode == TImode)
4386 /* User-created vectors small enough to fit in EAX. */
4387 if (size < 8)
4388 return 0;
4390 /* MMX/3dNow values are returned in MM0,
4391 except when it doesn't exist. */
4392 if (size == 8)
4393 return (TARGET_MMX ? 0 : 1);
4395 /* SSE values are returned in XMM0, except when it doesn't exist. */
4396 if (size == 16)
4397 return (TARGET_SSE ? 0 : 1);
4400 if (mode == XFmode)
4401 return 0;
4403 if (mode == TDmode)
4404 return 1;
4406 if (size > 12)
4407 return 1;
4408 return 0;
4411 /* When returning SSE vector types, we have a choice of either
4412 (1) being abi incompatible with a -march switch, or
4413 (2) generating an error.
4414 Given no good solution, I think the safest thing is one warning.
4415 The user won't be able to use -Werror, but....
4417 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4418 called in response to actually generating a caller or callee that
4419 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4420 via aggregate_value_p for general type probing from tree-ssa. */
4422 static rtx
4423 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4425 static bool warnedsse, warnedmmx;
4427 if (type)
4429 /* Look at the return type of the function, not the function type. */
4430 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4432 if (!TARGET_SSE && !warnedsse)
4434 if (mode == TImode
4435 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4437 warnedsse = true;
4438 warning (0, "SSE vector return without SSE enabled "
4439 "changes the ABI");
4443 if (!TARGET_MMX && !warnedmmx)
4445 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4447 warnedmmx = true;
4448 warning (0, "MMX vector return without MMX enabled "
4449 "changes the ABI");
4454 return NULL;
4457 /* Define how to find the value returned by a library function
4458 assuming the value has mode MODE. */
4460 ix86_libcall_value (enum machine_mode mode)
4462 if (TARGET_64BIT)
4464 switch (mode)
4466 case SFmode:
4467 case SCmode:
4468 case DFmode:
4469 case DCmode:
4470 case TFmode:
4471 case SDmode:
4472 case DDmode:
4473 case TDmode:
4474 return gen_rtx_REG (mode, FIRST_SSE_REG);
4475 case XFmode:
4476 case XCmode:
4477 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4478 case TCmode:
4479 return NULL;
4480 default:
4481 return gen_rtx_REG (mode, 0);
4484 else
4485 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4488 /* Given a mode, return the register to use for a return value. */
4490 static int
4491 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4493 gcc_assert (!TARGET_64BIT);
4495 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4496 we normally prevent this case when mmx is not available. However
4497 some ABIs may require the result to be returned like DImode. */
4498 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4499 return TARGET_MMX ? FIRST_MMX_REG : 0;
4501 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4502 we prevent this case when sse is not available. However some ABIs
4503 may require the result to be returned like integer TImode. */
4504 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4505 return TARGET_SSE ? FIRST_SSE_REG : 0;
4507 /* Decimal floating point values can go in %eax, unlike other float modes. */
4508 if (DECIMAL_FLOAT_MODE_P (mode))
4509 return 0;
4511 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4512 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4513 return 0;
4515 /* Floating point return values in %st(0), except for local functions when
4516 SSE math is enabled or for functions with sseregparm attribute. */
4517 if ((func || fntype)
4518 && (mode == SFmode || mode == DFmode))
4520 int sse_level = ix86_function_sseregparm (fntype, func);
4521 if ((sse_level >= 1 && mode == SFmode)
4522 || (sse_level == 2 && mode == DFmode))
4523 return FIRST_SSE_REG;
4526 return FIRST_FLOAT_REG;
4529 /* Create the va_list data type. */
4531 static tree
4532 ix86_build_builtin_va_list (void)
4534 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4536 /* For i386 we use plain pointer to argument area. */
4537 if (!TARGET_64BIT)
4538 return build_pointer_type (char_type_node);
4540 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4541 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4543 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4544 unsigned_type_node);
4545 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4546 unsigned_type_node);
4547 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4548 ptr_type_node);
4549 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4550 ptr_type_node);
4552 va_list_gpr_counter_field = f_gpr;
4553 va_list_fpr_counter_field = f_fpr;
4555 DECL_FIELD_CONTEXT (f_gpr) = record;
4556 DECL_FIELD_CONTEXT (f_fpr) = record;
4557 DECL_FIELD_CONTEXT (f_ovf) = record;
4558 DECL_FIELD_CONTEXT (f_sav) = record;
4560 TREE_CHAIN (record) = type_decl;
4561 TYPE_NAME (record) = type_decl;
4562 TYPE_FIELDS (record) = f_gpr;
4563 TREE_CHAIN (f_gpr) = f_fpr;
4564 TREE_CHAIN (f_fpr) = f_ovf;
4565 TREE_CHAIN (f_ovf) = f_sav;
4567 layout_type (record);
4569 /* The correct type is an array type of one element. */
4570 return build_array_type (record, build_index_type (size_zero_node));
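/* Informally, the record built above matches the usual x86-64 declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   shown here only as a sketch of the layout.  */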
4573 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4575 static void
4576 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4577 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4578 int no_rtl)
4580 CUMULATIVE_ARGS next_cum;
4581 rtx save_area = NULL_RTX, mem;
4582 rtx label;
4583 rtx label_ref;
4584 rtx tmp_reg;
4585 rtx nsse_reg;
4586 int set;
4587 tree fntype;
4588 int stdarg_p;
4589 int i;
4591 if (!TARGET_64BIT)
4592 return;
4594 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4595 return;
4597 /* Indicate to allocate space on the stack for varargs save area. */
4598 ix86_save_varrargs_registers = 1;
4600 cfun->stack_alignment_needed = 128;
4602 fntype = TREE_TYPE (current_function_decl);
4603 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4604 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4605 != void_type_node));
4607 /* For varargs, we do not want to skip the dummy va_dcl argument.
4608 For stdargs, we do want to skip the last named argument. */
4609 next_cum = *cum;
4610 if (stdarg_p)
4611 function_arg_advance (&next_cum, mode, type, 1);
4613 if (!no_rtl)
4614 save_area = frame_pointer_rtx;
4616 set = get_varargs_alias_set ();
4618 for (i = next_cum.regno;
4619 i < ix86_regparm
4620 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4621 i++)
4623 mem = gen_rtx_MEM (Pmode,
4624 plus_constant (save_area, i * UNITS_PER_WORD));
4625 MEM_NOTRAP_P (mem) = 1;
4626 set_mem_alias_set (mem, set);
4627 emit_move_insn (mem, gen_rtx_REG (Pmode,
4628 x86_64_int_parameter_registers[i]));
4631 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4633 /* Now emit code to save SSE registers. The AX parameter contains the number
4634 of SSE parameter registers used to call this function. We use the
4635 sse_prologue_save insn template, which produces a computed jump across
4636 the SSE saves. We need some preparation work to get this working. */
4638 label = gen_label_rtx ();
4639 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4641 /* Compute address to jump to :
4642 label - 4*eax + nnamed_sse_arguments*4 */
4643 tmp_reg = gen_reg_rtx (Pmode);
4644 nsse_reg = gen_reg_rtx (Pmode);
4645 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4646 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4647 gen_rtx_MULT (Pmode, nsse_reg,
4648 GEN_INT (4))));
4649 if (next_cum.sse_regno)
4650 emit_move_insn
4651 (nsse_reg,
4652 gen_rtx_CONST (DImode,
4653 gen_rtx_PLUS (DImode,
4654 label_ref,
4655 GEN_INT (next_cum.sse_regno * 4))));
4656 else
4657 emit_move_insn (nsse_reg, label_ref);
4658 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4660 /* Compute the address of the memory block we save into. We always use a
4661 pointer pointing 127 bytes past the first byte to store - keeping the
4662 displacement within a signed byte limits each save instruction to 4 bytes. */
4663 tmp_reg = gen_reg_rtx (Pmode);
4664 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4665 plus_constant (save_area,
4666 8 * REGPARM_MAX + 127)));
4667 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4668 MEM_NOTRAP_P (mem) = 1;
4669 set_mem_alias_set (mem, set);
4670 set_mem_align (mem, BITS_PER_WORD);
4672 /* And finally do the dirty job! */
4673 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4674 GEN_INT (next_cum.sse_regno), label));
4679 /* Implement va_start. */
4681 void
4682 ix86_va_start (tree valist, rtx nextarg)
4684 HOST_WIDE_INT words, n_gpr, n_fpr;
4685 tree f_gpr, f_fpr, f_ovf, f_sav;
4686 tree gpr, fpr, ovf, sav, t;
4687 tree type;
4689 /* Only 64bit target needs something special. */
4690 if (!TARGET_64BIT)
4692 std_expand_builtin_va_start (valist, nextarg);
4693 return;
4696 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4697 f_fpr = TREE_CHAIN (f_gpr);
4698 f_ovf = TREE_CHAIN (f_fpr);
4699 f_sav = TREE_CHAIN (f_ovf);
4701 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4702 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4703 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4704 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4705 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4707 /* Count number of gp and fp argument registers used. */
4708 words = current_function_args_info.words;
4709 n_gpr = current_function_args_info.regno;
4710 n_fpr = current_function_args_info.sse_regno;
4712 if (TARGET_DEBUG_ARG)
4713 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4714 (int) words, (int) n_gpr, (int) n_fpr);
4716 if (cfun->va_list_gpr_size)
4718 type = TREE_TYPE (gpr);
4719 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4720 build_int_cst (type, n_gpr * 8));
4721 TREE_SIDE_EFFECTS (t) = 1;
4722 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4725 if (cfun->va_list_fpr_size)
4727 type = TREE_TYPE (fpr);
4728 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4729 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4730 TREE_SIDE_EFFECTS (t) = 1;
4731 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4734 /* Find the overflow area. */
4735 type = TREE_TYPE (ovf);
4736 t = make_tree (type, virtual_incoming_args_rtx);
4737 if (words != 0)
4738 t = build2 (PLUS_EXPR, type, t,
4739 build_int_cst (type, words * UNITS_PER_WORD));
4740 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4741 TREE_SIDE_EFFECTS (t) = 1;
4742 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4744 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4746 /* Find the register save area.
4747 The prologue of the function saves it right above the stack frame. */
4748 type = TREE_TYPE (sav);
4749 t = make_tree (type, frame_pointer_rtx);
4750 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4751 TREE_SIDE_EFFECTS (t) = 1;
4752 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4756 /* Implement va_arg. */
4758 tree
4759 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4761 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4762 tree f_gpr, f_fpr, f_ovf, f_sav;
4763 tree gpr, fpr, ovf, sav, t;
4764 int size, rsize;
4765 tree lab_false, lab_over = NULL_TREE;
4766 tree addr, t2;
4767 rtx container;
4768 int indirect_p = 0;
4769 tree ptrtype;
4770 enum machine_mode nat_mode;
4772 /* Only 64bit target needs something special. */
4773 if (!TARGET_64BIT)
4774 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4776 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4777 f_fpr = TREE_CHAIN (f_gpr);
4778 f_ovf = TREE_CHAIN (f_fpr);
4779 f_sav = TREE_CHAIN (f_ovf);
4781 valist = build_va_arg_indirect_ref (valist);
4782 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4783 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4784 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4785 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4787 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4788 if (indirect_p)
4789 type = build_pointer_type (type);
4790 size = int_size_in_bytes (type);
4791 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4793 nat_mode = type_natural_mode (type);
4794 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4795 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4797 /* Pull the value out of the saved registers. */
4799 addr = create_tmp_var (ptr_type_node, "addr");
4800 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4802 if (container)
4804 int needed_intregs, needed_sseregs;
4805 bool need_temp;
4806 tree int_addr, sse_addr;
4808 lab_false = create_artificial_label ();
4809 lab_over = create_artificial_label ();
4811 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4813 need_temp = (!REG_P (container)
4814 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4815 || TYPE_ALIGN (type) > 128));
4817 /* In case we are passing a structure, verify that it is a consecutive block
4818 in the register save area. If not, we need to do moves. */
4819 if (!need_temp && !REG_P (container))
4821 /* Verify that all registers are strictly consecutive. */
4822 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4824 int i;
4826 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4828 rtx slot = XVECEXP (container, 0, i);
4829 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4830 || INTVAL (XEXP (slot, 1)) != i * 16)
4831 need_temp = 1;
4834 else
4836 int i;
4838 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4840 rtx slot = XVECEXP (container, 0, i);
4841 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4842 || INTVAL (XEXP (slot, 1)) != i * 8)
4843 need_temp = 1;
4847 if (!need_temp)
4849 int_addr = addr;
4850 sse_addr = addr;
4852 else
4854 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4855 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4856 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4857 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4860 /* First ensure that we fit completely in registers. */
4861 if (needed_intregs)
4863 t = build_int_cst (TREE_TYPE (gpr),
4864 (REGPARM_MAX - needed_intregs + 1) * 8);
4865 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4866 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4867 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4868 gimplify_and_add (t, pre_p);
4870 if (needed_sseregs)
4872 t = build_int_cst (TREE_TYPE (fpr),
4873 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4874 + REGPARM_MAX * 8);
4875 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4876 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4877 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4878 gimplify_and_add (t, pre_p);
4881 /* Compute index to start of area used for integer regs. */
4882 if (needed_intregs)
4884 /* int_addr = gpr + sav; */
4885 t = fold_convert (ptr_type_node, gpr);
4886 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4887 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4888 gimplify_and_add (t, pre_p);
4890 if (needed_sseregs)
4892 /* sse_addr = fpr + sav; */
4893 t = fold_convert (ptr_type_node, fpr);
4894 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4895 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4896 gimplify_and_add (t, pre_p);
4898 if (need_temp)
4900 int i;
4901 tree temp = create_tmp_var (type, "va_arg_tmp");
4903 /* addr = &temp; */
4904 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4905 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4906 gimplify_and_add (t, pre_p);
4908 for (i = 0; i < XVECLEN (container, 0); i++)
4910 rtx slot = XVECEXP (container, 0, i);
4911 rtx reg = XEXP (slot, 0);
4912 enum machine_mode mode = GET_MODE (reg);
4913 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4914 tree addr_type = build_pointer_type (piece_type);
4915 tree src_addr, src;
4916 int src_offset;
4917 tree dest_addr, dest;
4919 if (SSE_REGNO_P (REGNO (reg)))
4921 src_addr = sse_addr;
4922 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4924 else
4926 src_addr = int_addr;
4927 src_offset = REGNO (reg) * 8;
4929 src_addr = fold_convert (addr_type, src_addr);
4930 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4931 size_int (src_offset));
4932 src = build_va_arg_indirect_ref (src_addr);
4934 dest_addr = fold_convert (addr_type, addr);
4935 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4936 size_int (INTVAL (XEXP (slot, 1))));
4937 dest = build_va_arg_indirect_ref (dest_addr);
4939 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4940 gimplify_and_add (t, pre_p);
4944 if (needed_intregs)
4946 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4947 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4948 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4949 gimplify_and_add (t, pre_p);
4951 if (needed_sseregs)
4953 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4954 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4955 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4956 gimplify_and_add (t, pre_p);
4959 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4960 gimplify_and_add (t, pre_p);
4962 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4963 append_to_statement_list (t, pre_p);
4966 /* ... otherwise out of the overflow area. */
4968 /* Care for on-stack alignment if needed. */
4969 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4970 || integer_zerop (TYPE_SIZE (type)))
4971 t = ovf;
4972 else
4974 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4975 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4976 build_int_cst (TREE_TYPE (ovf), align - 1));
4977 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4978 build_int_cst (TREE_TYPE (t), -align));
4980 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4982 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4983 gimplify_and_add (t2, pre_p);
4985 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4986 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4987 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4988 gimplify_and_add (t, pre_p);
4990 if (container)
4992 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4993 append_to_statement_list (t, pre_p);
4996 ptrtype = build_pointer_type (type);
4997 addr = fold_convert (ptrtype, addr);
4999 if (indirect_p)
5000 addr = build_va_arg_indirect_ref (addr);
5001 return build_va_arg_indirect_ref (addr);
5004 /* Return nonzero if OPNUM's MEM should be matched
5005 in movabs* patterns. */
5008 ix86_check_movabs (rtx insn, int opnum)
5010 rtx set, mem;
5012 set = PATTERN (insn);
5013 if (GET_CODE (set) == PARALLEL)
5014 set = XVECEXP (set, 0, 0);
5015 gcc_assert (GET_CODE (set) == SET);
5016 mem = XEXP (set, opnum);
5017 while (GET_CODE (mem) == SUBREG)
5018 mem = SUBREG_REG (mem);
5019 gcc_assert (MEM_P (mem));
5020 return (volatile_ok || !MEM_VOLATILE_P (mem));
5023 /* Initialize the table of extra 80387 mathematical constants. */
5025 static void
5026 init_ext_80387_constants (void)
5028 static const char * cst[5] =
5030 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5031 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5032 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5033 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5034 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5036 int i;
5038 for (i = 0; i < 5; i++)
5040 real_from_string (&ext_80387_constants_table[i], cst[i]);
5041 /* Ensure each constant is rounded to XFmode precision. */
5042 real_convert (&ext_80387_constants_table[i],
5043 XFmode, &ext_80387_constants_table[i]);
5046 ext_80387_constants_init = 1;
5049 /* Return true if the constant is something that can be loaded with
5050 a special instruction. */
5053 standard_80387_constant_p (rtx x)
5055 REAL_VALUE_TYPE r;
5057 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
5058 return -1;
5060 if (x == CONST0_RTX (GET_MODE (x)))
5061 return 1;
5062 if (x == CONST1_RTX (GET_MODE (x)))
5063 return 2;
5065 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5067 /* For XFmode constants, try to find a special 80387 instruction when
5068 optimizing for size or on those CPUs that benefit from them. */
5069 if (GET_MODE (x) == XFmode
5070 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5072 int i;
5074 if (! ext_80387_constants_init)
5075 init_ext_80387_constants ();
5077 for (i = 0; i < 5; i++)
5078 if (real_identical (&r, &ext_80387_constants_table[i]))
5079 return i + 3;
5082 /* A load of the constant -0.0 or -1.0 will be split into an
5083 fldz;fchs or fld1;fchs sequence. */
5084 if (real_isnegzero (&r))
5085 return 8;
5086 if (real_identical (&r, &dconstm1))
5087 return 9;
5089 return 0;
5092 /* Return the opcode of the special instruction to be used to load
5093 the constant X. */
5095 const char *
5096 standard_80387_constant_opcode (rtx x)
5098 switch (standard_80387_constant_p (x))
5100 case 1:
5101 return "fldz";
5102 case 2:
5103 return "fld1";
5104 case 3:
5105 return "fldlg2";
5106 case 4:
5107 return "fldln2";
5108 case 5:
5109 return "fldl2e";
5110 case 6:
5111 return "fldl2t";
5112 case 7:
5113 return "fldpi";
5114 case 8:
5115 case 9:
5116 return "#";
5117 default:
5118 gcc_unreachable ();
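/* As an illustrative example: an XFmode CONST_DOUBLE equal to pi matches
   entry 4 of ext_80387_constants_table (when optimize_size or
   TARGET_EXT_80387_CONSTANTS allows it), so standard_80387_constant_p
   returns 7 and standard_80387_constant_opcode yields "fldpi", while the
   -0.0 and -1.0 cases (8 and 9) return "#" and are split later into
   fldz;fchs and fld1;fchs.  */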
5122 /* Return the CONST_DOUBLE representing the 80387 constant that is
5123 loaded by the specified special instruction. The argument IDX
5124 matches the return value from standard_80387_constant_p. */
5127 standard_80387_constant_rtx (int idx)
5129 int i;
5131 if (! ext_80387_constants_init)
5132 init_ext_80387_constants ();
5134 switch (idx)
5136 case 3:
5137 case 4:
5138 case 5:
5139 case 6:
5140 case 7:
5141 i = idx - 3;
5142 break;
5144 default:
5145 gcc_unreachable ();
5148 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5149 XFmode);
5152 /* Return 1 if MODE is a valid mode for SSE. */
5153 static int
5154 standard_sse_mode_p (enum machine_mode mode)
5156 switch (mode)
5158 case V16QImode:
5159 case V8HImode:
5160 case V4SImode:
5161 case V2DImode:
5162 case V4SFmode:
5163 case V2DFmode:
5164 return 1;
5166 default:
5167 return 0;
5171 /* Return 1 if X is an FP constant that we can load into an SSE register without using memory. */
5174 standard_sse_constant_p (rtx x)
5176 enum machine_mode mode = GET_MODE (x);
5178 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5179 return 1;
5180 if (vector_all_ones_operand (x, mode)
5181 && standard_sse_mode_p (mode))
5182 return TARGET_SSE2 ? 2 : -1;
5184 return 0;
5187 /* Return the opcode of the special instruction to be used to load
5188 the constant X. */
5190 const char *
5191 standard_sse_constant_opcode (rtx insn, rtx x)
5193 switch (standard_sse_constant_p (x))
5195 case 1:
5196 if (get_attr_mode (insn) == MODE_V4SF)
5197 return "xorps\t%0, %0";
5198 else if (get_attr_mode (insn) == MODE_V2DF)
5199 return "xorpd\t%0, %0";
5200 else
5201 return "pxor\t%0, %0";
5202 case 2:
5203 return "pcmpeqd\t%0, %0";
5205 gcc_unreachable ();
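/* Sketch of the intended output: an all-zero vector constant is cleared
   with xorps/xorpd/pxor depending on the insn mode, while an all-ones
   vector (return value 2, which requires SSE2) is materialized with
   pcmpeqd, so neither load touches memory.  */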
5208 /* Returns 1 if OP contains a symbol reference */
5211 symbolic_reference_mentioned_p (rtx op)
5213 const char *fmt;
5214 int i;
5216 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5217 return 1;
5219 fmt = GET_RTX_FORMAT (GET_CODE (op));
5220 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5222 if (fmt[i] == 'E')
5224 int j;
5226 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5227 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5228 return 1;
5231 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5232 return 1;
5235 return 0;
5238 /* Return 1 if it is appropriate to emit `ret' instructions in the
5239 body of a function. Do this only if the epilogue is simple, needing a
5240 couple of insns. Prior to reloading, we can't tell how many registers
5241 must be saved, so return 0 then. Return 0 if there is no frame
5242 marker to de-allocate. */
5245 ix86_can_use_return_insn_p (void)
5247 struct ix86_frame frame;
5249 if (! reload_completed || frame_pointer_needed)
5250 return 0;
5252 /* Don't allow popping more than 32768 bytes of arguments, since
5253 that's all we can do with one instruction. */
5254 if (current_function_pops_args
5255 && current_function_args_size >= 32768)
5256 return 0;
5258 ix86_compute_frame_layout (&frame);
5259 return frame.to_allocate == 0 && frame.nregs == 0;
5262 /* Value should be nonzero if functions must have frame pointers.
5263 Zero means the frame pointer need not be set up (and parms may
5264 be accessed via the stack pointer) in functions that seem suitable. */
5267 ix86_frame_pointer_required (void)
5269 /* If we accessed previous frames, then the generated code expects
5270 to be able to access the saved ebp value in our frame. */
5271 if (cfun->machine->accesses_prev_frame)
5272 return 1;
5274 /* Several x86 OSes need a frame pointer for other reasons,
5275 usually pertaining to setjmp. */
5276 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5277 return 1;
5279 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5280 the frame pointer by default. Turn it back on now if we've not
5281 got a leaf function. */
5282 if (TARGET_OMIT_LEAF_FRAME_POINTER
5283 && (!current_function_is_leaf
5284 || ix86_current_function_calls_tls_descriptor))
5285 return 1;
5287 if (current_function_profile)
5288 return 1;
5290 return 0;
5293 /* Record that the current function accesses previous call frames. */
5295 void
5296 ix86_setup_frame_addresses (void)
5298 cfun->machine->accesses_prev_frame = 1;
5301 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5302 # define USE_HIDDEN_LINKONCE 1
5303 #else
5304 # define USE_HIDDEN_LINKONCE 0
5305 #endif
5307 static int pic_labels_used;
5309 /* Fills in the label name that should be used for a pc thunk for
5310 the given register. */
5312 static void
5313 get_pc_thunk_name (char name[32], unsigned int regno)
5315 gcc_assert (!TARGET_64BIT);
5317 if (USE_HIDDEN_LINKONCE)
5318 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5319 else
5320 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5324 /* This function generates code for -fpic that loads %ebx with
5325 the return address of the caller and then returns. */
5327 void
5328 ix86_file_end (void)
5330 rtx xops[2];
5331 int regno;
5333 for (regno = 0; regno < 8; ++regno)
5335 char name[32];
5337 if (! ((pic_labels_used >> regno) & 1))
5338 continue;
5340 get_pc_thunk_name (name, regno);
5342 #if TARGET_MACHO
5343 if (TARGET_MACHO)
5345 switch_to_section (darwin_sections[text_coal_section]);
5346 fputs ("\t.weak_definition\t", asm_out_file);
5347 assemble_name (asm_out_file, name);
5348 fputs ("\n\t.private_extern\t", asm_out_file);
5349 assemble_name (asm_out_file, name);
5350 fputs ("\n", asm_out_file);
5351 ASM_OUTPUT_LABEL (asm_out_file, name);
5353 else
5354 #endif
5355 if (USE_HIDDEN_LINKONCE)
5357 tree decl;
5359 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5360 error_mark_node);
5361 TREE_PUBLIC (decl) = 1;
5362 TREE_STATIC (decl) = 1;
5363 DECL_ONE_ONLY (decl) = 1;
5365 (*targetm.asm_out.unique_section) (decl, 0);
5366 switch_to_section (get_named_section (decl, NULL, 0));
5368 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5369 fputs ("\t.hidden\t", asm_out_file);
5370 assemble_name (asm_out_file, name);
5371 fputc ('\n', asm_out_file);
5372 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5374 else
5376 switch_to_section (text_section);
5377 ASM_OUTPUT_LABEL (asm_out_file, name);
5380 xops[0] = gen_rtx_REG (SImode, regno);
5381 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5382 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5383 output_asm_insn ("ret", xops);
5386 if (NEED_INDICATE_EXEC_STACK)
5387 file_end_indicate_exec_stack ();
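/* A minimal sketch of one emitted thunk, assuming the %ebx case and
   HAVE_GAS_HIDDEN (assembler output shown for illustration):

       __i686.get_pc_thunk.bx:
           movl (%esp), %ebx
           ret

   i.e. the thunk copies its own return address into the chosen register.  */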
5390 /* Emit code for the SET_GOT patterns. */
5392 const char *
5393 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5395 rtx xops[3];
5397 xops[0] = dest;
5399 if (TARGET_VXWORKS_RTP && flag_pic)
5401 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5402 xops[2] = gen_rtx_MEM (Pmode,
5403 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5404 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5406 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5407 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5408 an unadorned address. */
5409 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5410 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5411 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5412 return "";
5415 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5417 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5419 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5421 if (!flag_pic)
5422 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5423 else
5424 output_asm_insn ("call\t%a2", xops);
5426 #if TARGET_MACHO
5427 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5428 is what will be referenced by the Mach-O PIC subsystem. */
5429 if (!label)
5430 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5431 #endif
5433 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5434 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5436 if (flag_pic)
5437 output_asm_insn ("pop{l}\t%0", xops);
5439 else
5441 char name[32];
5442 get_pc_thunk_name (name, REGNO (dest));
5443 pic_labels_used |= 1 << REGNO (dest);
5445 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5446 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5447 output_asm_insn ("call\t%X2", xops);
5448 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5449 is what will be referenced by the Mach-O PIC subsystem. */
5450 #if TARGET_MACHO
5451 if (!label)
5452 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5453 else
5454 targetm.asm_out.internal_label (asm_out_file, "L",
5455 CODE_LABEL_NUMBER (label));
5456 #endif
5459 if (TARGET_MACHO)
5460 return "";
5462 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5463 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5464 else
5465 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5467 return "";
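/* A rough sketch of the sequence emitted for -fpic on ia32 without deep
   branch prediction (label names illustrative, assuming GOT_SYMBOL_NAME
   expands to _GLOBAL_OFFSET_TABLE_):

       call .L2
   .L2: popl %ebx
       addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With TARGET_DEEP_BRANCH_PREDICTION the call targets the pc thunk
   emitted by ix86_file_end instead of a local label.  */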
5470 /* Generate a "push" pattern for input ARG. */
5472 static rtx
5473 gen_push (rtx arg)
5475 return gen_rtx_SET (VOIDmode,
5476 gen_rtx_MEM (Pmode,
5477 gen_rtx_PRE_DEC (Pmode,
5478 stack_pointer_rtx)),
5479 arg);
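/* The generated RTL is simply
     (set (mem:SI (pre_dec:SI (reg:SI sp))) arg)
   (SImode shown for the 32-bit case; DImode in 64-bit mode), which
   corresponds to a single push instruction.  */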
5482 /* Return >= 0 if there is an unused call-clobbered register available
5483 for the entire function. */
5485 static unsigned int
5486 ix86_select_alt_pic_regnum (void)
5488 if (current_function_is_leaf && !current_function_profile
5489 && !ix86_current_function_calls_tls_descriptor)
5491 int i;
5492 for (i = 2; i >= 0; --i)
5493 if (!regs_ever_live[i])
5494 return i;
5497 return INVALID_REGNUM;
5500 /* Return 1 if we need to save REGNO. */
5501 static int
5502 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5504 if (pic_offset_table_rtx
5505 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5506 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5507 || current_function_profile
5508 || current_function_calls_eh_return
5509 || current_function_uses_const_pool))
5511 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5512 return 0;
5513 return 1;
5516 if (current_function_calls_eh_return && maybe_eh_return)
5518 unsigned i;
5519 for (i = 0; ; i++)
5521 unsigned test = EH_RETURN_DATA_REGNO (i);
5522 if (test == INVALID_REGNUM)
5523 break;
5524 if (test == regno)
5525 return 1;
5529 if (cfun->machine->force_align_arg_pointer
5530 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5531 return 1;
5533 return (regs_ever_live[regno]
5534 && !call_used_regs[regno]
5535 && !fixed_regs[regno]
5536 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5539 /* Return number of registers to be saved on the stack. */
5541 static int
5542 ix86_nsaved_regs (void)
5544 int nregs = 0;
5545 int regno;
5547 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5548 if (ix86_save_reg (regno, true))
5549 nregs++;
5550 return nregs;
5553 /* Return the offset between two registers, one to be eliminated, and the other
5554 its replacement, at the start of a routine. */
5556 HOST_WIDE_INT
5557 ix86_initial_elimination_offset (int from, int to)
5559 struct ix86_frame frame;
5560 ix86_compute_frame_layout (&frame);
5562 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5563 return frame.hard_frame_pointer_offset;
5564 else if (from == FRAME_POINTER_REGNUM
5565 && to == HARD_FRAME_POINTER_REGNUM)
5566 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5567 else
5569 gcc_assert (to == STACK_POINTER_REGNUM);
5571 if (from == ARG_POINTER_REGNUM)
5572 return frame.stack_pointer_offset;
5574 gcc_assert (from == FRAME_POINTER_REGNUM);
5575 return frame.stack_pointer_offset - frame.frame_pointer_offset;
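/* Worked example with illustrative numbers: on a 32-bit leaf function with
   a frame pointer, two saved registers, 24 bytes of locals and no extra
   stack alignment, ix86_compute_frame_layout below gives
   hard_frame_pointer_offset = 8, frame_pointer_offset = 16 and
   stack_pointer_offset = 40, so eliminating the arg pointer to the hard
   frame pointer returns 8 and to the stack pointer returns 40.  */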
5581 /* Fill the ix86_frame structure describing the frame of the current function. */
5581 static void
5582 ix86_compute_frame_layout (struct ix86_frame *frame)
5584 HOST_WIDE_INT total_size;
5585 unsigned int stack_alignment_needed;
5586 HOST_WIDE_INT offset;
5587 unsigned int preferred_alignment;
5588 HOST_WIDE_INT size = get_frame_size ();
5590 frame->nregs = ix86_nsaved_regs ();
5591 total_size = size;
5593 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5594 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5596 /* During reload iteration the number of registers saved can change.
5597 Recompute the value as needed. Do not recompute when the number of
5598 registers didn't change, as reload calls this function multiple times
5599 and does not expect the decision to change within a single iteration. */
5600 if (!optimize_size
5601 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5603 int count = frame->nregs;
5605 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5606 /* The fast prologue uses move instead of push to save registers. This
5607 is significantly longer, but it also executes faster, as modern hardware
5608 can execute the moves in parallel but can't do that for push/pop.
5610 Be careful about choosing which prologue to emit: when the function takes
5611 many instructions to execute, we may use the slow version, as well as
5612 when the function is known to be outside a hot spot (this is known with
5613 profile feedback only). Weight the size of the function by the number of
5614 registers to save, as it is cheap to use one or two push instructions but
5615 very slow to use many of them. */
5616 if (count)
5617 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5618 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5619 || (flag_branch_probabilities
5620 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5621 cfun->machine->use_fast_prologue_epilogue = false;
5622 else
5623 cfun->machine->use_fast_prologue_epilogue
5624 = !expensive_function_p (count);
5626 if (TARGET_PROLOGUE_USING_MOVE
5627 && cfun->machine->use_fast_prologue_epilogue)
5628 frame->save_regs_using_mov = true;
5629 else
5630 frame->save_regs_using_mov = false;
5633 /* Skip return address and saved base pointer. */
5634 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5636 frame->hard_frame_pointer_offset = offset;
5638 /* Do some sanity checking of stack_alignment_needed and
5639 preferred_alignment, since the i386 port is the only one using these
5640 features, and they may break easily. */
5642 gcc_assert (!size || stack_alignment_needed);
5643 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5644 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5645 gcc_assert (stack_alignment_needed
5646 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5648 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5649 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5651 /* Register save area */
5652 offset += frame->nregs * UNITS_PER_WORD;
5654 /* Va-arg area */
5655 if (ix86_save_varrargs_registers)
5657 offset += X86_64_VARARGS_SIZE;
5658 frame->va_arg_size = X86_64_VARARGS_SIZE;
5660 else
5661 frame->va_arg_size = 0;
5663 /* Align start of frame for local function. */
5664 frame->padding1 = ((offset + stack_alignment_needed - 1)
5665 & -stack_alignment_needed) - offset;
5667 offset += frame->padding1;
5669 /* Frame pointer points here. */
5670 frame->frame_pointer_offset = offset;
5672 offset += size;
5674 /* Add the outgoing arguments area. It can be skipped if we eliminated
5675 all the function calls as dead code.
5676 Skipping is however impossible when the function calls alloca: the alloca
5677 expander assumes that the last current_function_outgoing_args_size bytes
5678 of the stack frame are unused. */
5679 if (ACCUMULATE_OUTGOING_ARGS
5680 && (!current_function_is_leaf || current_function_calls_alloca
5681 || ix86_current_function_calls_tls_descriptor))
5683 offset += current_function_outgoing_args_size;
5684 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5686 else
5687 frame->outgoing_arguments_size = 0;
5689 /* Align stack boundary. Only needed if we're calling another function
5690 or using alloca. */
5691 if (!current_function_is_leaf || current_function_calls_alloca
5692 || ix86_current_function_calls_tls_descriptor)
5693 frame->padding2 = ((offset + preferred_alignment - 1)
5694 & -preferred_alignment) - offset;
5695 else
5696 frame->padding2 = 0;
5698 offset += frame->padding2;
5700 /* We've reached end of stack frame. */
5701 frame->stack_pointer_offset = offset;
5703 /* Size prologue needs to allocate. */
5704 frame->to_allocate =
5705 (size + frame->padding1 + frame->padding2
5706 + frame->outgoing_arguments_size + frame->va_arg_size);
5708 if ((!frame->to_allocate && frame->nregs <= 1)
5709 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5710 frame->save_regs_using_mov = false;
5712 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5713 && current_function_is_leaf
5714 && !ix86_current_function_calls_tls_descriptor)
5716 frame->red_zone_size = frame->to_allocate;
5717 if (frame->save_regs_using_mov)
5718 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5719 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5720 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5722 else
5723 frame->red_zone_size = 0;
5724 frame->to_allocate -= frame->red_zone_size;
5725 frame->stack_pointer_offset -= frame->red_zone_size;
5726 #if 0
5727 fprintf (stderr, "\n");
5728 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5729 fprintf (stderr, "size: %ld\n", (long)size);
5730 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5731 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5732 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5733 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5734 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5735 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5736 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5737 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5738 (long)frame->hard_frame_pointer_offset);
5739 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5740 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5741 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5742 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5743 #endif
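/* Illustrative sketch of the layout computed above, from higher to lower
   addresses (field names refer to struct ix86_frame; sizes vary per
   function, and offsets are measured roughly from the incoming argument
   pointer downwards):

       return address
       saved frame pointer (if frame_pointer_needed)
                                        <- hard_frame_pointer_offset
       register save area (nregs * UNITS_PER_WORD)
       va-arg register save area (va_arg_size)
       padding1
                                        <- frame_pointer_offset
       local variables (get_frame_size ())
       outgoing arguments area
       padding2
                                        <- stack_pointer_offset

   On 64-bit leaf functions the red zone is then carved out of to_allocate.  */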
5746 /* Emit code to save registers in the prologue. */
5748 static void
5749 ix86_emit_save_regs (void)
5751 unsigned int regno;
5752 rtx insn;
5754 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5755 if (ix86_save_reg (regno, true))
5757 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5758 RTX_FRAME_RELATED_P (insn) = 1;
5762 /* Emit code to save registers using MOV insns. The first register
5763 is stored at POINTER + OFFSET. */
5764 static void
5765 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5767 unsigned int regno;
5768 rtx insn;
5770 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5771 if (ix86_save_reg (regno, true))
5773 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5774 Pmode, offset),
5775 gen_rtx_REG (Pmode, regno));
5776 RTX_FRAME_RELATED_P (insn) = 1;
5777 offset += UNITS_PER_WORD;
5781 /* Expand a prologue or epilogue stack adjustment.
5782 The pattern exists to put a dependency on all ebp-based memory accesses.
5783 STYLE should be negative if instructions should be marked as frame related,
5784 zero if the %r11 register is live and cannot be freely used, and positive
5785 otherwise. */
5787 static void
5788 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5790 rtx insn;
5792 if (! TARGET_64BIT)
5793 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5794 else if (x86_64_immediate_operand (offset, DImode))
5795 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5796 else
5798 rtx r11;
5799 /* r11 is used by indirect sibcall return as well, set before the
5800 epilogue and used after the epilogue. ATM indirect sibcall
5801 shouldn't be used together with huge frame sizes in one
5802 function because of the frame_size check in sibcall.c. */
5803 gcc_assert (style);
5804 r11 = gen_rtx_REG (DImode, R11_REG);
5805 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5806 if (style < 0)
5807 RTX_FRAME_RELATED_P (insn) = 1;
5808 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5809 offset));
5811 if (style < 0)
5812 RTX_FRAME_RELATED_P (insn) = 1;
5815 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5817 static rtx
5818 ix86_internal_arg_pointer (void)
5820 bool has_force_align_arg_pointer =
5821 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5822 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5823 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5824 && DECL_NAME (current_function_decl)
5825 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5826 && DECL_FILE_SCOPE_P (current_function_decl))
5827 || ix86_force_align_arg_pointer
5828 || has_force_align_arg_pointer)
5830 /* Nested functions can't realign the stack due to a register
5831 conflict. */
5832 if (DECL_CONTEXT (current_function_decl)
5833 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5835 if (ix86_force_align_arg_pointer)
5836 warning (0, "-mstackrealign ignored for nested functions");
5837 if (has_force_align_arg_pointer)
5838 error ("%s not supported for nested functions",
5839 ix86_force_align_arg_pointer_string);
5840 return virtual_incoming_args_rtx;
5842 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5843 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5845 else
5846 return virtual_incoming_args_rtx;
5849 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5850 This is called from dwarf2out.c to emit call frame instructions
5851 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5852 static void
5853 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5855 rtx unspec = SET_SRC (pattern);
5856 gcc_assert (GET_CODE (unspec) == UNSPEC);
5858 switch (index)
5860 case UNSPEC_REG_SAVE:
5861 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5862 SET_DEST (pattern));
5863 break;
5864 case UNSPEC_DEF_CFA:
5865 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5866 INTVAL (XVECEXP (unspec, 0, 0)));
5867 break;
5868 default:
5869 gcc_unreachable ();
5873 /* Expand the prologue into a bunch of separate insns. */
5875 void
5876 ix86_expand_prologue (void)
5878 rtx insn;
5879 bool pic_reg_used;
5880 struct ix86_frame frame;
5881 HOST_WIDE_INT allocate;
5883 ix86_compute_frame_layout (&frame);
5885 if (cfun->machine->force_align_arg_pointer)
5887 rtx x, y;
5889 /* Grab the argument pointer. */
5890 x = plus_constant (stack_pointer_rtx, 4);
5891 y = cfun->machine->force_align_arg_pointer;
5892 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5893 RTX_FRAME_RELATED_P (insn) = 1;
5895 /* The unwind info consists of two parts: install the fafp as the cfa,
5896 and record the fafp as the "save register" of the stack pointer.
5897 The latter is there so that the unwinder can see where it
5898 should restore the stack pointer across the `and' insn. */
5899 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5900 x = gen_rtx_SET (VOIDmode, y, x);
5901 RTX_FRAME_RELATED_P (x) = 1;
5902 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5903 UNSPEC_REG_SAVE);
5904 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5905 RTX_FRAME_RELATED_P (y) = 1;
5906 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5907 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5908 REG_NOTES (insn) = x;
5910 /* Align the stack. */
5911 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5912 GEN_INT (-16)));
5914 /* And here we cheat like madmen with the unwind info. We force the
5915 cfa register back to sp+4, which is exactly what it was at the
5916 start of the function. Re-pushing the return address results in
5917 the return at the same spot relative to the cfa, and thus is
5918 correct wrt the unwind info. */
5919 x = cfun->machine->force_align_arg_pointer;
5920 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5921 insn = emit_insn (gen_push (x));
5922 RTX_FRAME_RELATED_P (insn) = 1;
5924 x = GEN_INT (4);
5925 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5926 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5927 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5928 REG_NOTES (insn) = x;
5931 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5932 slower on all targets. Also sdb doesn't like it. */
5934 if (frame_pointer_needed)
5936 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5937 RTX_FRAME_RELATED_P (insn) = 1;
5939 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5940 RTX_FRAME_RELATED_P (insn) = 1;
5943 allocate = frame.to_allocate;
5945 if (!frame.save_regs_using_mov)
5946 ix86_emit_save_regs ();
5947 else
5948 allocate += frame.nregs * UNITS_PER_WORD;
5950 /* When using the red zone, we may start register saving before allocating
5951 the stack frame, saving one cycle of the prologue. */
5952 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5953 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5954 : stack_pointer_rtx,
5955 -frame.nregs * UNITS_PER_WORD);
5957 if (allocate == 0)
5959 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5960 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5961 GEN_INT (-allocate), -1);
5962 else
5964 /* Only valid for Win32. */
5965 rtx eax = gen_rtx_REG (SImode, 0);
5966 bool eax_live = ix86_eax_live_at_start_p ();
5967 rtx t;
5969 gcc_assert (!TARGET_64BIT);
5971 if (eax_live)
5973 emit_insn (gen_push (eax));
5974 allocate -= 4;
5977 emit_move_insn (eax, GEN_INT (allocate));
5979 insn = emit_insn (gen_allocate_stack_worker (eax));
5980 RTX_FRAME_RELATED_P (insn) = 1;
5981 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5982 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5983 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5984 t, REG_NOTES (insn));
5986 if (eax_live)
5988 if (frame_pointer_needed)
5989 t = plus_constant (hard_frame_pointer_rtx,
5990 allocate
5991 - frame.to_allocate
5992 - frame.nregs * UNITS_PER_WORD);
5993 else
5994 t = plus_constant (stack_pointer_rtx, allocate);
5995 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5999 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6001 if (!frame_pointer_needed || !frame.to_allocate)
6002 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6003 else
6004 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6005 -frame.nregs * UNITS_PER_WORD);
6008 pic_reg_used = false;
6009 if (pic_offset_table_rtx
6010 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
6011 || current_function_profile))
6013 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6015 if (alt_pic_reg_used != INVALID_REGNUM)
6016 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
6018 pic_reg_used = true;
6021 if (pic_reg_used)
6023 if (TARGET_64BIT)
6025 if (ix86_cmodel == CM_LARGE_PIC)
6027 rtx tmp_reg = gen_rtx_REG (DImode,
6028 FIRST_REX_INT_REG + 3 /* R11 */);
6029 rtx label = gen_label_rtx ();
6030 emit_label (label);
6031 LABEL_PRESERVE_P (label) = 1;
6032 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6033 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6034 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6035 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6036 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6037 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6038 pic_offset_table_rtx, tmp_reg));
6040 else
6041 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6043 else
6044 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6046 /* Even with accurate pre-reload life analysis, we can wind up
6047 deleting all references to the pic register after reload.
6048 Consider if cross-jumping unifies two sides of a branch
6049 controlled by a comparison vs the only read from a global.
6050 In which case, allow the set_got to be deleted, though we're
6051 too late to do anything about the ebx save in the prologue. */
6052 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6055 /* Prevent function calls from being scheduled before the call to mcount.
6056 In the pic_reg_used case, make sure that the got load isn't deleted. */
6057 if (current_function_profile)
6058 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6061 /* Emit code to restore saved registers using MOV insns. First register
6062 is restored from POINTER + OFFSET. */
6063 static void
6064 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6065 int maybe_eh_return)
6067 int regno;
6068 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6070 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6071 if (ix86_save_reg (regno, maybe_eh_return))
6073 /* Ensure that adjust_address won't be forced to produce a pointer
6074 outside the range allowed by the x86-64 instruction set. */
6075 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6077 rtx r11;
6079 r11 = gen_rtx_REG (DImode, R11_REG);
6080 emit_move_insn (r11, GEN_INT (offset));
6081 emit_insn (gen_adddi3 (r11, r11, pointer));
6082 base_address = gen_rtx_MEM (Pmode, r11);
6083 offset = 0;
6085 emit_move_insn (gen_rtx_REG (Pmode, regno),
6086 adjust_address (base_address, Pmode, offset));
6087 offset += UNITS_PER_WORD;
6091 /* Restore function stack, frame, and registers. */
6093 void
6094 ix86_expand_epilogue (int style)
6096 int regno;
6097 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6098 struct ix86_frame frame;
6099 HOST_WIDE_INT offset;
6101 ix86_compute_frame_layout (&frame);
6103 /* Calculate start of saved registers relative to ebp. Special care
6104 must be taken for the normal return case of a function using
6105 eh_return: the eax and edx registers are marked as saved, but not
6106 restored along this path. */
6107 offset = frame.nregs;
6108 if (current_function_calls_eh_return && style != 2)
6109 offset -= 2;
6110 offset *= -UNITS_PER_WORD;
6112 /* If we're only restoring one register and sp is not valid, then
6113 use a move instruction to restore the register, since it's
6114 less work than reloading sp and popping the register.
6116 The default code results in a stack adjustment using an add/lea instruction,
6117 while this code results in a LEAVE instruction (or discrete equivalent),
6118 so it is profitable in some other cases as well, especially when there
6119 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6120 is set and there is exactly one register to pop. This heuristic may need
6121 some tuning in the future. */
6122 if ((!sp_valid && frame.nregs <= 1)
6123 || (TARGET_EPILOGUE_USING_MOVE
6124 && cfun->machine->use_fast_prologue_epilogue
6125 && (frame.nregs > 1 || frame.to_allocate))
6126 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6127 || (frame_pointer_needed && TARGET_USE_LEAVE
6128 && cfun->machine->use_fast_prologue_epilogue
6129 && frame.nregs == 1)
6130 || current_function_calls_eh_return)
6132 /* Restore registers. We can use ebp or esp to address the memory
6133 locations. If both are available, default to ebp, since offsets
6134 are known to be small. The only exception is esp pointing directly to
6135 the end of the block of saved registers, where we may simplify the
6136 addressing mode. */
6138 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6139 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6140 frame.to_allocate, style == 2);
6141 else
6142 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6143 offset, style == 2);
6145 /* eh_return epilogues need %ecx added to the stack pointer. */
6146 if (style == 2)
6148 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6150 if (frame_pointer_needed)
6152 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6153 tmp = plus_constant (tmp, UNITS_PER_WORD);
6154 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6156 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6157 emit_move_insn (hard_frame_pointer_rtx, tmp);
6159 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6160 const0_rtx, style);
6162 else
6164 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6165 tmp = plus_constant (tmp, (frame.to_allocate
6166 + frame.nregs * UNITS_PER_WORD));
6167 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6170 else if (!frame_pointer_needed)
6171 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6172 GEN_INT (frame.to_allocate
6173 + frame.nregs * UNITS_PER_WORD),
6174 style);
6175 /* If not an i386, mov & pop is faster than "leave". */
6176 else if (TARGET_USE_LEAVE || optimize_size
6177 || !cfun->machine->use_fast_prologue_epilogue)
6178 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6179 else
6181 pro_epilogue_adjust_stack (stack_pointer_rtx,
6182 hard_frame_pointer_rtx,
6183 const0_rtx, style);
6184 if (TARGET_64BIT)
6185 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6186 else
6187 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6190 else
6192 /* First step is to deallocate the stack frame so that we can
6193 pop the registers. */
6194 if (!sp_valid)
6196 gcc_assert (frame_pointer_needed);
6197 pro_epilogue_adjust_stack (stack_pointer_rtx,
6198 hard_frame_pointer_rtx,
6199 GEN_INT (offset), style);
6201 else if (frame.to_allocate)
6202 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6203 GEN_INT (frame.to_allocate), style);
6205 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6206 if (ix86_save_reg (regno, false))
6208 if (TARGET_64BIT)
6209 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6210 else
6211 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6213 if (frame_pointer_needed)
6215 /* Leave results in shorter dependency chains on CPUs that are
6216 able to grok it fast. */
6217 if (TARGET_USE_LEAVE)
6218 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6219 else if (TARGET_64BIT)
6220 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6221 else
6222 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6226 if (cfun->machine->force_align_arg_pointer)
6228 emit_insn (gen_addsi3 (stack_pointer_rtx,
6229 cfun->machine->force_align_arg_pointer,
6230 GEN_INT (-4)));
6233 /* Sibcall epilogues don't want a return instruction. */
6234 if (style == 0)
6235 return;
6237 if (current_function_pops_args && current_function_args_size)
6239 rtx popc = GEN_INT (current_function_pops_args);
6241 /* i386 can only pop 64K bytes. If asked to pop more, pop
6242 return address, do explicit add, and jump indirectly to the
6243 caller. */
6245 if (current_function_pops_args >= 65536)
6247 rtx ecx = gen_rtx_REG (SImode, 2);
6249 /* There is no "pascal" calling convention in 64bit ABI. */
6250 gcc_assert (!TARGET_64BIT);
6252 emit_insn (gen_popsi1 (ecx));
6253 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6254 emit_jump_insn (gen_return_indirect_internal (ecx));
6256 else
6257 emit_jump_insn (gen_return_pop_internal (popc));
6259 else
6260 emit_jump_insn (gen_return_internal ());
6263 /* Reset from the function's potential modifications. */
6265 static void
6266 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6267 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6269 if (pic_offset_table_rtx)
6270 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6271 #if TARGET_MACHO
6272 /* Mach-O doesn't support labels at the end of objects, so if
6273 it looks like we might want one, insert a NOP. */
6275 rtx insn = get_last_insn ();
6276 while (insn
6277 && NOTE_P (insn)
6278 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6279 insn = PREV_INSN (insn);
6280 if (insn
6281 && (LABEL_P (insn)
6282 || (NOTE_P (insn)
6283 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6284 fputs ("\tnop\n", file);
6286 #endif
6290 /* Extract the parts of an RTL expression that is a valid memory address
6291 for an instruction. Return 0 if the structure of the address is
6292 grossly off. Return -1 if the address contains ASHIFT, so it is not
6293 strictly valid but is still used for computing the length of a lea instruction. */
6296 ix86_decompose_address (rtx addr, struct ix86_address *out)
6298 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6299 rtx base_reg, index_reg;
6300 HOST_WIDE_INT scale = 1;
6301 rtx scale_rtx = NULL_RTX;
6302 int retval = 1;
6303 enum ix86_address_seg seg = SEG_DEFAULT;
6305 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6306 base = addr;
6307 else if (GET_CODE (addr) == PLUS)
6309 rtx addends[4], op;
6310 int n = 0, i;
6312 op = addr;
6315 if (n >= 4)
6316 return 0;
6317 addends[n++] = XEXP (op, 1);
6318 op = XEXP (op, 0);
6320 while (GET_CODE (op) == PLUS);
6321 if (n >= 4)
6322 return 0;
6323 addends[n] = op;
6325 for (i = n; i >= 0; --i)
6327 op = addends[i];
6328 switch (GET_CODE (op))
6330 case MULT:
6331 if (index)
6332 return 0;
6333 index = XEXP (op, 0);
6334 scale_rtx = XEXP (op, 1);
6335 break;
6337 case UNSPEC:
6338 if (XINT (op, 1) == UNSPEC_TP
6339 && TARGET_TLS_DIRECT_SEG_REFS
6340 && seg == SEG_DEFAULT)
6341 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6342 else
6343 return 0;
6344 break;
6346 case REG:
6347 case SUBREG:
6348 if (!base)
6349 base = op;
6350 else if (!index)
6351 index = op;
6352 else
6353 return 0;
6354 break;
6356 case CONST:
6357 case CONST_INT:
6358 case SYMBOL_REF:
6359 case LABEL_REF:
6360 if (disp)
6361 return 0;
6362 disp = op;
6363 break;
6365 default:
6366 return 0;
6370 else if (GET_CODE (addr) == MULT)
6372 index = XEXP (addr, 0); /* index*scale */
6373 scale_rtx = XEXP (addr, 1);
6375 else if (GET_CODE (addr) == ASHIFT)
6377 rtx tmp;
6379 /* We're called for lea too, which implements ashift on occasion. */
6380 index = XEXP (addr, 0);
6381 tmp = XEXP (addr, 1);
6382 if (!CONST_INT_P (tmp))
6383 return 0;
6384 scale = INTVAL (tmp);
6385 if ((unsigned HOST_WIDE_INT) scale > 3)
6386 return 0;
6387 scale = 1 << scale;
6388 retval = -1;
6390 else
6391 disp = addr; /* displacement */
6393 /* Extract the integral value of scale. */
6394 if (scale_rtx)
6396 if (!CONST_INT_P (scale_rtx))
6397 return 0;
6398 scale = INTVAL (scale_rtx);
6401 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6402 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6404 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
6405 if (base_reg && index_reg && scale == 1
6406 && (index_reg == arg_pointer_rtx
6407 || index_reg == frame_pointer_rtx
6408 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6410 rtx tmp;
6411 tmp = base, base = index, index = tmp;
6412 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6415 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6416 if ((base_reg == hard_frame_pointer_rtx
6417 || base_reg == frame_pointer_rtx
6418 || base_reg == arg_pointer_rtx) && !disp)
6419 disp = const0_rtx;
6421 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6422 Avoid this by transforming to [%esi+0]. */
6423 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6424 && base_reg && !index_reg && !disp
6425 && REG_P (base_reg)
6426 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6427 disp = const0_rtx;
6429 /* Special case: encode reg+reg instead of reg*2. */
6430 if (!base && index && scale && scale == 2)
6431 base = index, base_reg = index_reg, scale = 1;
6433 /* Special case: scaling cannot be encoded without base or displacement. */
6434 if (!base && !disp && index && scale != 1)
6435 disp = const0_rtx;
6437 out->base = base;
6438 out->index = index;
6439 out->disp = disp;
6440 out->scale = scale;
6441 out->seg = seg;
6443 return retval;
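/* For example, an address in canonical form such as
     (plus:SI (plus:SI (reg:SI %esi)
                       (mult:SI (reg:SI %ebx) (const_int 4)))
              (const_int 12))
   decomposes into base = %esi, index = %ebx, scale = 4, disp = 12 and
   seg = SEG_DEFAULT, i.e. the operand of "12(%esi,%ebx,4)" (illustrative
   hard registers shown for clarity).  */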
6446 /* Return the cost of the memory address X.
6447 For i386, it is better to use a complex address than let gcc copy
6448 the address into a reg and make a new pseudo. But not if the address
6449 requires two regs - that would mean more pseudos with longer
6450 lifetimes. */
6451 static int
6452 ix86_address_cost (rtx x)
6454 struct ix86_address parts;
6455 int cost = 1;
6456 int ok = ix86_decompose_address (x, &parts);
6458 gcc_assert (ok);
6460 if (parts.base && GET_CODE (parts.base) == SUBREG)
6461 parts.base = SUBREG_REG (parts.base);
6462 if (parts.index && GET_CODE (parts.index) == SUBREG)
6463 parts.index = SUBREG_REG (parts.index);
6465 /* More complex memory references are better. */
6466 if (parts.disp && parts.disp != const0_rtx)
6467 cost--;
6468 if (parts.seg != SEG_DEFAULT)
6469 cost--;
6471 /* Attempt to minimize number of registers in the address. */
6472 if ((parts.base
6473 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6474 || (parts.index
6475 && (!REG_P (parts.index)
6476 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6477 cost++;
6479 if (parts.base
6480 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6481 && parts.index
6482 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6483 && parts.base != parts.index)
6484 cost++;
6486 /* AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6487 since its predecode logic can't detect the length of such instructions
6488 and they degenerate to vector decoding. Increase the cost of such
6489 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6490 to split such addresses or even refuse them entirely.
6492 The following addressing modes are affected:
6493 [base+scale*index]
6494 [scale*index+disp]
6495 [base+index]
6497 The first and last case may be avoidable by explicitly coding the zero in
6498 the memory address, but I don't have an AMD-K6 machine handy to check this
6499 theory. */
6501 if (TARGET_K6
6502 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6503 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6504 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6505 cost += 10;
6507 return cost;
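/* Sketch with illustrative operands (ignoring the K6 penalty): the address
   "12(%esi,%ebx,4)" above starts at cost 1 and gets the displacement bonus,
   ending at 0, whereas an address built from two pseudo registers with no
   displacement keeps cost 1 and takes both register penalties, ending at 3,
   so the former is preferred.  */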
6510 /* If X is a machine specific address (i.e. a symbol or label being
6511 referenced as a displacement from the GOT implemented using an
6512 UNSPEC), then return the base term. Otherwise return X. */
6515 ix86_find_base_term (rtx x)
6517 rtx term;
6519 if (TARGET_64BIT)
6521 if (GET_CODE (x) != CONST)
6522 return x;
6523 term = XEXP (x, 0);
6524 if (GET_CODE (term) == PLUS
6525 && (CONST_INT_P (XEXP (term, 1))
6526 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6527 term = XEXP (term, 0);
6528 if (GET_CODE (term) != UNSPEC
6529 || XINT (term, 1) != UNSPEC_GOTPCREL)
6530 return x;
6532 term = XVECEXP (term, 0, 0);
6534 if (GET_CODE (term) != SYMBOL_REF
6535 && GET_CODE (term) != LABEL_REF)
6536 return x;
6538 return term;
6541 term = ix86_delegitimize_address (x);
6543 if (GET_CODE (term) != SYMBOL_REF
6544 && GET_CODE (term) != LABEL_REF)
6545 return x;
6547 return term;
6550 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6551 this is used to form addresses to local data when -fPIC is in
6552 use. */
6554 static bool
6555 darwin_local_data_pic (rtx disp)
6557 if (GET_CODE (disp) == MINUS)
6559 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6560 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6561 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6563 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6564 if (! strcmp (sym_name, "<pic base>"))
6565 return true;
6569 return false;
6572 /* Determine if a given RTX is a valid constant. We already know this
6573 satisfies CONSTANT_P. */
6575 bool
6576 legitimate_constant_p (rtx x)
6578 switch (GET_CODE (x))
6580 case CONST:
6581 x = XEXP (x, 0);
6583 if (GET_CODE (x) == PLUS)
6585 if (!CONST_INT_P (XEXP (x, 1)))
6586 return false;
6587 x = XEXP (x, 0);
6590 if (TARGET_MACHO && darwin_local_data_pic (x))
6591 return true;
6593 /* Only some unspecs are valid as "constants". */
6594 if (GET_CODE (x) == UNSPEC)
6595 switch (XINT (x, 1))
6597 case UNSPEC_GOT:
6598 case UNSPEC_GOTOFF:
6599 case UNSPEC_PLTOFF:
6600 return TARGET_64BIT;
6601 case UNSPEC_TPOFF:
6602 case UNSPEC_NTPOFF:
6603 x = XVECEXP (x, 0, 0);
6604 return (GET_CODE (x) == SYMBOL_REF
6605 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6606 case UNSPEC_DTPOFF:
6607 x = XVECEXP (x, 0, 0);
6608 return (GET_CODE (x) == SYMBOL_REF
6609 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6610 default:
6611 return false;
6614 /* We must have drilled down to a symbol. */
6615 if (GET_CODE (x) == LABEL_REF)
6616 return true;
6617 if (GET_CODE (x) != SYMBOL_REF)
6618 return false;
6619 /* FALLTHRU */
6621 case SYMBOL_REF:
6622 /* TLS symbols are never valid. */
6623 if (SYMBOL_REF_TLS_MODEL (x))
6624 return false;
6625 break;
6627 case CONST_DOUBLE:
6628 if (GET_MODE (x) == TImode
6629 && x != CONST0_RTX (TImode)
6630 && !TARGET_64BIT)
6631 return false;
6632 break;
6634 case CONST_VECTOR:
6635 if (x == CONST0_RTX (GET_MODE (x)))
6636 return true;
6637 return false;
6639 default:
6640 break;
6643 /* Otherwise we handle everything else in the move patterns. */
6644 return true;
6647 /* Determine if it's legal to put X into the constant pool. This
6648 is not possible for the address of thread-local symbols, which
6649 is checked above. */
6651 static bool
6652 ix86_cannot_force_const_mem (rtx x)
6654 /* We can always put integral constants and vectors in memory. */
6655 switch (GET_CODE (x))
6657 case CONST_INT:
6658 case CONST_DOUBLE:
6659 case CONST_VECTOR:
6660 return false;
6662 default:
6663 break;
6665 return !legitimate_constant_p (x);
6668 /* Determine if a given RTX is a valid constant address. */
6670 bool
6671 constant_address_p (rtx x)
6673 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6676 /* Nonzero if the constant value X is a legitimate general operand
6677 when generating PIC code. It is given that flag_pic is on and
6678 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6680 bool
6681 legitimate_pic_operand_p (rtx x)
6683 rtx inner;
6685 switch (GET_CODE (x))
6687 case CONST:
6688 inner = XEXP (x, 0);
6689 if (GET_CODE (inner) == PLUS
6690 && CONST_INT_P (XEXP (inner, 1)))
6691 inner = XEXP (inner, 0);
6693 /* Only some unspecs are valid as "constants". */
6694 if (GET_CODE (inner) == UNSPEC)
6695 switch (XINT (inner, 1))
6697 case UNSPEC_GOT:
6698 case UNSPEC_GOTOFF:
6699 case UNSPEC_PLTOFF:
6700 return TARGET_64BIT;
6701 case UNSPEC_TPOFF:
6702 x = XVECEXP (inner, 0, 0);
6703 return (GET_CODE (x) == SYMBOL_REF
6704 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6705 default:
6706 return false;
6708 /* FALLTHRU */
6710 case SYMBOL_REF:
6711 case LABEL_REF:
6712 return legitimate_pic_address_disp_p (x);
6714 default:
6715 return true;
6719 /* Determine if a given CONST RTX is a valid memory displacement
6720 in PIC mode. */
6723 legitimate_pic_address_disp_p (rtx disp)
6725 bool saw_plus;
6727 /* In 64bit mode we can allow direct addresses of symbols and labels
6728 when they are not dynamic symbols. */
6729 if (TARGET_64BIT)
6731 rtx op0 = disp, op1;
6733 switch (GET_CODE (disp))
6735 case LABEL_REF:
6736 return true;
6738 case CONST:
6739 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6740 break;
6741 op0 = XEXP (XEXP (disp, 0), 0);
6742 op1 = XEXP (XEXP (disp, 0), 1);
6743 if (!CONST_INT_P (op1)
6744 || INTVAL (op1) >= 16*1024*1024
6745 || INTVAL (op1) < -16*1024*1024)
6746 break;
6747 if (GET_CODE (op0) == LABEL_REF)
6748 return true;
6749 if (GET_CODE (op0) != SYMBOL_REF)
6750 break;
6751 /* FALLTHRU */
6753 case SYMBOL_REF:
6754 /* TLS references should always be enclosed in UNSPEC. */
6755 if (SYMBOL_REF_TLS_MODEL (op0))
6756 return false;
6757 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6758 && ix86_cmodel != CM_LARGE_PIC)
6759 return true;
6760 break;
6762 default:
6763 break;
6766 if (GET_CODE (disp) != CONST)
6767 return 0;
6768 disp = XEXP (disp, 0);
6770 if (TARGET_64BIT)
6772 /* It is unsafe to allow PLUS expressions here; this limits the allowed
6773 distance of GOT tables. We should not need these anyway. */
6774 if (GET_CODE (disp) != UNSPEC
6775 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6776 && XINT (disp, 1) != UNSPEC_GOTOFF
6777 && XINT (disp, 1) != UNSPEC_PLTOFF))
6778 return 0;
6780 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6781 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6782 return 0;
6783 return 1;
6786 saw_plus = false;
6787 if (GET_CODE (disp) == PLUS)
6789 if (!CONST_INT_P (XEXP (disp, 1)))
6790 return 0;
6791 disp = XEXP (disp, 0);
6792 saw_plus = true;
6795 if (TARGET_MACHO && darwin_local_data_pic (disp))
6796 return 1;
6798 if (GET_CODE (disp) != UNSPEC)
6799 return 0;
6801 switch (XINT (disp, 1))
6803 case UNSPEC_GOT:
6804 if (saw_plus)
6805 return false;
6806 /* We need to check for both symbols and labels because VxWorks loads
6807 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6808 details. */
6809 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6810 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6811 case UNSPEC_GOTOFF:
6812 /* Refuse GOTOFF in 64bit mode, since it is always 64bit when used.
6813 While the ABI also specifies a 32bit relocation, we don't produce it in
6814 the small PIC model at all. */
6815 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6816 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6817 && !TARGET_64BIT)
6818 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6819 return false;
6820 case UNSPEC_GOTTPOFF:
6821 case UNSPEC_GOTNTPOFF:
6822 case UNSPEC_INDNTPOFF:
6823 if (saw_plus)
6824 return false;
6825 disp = XVECEXP (disp, 0, 0);
6826 return (GET_CODE (disp) == SYMBOL_REF
6827 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6828 case UNSPEC_NTPOFF:
6829 disp = XVECEXP (disp, 0, 0);
6830 return (GET_CODE (disp) == SYMBOL_REF
6831 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6832 case UNSPEC_DTPOFF:
6833 disp = XVECEXP (disp, 0, 0);
6834 return (GET_CODE (disp) == SYMBOL_REF
6835 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6838 return 0;
6841 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6842 memory address for an instruction. The MODE argument is the machine mode
6843 for the MEM expression that wants to use this address.
6845 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6846 convert common non-canonical forms to canonical form so that they will
6847 be recognized. */
6850 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6852 struct ix86_address parts;
6853 rtx base, index, disp;
6854 HOST_WIDE_INT scale;
6855 const char *reason = NULL;
6856 rtx reason_rtx = NULL_RTX;
6858 if (TARGET_DEBUG_ADDR)
6860 fprintf (stderr,
6861 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6862 GET_MODE_NAME (mode), strict);
6863 debug_rtx (addr);
6866 if (ix86_decompose_address (addr, &parts) <= 0)
6868 reason = "decomposition failed";
6869 goto report_error;
6872 base = parts.base;
6873 index = parts.index;
6874 disp = parts.disp;
6875 scale = parts.scale;
6877 /* Validate base register.
6879 Don't allow SUBREG's that span more than a word here. It can lead to spill
6880 failures when the base is one word out of a two word structure, which is
6881 represented internally as a DImode int. */
6883 if (base)
6885 rtx reg;
6886 reason_rtx = base;
6888 if (REG_P (base))
6889 reg = base;
6890 else if (GET_CODE (base) == SUBREG
6891 && REG_P (SUBREG_REG (base))
6892 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6893 <= UNITS_PER_WORD)
6894 reg = SUBREG_REG (base);
6895 else
6897 reason = "base is not a register";
6898 goto report_error;
6901 if (GET_MODE (base) != Pmode)
6903 reason = "base is not in Pmode";
6904 goto report_error;
6907 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6908 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6910 reason = "base is not valid";
6911 goto report_error;
6915 /* Validate index register.
6917 Don't allow SUBREG's that span more than a word here -- same as above. */
6919 if (index)
6921 rtx reg;
6922 reason_rtx = index;
6924 if (REG_P (index))
6925 reg = index;
6926 else if (GET_CODE (index) == SUBREG
6927 && REG_P (SUBREG_REG (index))
6928 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6929 <= UNITS_PER_WORD)
6930 reg = SUBREG_REG (index);
6931 else
6933 reason = "index is not a register";
6934 goto report_error;
6937 if (GET_MODE (index) != Pmode)
6939 reason = "index is not in Pmode";
6940 goto report_error;
6943 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6944 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6946 reason = "index is not valid";
6947 goto report_error;
6951 /* Validate scale factor. */
6952 if (scale != 1)
6954 reason_rtx = GEN_INT (scale);
6955 if (!index)
6957 reason = "scale without index";
6958 goto report_error;
6961 if (scale != 2 && scale != 4 && scale != 8)
6963 reason = "scale is not a valid multiplier";
6964 goto report_error;
6968 /* Validate displacement. */
6969 if (disp)
6971 reason_rtx = disp;
6973 if (GET_CODE (disp) == CONST
6974 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6975 switch (XINT (XEXP (disp, 0), 1))
6977 /* Refuse GOTOFF and GOT in 64bit mode, since they are always 64bit when
6978 used. While the ABI also specifies 32bit relocations, we don't produce
6979 them at all and use IP-relative addressing instead. */
6980 case UNSPEC_GOT:
6981 case UNSPEC_GOTOFF:
6982 gcc_assert (flag_pic);
6983 if (!TARGET_64BIT)
6984 goto is_legitimate_pic;
6985 reason = "64bit address unspec";
6986 goto report_error;
6988 case UNSPEC_GOTPCREL:
6989 gcc_assert (flag_pic);
6990 goto is_legitimate_pic;
6992 case UNSPEC_GOTTPOFF:
6993 case UNSPEC_GOTNTPOFF:
6994 case UNSPEC_INDNTPOFF:
6995 case UNSPEC_NTPOFF:
6996 case UNSPEC_DTPOFF:
6997 break;
6999 default:
7000 reason = "invalid address unspec";
7001 goto report_error;
7004 else if (SYMBOLIC_CONST (disp)
7005 && (flag_pic
7006 || (TARGET_MACHO
7007 #if TARGET_MACHO
7008 && MACHOPIC_INDIRECT
7009 && !machopic_operand_p (disp)
7010 #endif
7014 is_legitimate_pic:
7015 if (TARGET_64BIT && (index || base))
7017 /* foo@dtpoff(%rX) is ok. */
7018 if (GET_CODE (disp) != CONST
7019 || GET_CODE (XEXP (disp, 0)) != PLUS
7020 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7021 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7022 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7023 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7025 reason = "non-constant pic memory reference";
7026 goto report_error;
7029 else if (! legitimate_pic_address_disp_p (disp))
7031 reason = "displacement is an invalid pic construct";
7032 goto report_error;
7035 /* This code used to verify that a symbolic pic displacement
7036 includes the pic_offset_table_rtx register.
7038 While this is a good idea, unfortunately these constructs may
7039 be created by the "adds using lea" optimization for incorrect
7040 code like:
7042 int a;
7043 int foo(int i)
7045 return *(&a+i);
7048 This code is nonsensical, but results in addressing the
7049 GOT table with a pic_offset_table_rtx base. We can't
7050 just refuse it easily, since it gets matched by the
7051 "addsi3" pattern, which later gets split into lea when the
7052 output register differs from the input. While this
7053 could be handled by a separate addsi pattern for this case
7054 that never results in lea, disabling this test seems to be
7055 the easier and correct fix for the crash. */
7057 else if (GET_CODE (disp) != LABEL_REF
7058 && !CONST_INT_P (disp)
7059 && (GET_CODE (disp) != CONST
7060 || !legitimate_constant_p (disp))
7061 && (GET_CODE (disp) != SYMBOL_REF
7062 || !legitimate_constant_p (disp)))
7064 reason = "displacement is not constant";
7065 goto report_error;
7067 else if (TARGET_64BIT
7068 && !x86_64_immediate_operand (disp, VOIDmode))
7070 reason = "displacement is out of range";
7071 goto report_error;
7075 /* Everything looks valid. */
7076 if (TARGET_DEBUG_ADDR)
7077 fprintf (stderr, "Success.\n");
7078 return TRUE;
7080 report_error:
7081 if (TARGET_DEBUG_ADDR)
7083 fprintf (stderr, "Error: %s\n", reason);
7084 debug_rtx (reason_rtx);
7086 return FALSE;
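A minimal stand-alone sketch of the structural checks performed above may be a useful reference: base and index must be Pmode registers, a scale requires an index, and the scale must be 1, 2, 4 or 8. The struct and function names below are illustrative only, not GCC internals.

/* Illustrative sketch of the scale/index checks made above.  */
#include <stdbool.h>

struct addr_parts { bool has_base, has_index; int scale; };

bool
addr_parts_scale_valid (const struct addr_parts *p)
{
  if (p->scale != 1 && !p->has_index)
    return false;                  /* "scale without index" above */
  if (p->scale != 1 && p->scale != 2 && p->scale != 4 && p->scale != 8)
    return false;                  /* "scale is not a valid multiplier" */
  return true;
}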
7089 /* Return a unique alias set for the GOT. */
7091 static HOST_WIDE_INT
7092 ix86_GOT_alias_set (void)
7094 static HOST_WIDE_INT set = -1;
7095 if (set == -1)
7096 set = new_alias_set ();
7097 return set;
7100 /* Return a legitimate reference for ORIG (an address) using the
7101 register REG. If REG is 0, a new pseudo is generated.
7103 There are two types of references that must be handled:
7105 1. Global data references must load the address from the GOT, via
7106 the PIC reg. An insn is emitted to do this load, and the reg is
7107 returned.
7109 2. Static data references, constant pool addresses, and code labels
7110 compute the address as an offset from the GOT, whose base is in
7111 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7112 differentiate them from global data objects. The returned
7113 address is the PIC reg + an unspec constant.
7115 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7116 reg also appears in the address. */
7118 static rtx
7119 legitimize_pic_address (rtx orig, rtx reg)
7121 rtx addr = orig;
7122 rtx new = orig;
7123 rtx base;
7125 #if TARGET_MACHO
7126 if (TARGET_MACHO && !TARGET_64BIT)
7128 if (reg == 0)
7129 reg = gen_reg_rtx (Pmode);
7130 /* Use the generic Mach-O PIC machinery. */
7131 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7133 #endif
7135 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7136 new = addr;
7137 else if (TARGET_64BIT
7138 && ix86_cmodel != CM_SMALL_PIC
7139 && gotoff_operand (addr, Pmode))
7141 rtx tmpreg;
7142 /* This symbol may be referenced via a displacement from the PIC
7143 base address (@GOTOFF). */
7145 if (reload_in_progress)
7146 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7147 if (GET_CODE (addr) == CONST)
7148 addr = XEXP (addr, 0);
7149 if (GET_CODE (addr) == PLUS)
7151 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7152 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7154 else
7155 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7156 new = gen_rtx_CONST (Pmode, new);
7157 if (!reg)
7158 tmpreg = gen_reg_rtx (Pmode);
7159 else
7160 tmpreg = reg;
7161 emit_move_insn (tmpreg, new);
7163 if (reg != 0)
7165 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7166 tmpreg, 1, OPTAB_DIRECT);
7167 new = reg;
7169 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7171 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7173 /* This symbol may be referenced via a displacement from the PIC
7174 base address (@GOTOFF). */
7176 if (reload_in_progress)
7177 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7178 if (GET_CODE (addr) == CONST)
7179 addr = XEXP (addr, 0);
7180 if (GET_CODE (addr) == PLUS)
7182 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7183 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7185 else
7186 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7187 new = gen_rtx_CONST (Pmode, new);
7188 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7190 if (reg != 0)
7192 emit_move_insn (reg, new);
7193 new = reg;
7196 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7197 /* We can't use @GOTOFF for text labels on VxWorks;
7198 see gotoff_operand. */
7199 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7201 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7203 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7204 new = gen_rtx_CONST (Pmode, new);
7205 new = gen_const_mem (Pmode, new);
7206 set_mem_alias_set (new, ix86_GOT_alias_set ());
7208 if (reg == 0)
7209 reg = gen_reg_rtx (Pmode);
7210 /* Use gen_movsi directly, otherwise the address is loaded
7211 into a register for CSE. We don't want to CSE these addresses;
7212 instead we CSE addresses from the GOT table, so skip this. */
7213 emit_insn (gen_movsi (reg, new));
7214 new = reg;
7216 else
7218 /* This symbol must be referenced via a load from the
7219 Global Offset Table (@GOT). */
7221 if (reload_in_progress)
7222 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7223 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7224 new = gen_rtx_CONST (Pmode, new);
7225 if (TARGET_64BIT)
7226 new = force_reg (Pmode, new);
7227 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7228 new = gen_const_mem (Pmode, new);
7229 set_mem_alias_set (new, ix86_GOT_alias_set ());
7231 if (reg == 0)
7232 reg = gen_reg_rtx (Pmode);
7233 emit_move_insn (reg, new);
7234 new = reg;
7237 else
7239 if (CONST_INT_P (addr)
7240 && !x86_64_immediate_operand (addr, VOIDmode))
7242 if (reg)
7244 emit_move_insn (reg, addr);
7245 new = reg;
7247 else
7248 new = force_reg (Pmode, addr);
7250 else if (GET_CODE (addr) == CONST)
7252 addr = XEXP (addr, 0);
7254 /* We must match stuff we generated before. Assume the only
7255 unspecs that can get here are ours. Not that we could do
7256 anything with them anyway.... */
7257 if (GET_CODE (addr) == UNSPEC
7258 || (GET_CODE (addr) == PLUS
7259 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7260 return orig;
7261 gcc_assert (GET_CODE (addr) == PLUS);
7263 if (GET_CODE (addr) == PLUS)
7265 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7267 /* Check first to see if this is a constant offset from a @GOTOFF
7268 symbol reference. */
7269 if (gotoff_operand (op0, Pmode)
7270 && CONST_INT_P (op1))
7272 if (!TARGET_64BIT)
7274 if (reload_in_progress)
7275 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7276 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7277 UNSPEC_GOTOFF);
7278 new = gen_rtx_PLUS (Pmode, new, op1);
7279 new = gen_rtx_CONST (Pmode, new);
7280 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7282 if (reg != 0)
7284 emit_move_insn (reg, new);
7285 new = reg;
7288 else
7290 if (INTVAL (op1) < -16*1024*1024
7291 || INTVAL (op1) >= 16*1024*1024)
7293 if (!x86_64_immediate_operand (op1, Pmode))
7294 op1 = force_reg (Pmode, op1);
7295 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7299 else
7301 base = legitimize_pic_address (XEXP (addr, 0), reg);
7302 new = legitimize_pic_address (XEXP (addr, 1),
7303 base == reg ? NULL_RTX : reg);
7305 if (CONST_INT_P (new))
7306 new = plus_constant (base, INTVAL (new));
7307 else
7309 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7311 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7312 new = XEXP (new, 1);
7314 new = gen_rtx_PLUS (Pmode, base, new);
7319 return new;
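A small source-level illustration of the two cases described in the comment before legitimize_pic_address: global data is reached through a GOT load via the PIC register, while file-local data is addressed as the PIC register plus an @GOTOFF constant. The symbol names are made up, and the annotations summarize the typical 32-bit -fPIC result rather than exact compiler output.

/* Hypothetical example, compiled 32-bit with -fPIC.  */
extern int global_counter;   /* case 1: address loaded from the GOT via the PIC reg */
static int local_counter;    /* case 2: pic_offset_table_rtx + @GOTOFF constant */

int
sum_counters (void)
{
  return global_counter + local_counter;
}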
7322 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7324 static rtx
7325 get_thread_pointer (int to_reg)
7327 rtx tp, reg, insn;
7329 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7330 if (!to_reg)
7331 return tp;
7333 reg = gen_reg_rtx (Pmode);
7334 insn = gen_rtx_SET (VOIDmode, reg, tp);
7335 insn = emit_insn (insn);
7337 return reg;
7340 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7341 false if we expect this to be used for a memory address and true if
7342 we expect to load the address into a register. */
7344 static rtx
7345 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7347 rtx dest, base, off, pic, tp;
7348 int type;
7350 switch (model)
7352 case TLS_MODEL_GLOBAL_DYNAMIC:
7353 dest = gen_reg_rtx (Pmode);
7354 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7356 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7358 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7360 start_sequence ();
7361 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7362 insns = get_insns ();
7363 end_sequence ();
7365 emit_libcall_block (insns, dest, rax, x);
7367 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7368 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7369 else
7370 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7372 if (TARGET_GNU2_TLS)
7374 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7376 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7378 break;
7380 case TLS_MODEL_LOCAL_DYNAMIC:
7381 base = gen_reg_rtx (Pmode);
7382 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7384 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7386 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7388 start_sequence ();
7389 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7390 insns = get_insns ();
7391 end_sequence ();
7393 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7394 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7395 emit_libcall_block (insns, base, rax, note);
7397 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7398 emit_insn (gen_tls_local_dynamic_base_64 (base));
7399 else
7400 emit_insn (gen_tls_local_dynamic_base_32 (base));
7402 if (TARGET_GNU2_TLS)
7404 rtx x = ix86_tls_module_base ();
7406 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7407 gen_rtx_MINUS (Pmode, x, tp));
7410 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7411 off = gen_rtx_CONST (Pmode, off);
7413 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7415 if (TARGET_GNU2_TLS)
7417 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7419 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7422 break;
7424 case TLS_MODEL_INITIAL_EXEC:
7425 if (TARGET_64BIT)
7427 pic = NULL;
7428 type = UNSPEC_GOTNTPOFF;
7430 else if (flag_pic)
7432 if (reload_in_progress)
7433 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7434 pic = pic_offset_table_rtx;
7435 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7437 else if (!TARGET_ANY_GNU_TLS)
7439 pic = gen_reg_rtx (Pmode);
7440 emit_insn (gen_set_got (pic));
7441 type = UNSPEC_GOTTPOFF;
7443 else
7445 pic = NULL;
7446 type = UNSPEC_INDNTPOFF;
7449 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7450 off = gen_rtx_CONST (Pmode, off);
7451 if (pic)
7452 off = gen_rtx_PLUS (Pmode, pic, off);
7453 off = gen_const_mem (Pmode, off);
7454 set_mem_alias_set (off, ix86_GOT_alias_set ());
7456 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7458 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7459 off = force_reg (Pmode, off);
7460 return gen_rtx_PLUS (Pmode, base, off);
7462 else
7464 base = get_thread_pointer (true);
7465 dest = gen_reg_rtx (Pmode);
7466 emit_insn (gen_subsi3 (dest, base, off));
7468 break;
7470 case TLS_MODEL_LOCAL_EXEC:
7471 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7472 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7473 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7474 off = gen_rtx_CONST (Pmode, off);
7476 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7478 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7479 return gen_rtx_PLUS (Pmode, base, off);
7481 else
7483 base = get_thread_pointer (true);
7484 dest = gen_reg_rtx (Pmode);
7485 emit_insn (gen_subsi3 (dest, base, off));
7487 break;
7489 default:
7490 gcc_unreachable ();
7493 return dest;
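For orientation, the TLS models dispatched on above correspond to ordinary __thread accesses at the source level. Which model is chosen depends on -fPIC, symbol visibility and -ftls-model, so the mapping sketched in the comments below is only the usual default, and the variable names are made up.

/* Hypothetical example.  When compiling PIC, an extern __thread variable
   normally takes the global-dynamic path above and a file-local one the
   local-dynamic path; non-PIC code normally uses initial-exec or
   local-exec instead.  */
extern __thread int tls_extern_counter;
static __thread int tls_local_counter;

int
bump_tls_counters (void)
{
  return ++tls_extern_counter + ++tls_local_counter;
}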
7496 /* Try machine-dependent ways of modifying an illegitimate address
7497 to be legitimate. If we find one, return the new, valid address.
7498 This macro is used in only one place: `memory_address' in explow.c.
7500 OLDX is the address as it was before break_out_memory_refs was called.
7501 In some cases it is useful to look at this to decide what needs to be done.
7503 MODE and WIN are passed so that this macro can use
7504 GO_IF_LEGITIMATE_ADDRESS.
7506 It is always safe for this macro to do nothing. It exists to recognize
7507 opportunities to optimize the output.
7509 For the 80386, we handle X+REG by loading X into a register R and
7510 using R+REG. R will go in a general reg and indexing will be used.
7511 However, if REG is a broken-out memory address or multiplication,
7512 nothing needs to be done because REG can certainly go in a general reg.
7514 When -fpic is used, special handling is needed for symbolic references.
7515 See comments by legitimize_pic_address in i386.c for details. */
7518 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7520 int changed = 0;
7521 unsigned log;
7523 if (TARGET_DEBUG_ADDR)
7525 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7526 GET_MODE_NAME (mode));
7527 debug_rtx (x);
7530 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7531 if (log)
7532 return legitimize_tls_address (x, log, false);
7533 if (GET_CODE (x) == CONST
7534 && GET_CODE (XEXP (x, 0)) == PLUS
7535 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7536 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7538 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7539 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7542 if (flag_pic && SYMBOLIC_CONST (x))
7543 return legitimize_pic_address (x, 0);
7545 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
7546 if (GET_CODE (x) == ASHIFT
7547 && CONST_INT_P (XEXP (x, 1))
7548 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7550 changed = 1;
7551 log = INTVAL (XEXP (x, 1));
7552 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7553 GEN_INT (1 << log));
7556 if (GET_CODE (x) == PLUS)
7558 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7560 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7561 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7562 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7564 changed = 1;
7565 log = INTVAL (XEXP (XEXP (x, 0), 1));
7566 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7567 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7568 GEN_INT (1 << log));
7571 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7572 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7573 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7575 changed = 1;
7576 log = INTVAL (XEXP (XEXP (x, 1), 1));
7577 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7578 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7579 GEN_INT (1 << log));
7582 /* Put multiply first if it isn't already. */
7583 if (GET_CODE (XEXP (x, 1)) == MULT)
7585 rtx tmp = XEXP (x, 0);
7586 XEXP (x, 0) = XEXP (x, 1);
7587 XEXP (x, 1) = tmp;
7588 changed = 1;
7591 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7592 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7593 created by virtual register instantiation, register elimination, and
7594 similar optimizations. */
7595 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7597 changed = 1;
7598 x = gen_rtx_PLUS (Pmode,
7599 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7600 XEXP (XEXP (x, 1), 0)),
7601 XEXP (XEXP (x, 1), 1));
7604 /* Canonicalize
7605 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7606 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7607 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7608 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7609 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7610 && CONSTANT_P (XEXP (x, 1)))
7612 rtx constant;
7613 rtx other = NULL_RTX;
7615 if (CONST_INT_P (XEXP (x, 1)))
7617 constant = XEXP (x, 1);
7618 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7620 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7622 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7623 other = XEXP (x, 1);
7625 else
7626 constant = 0;
7628 if (constant)
7630 changed = 1;
7631 x = gen_rtx_PLUS (Pmode,
7632 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7633 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7634 plus_constant (other, INTVAL (constant)));
7638 if (changed && legitimate_address_p (mode, x, FALSE))
7639 return x;
7641 if (GET_CODE (XEXP (x, 0)) == MULT)
7643 changed = 1;
7644 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7647 if (GET_CODE (XEXP (x, 1)) == MULT)
7649 changed = 1;
7650 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7653 if (changed
7654 && REG_P (XEXP (x, 1))
7655 && REG_P (XEXP (x, 0)))
7656 return x;
7658 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7660 changed = 1;
7661 x = legitimize_pic_address (x, 0);
7664 if (changed && legitimate_address_p (mode, x, FALSE))
7665 return x;
7667 if (REG_P (XEXP (x, 0)))
7669 rtx temp = gen_reg_rtx (Pmode);
7670 rtx val = force_operand (XEXP (x, 1), temp);
7671 if (val != temp)
7672 emit_move_insn (temp, val);
7674 XEXP (x, 1) = temp;
7675 return x;
7678 else if (REG_P (XEXP (x, 1)))
7680 rtx temp = gen_reg_rtx (Pmode);
7681 rtx val = force_operand (XEXP (x, 0), temp);
7682 if (val != temp)
7683 emit_move_insn (temp, val);
7685 XEXP (x, 0) = temp;
7686 return x;
7690 return x;
7693 /* Print an integer constant expression in assembler syntax. Addition
7694 and subtraction are the only arithmetic that may appear in these
7695 expressions. FILE is the stdio stream to write to, X is the rtx, and
7696 CODE is the operand print code from the output string. */
7698 static void
7699 output_pic_addr_const (FILE *file, rtx x, int code)
7701 char buf[256];
7703 switch (GET_CODE (x))
7705 case PC:
7706 gcc_assert (flag_pic);
7707 putc ('.', file);
7708 break;
7710 case SYMBOL_REF:
7711 if (! TARGET_MACHO || TARGET_64BIT)
7712 output_addr_const (file, x);
7713 else
7715 const char *name = XSTR (x, 0);
7717 /* Mark the decl as referenced so that cgraph will output the function. */
7718 if (SYMBOL_REF_DECL (x))
7719 mark_decl_referenced (SYMBOL_REF_DECL (x));
7721 #if TARGET_MACHO
7722 if (MACHOPIC_INDIRECT
7723 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7724 name = machopic_indirection_name (x, /*stub_p=*/true);
7725 #endif
7726 assemble_name (file, name);
7728 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7729 fputs ("@PLT", file);
7730 break;
7732 case LABEL_REF:
7733 x = XEXP (x, 0);
7734 /* FALLTHRU */
7735 case CODE_LABEL:
7736 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7737 assemble_name (asm_out_file, buf);
7738 break;
7740 case CONST_INT:
7741 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7742 break;
7744 case CONST:
7745 /* This used to output parentheses around the expression,
7746 but that does not work on the 386 (either ATT or BSD assembler). */
7747 output_pic_addr_const (file, XEXP (x, 0), code);
7748 break;
7750 case CONST_DOUBLE:
7751 if (GET_MODE (x) == VOIDmode)
7753 /* We can use %d if the number is <32 bits and positive. */
7754 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7755 fprintf (file, "0x%lx%08lx",
7756 (unsigned long) CONST_DOUBLE_HIGH (x),
7757 (unsigned long) CONST_DOUBLE_LOW (x));
7758 else
7759 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7761 else
7762 /* We can't handle floating point constants;
7763 PRINT_OPERAND must handle them. */
7764 output_operand_lossage ("floating constant misused");
7765 break;
7767 case PLUS:
7768 /* Some assemblers need integer constants to appear first. */
7769 if (CONST_INT_P (XEXP (x, 0)))
7771 output_pic_addr_const (file, XEXP (x, 0), code);
7772 putc ('+', file);
7773 output_pic_addr_const (file, XEXP (x, 1), code);
7775 else
7777 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7778 output_pic_addr_const (file, XEXP (x, 1), code);
7779 putc ('+', file);
7780 output_pic_addr_const (file, XEXP (x, 0), code);
7782 break;
7784 case MINUS:
7785 if (!TARGET_MACHO)
7786 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7787 output_pic_addr_const (file, XEXP (x, 0), code);
7788 putc ('-', file);
7789 output_pic_addr_const (file, XEXP (x, 1), code);
7790 if (!TARGET_MACHO)
7791 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7792 break;
7794 case UNSPEC:
7795 gcc_assert (XVECLEN (x, 0) == 1);
7796 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7797 switch (XINT (x, 1))
7799 case UNSPEC_GOT:
7800 fputs ("@GOT", file);
7801 break;
7802 case UNSPEC_GOTOFF:
7803 fputs ("@GOTOFF", file);
7804 break;
7805 case UNSPEC_PLTOFF:
7806 fputs ("@PLTOFF", file);
7807 break;
7808 case UNSPEC_GOTPCREL:
7809 fputs ("@GOTPCREL(%rip)", file);
7810 break;
7811 case UNSPEC_GOTTPOFF:
7812 /* FIXME: This might be @TPOFF in Sun ld too. */
7813 fputs ("@GOTTPOFF", file);
7814 break;
7815 case UNSPEC_TPOFF:
7816 fputs ("@TPOFF", file);
7817 break;
7818 case UNSPEC_NTPOFF:
7819 if (TARGET_64BIT)
7820 fputs ("@TPOFF", file);
7821 else
7822 fputs ("@NTPOFF", file);
7823 break;
7824 case UNSPEC_DTPOFF:
7825 fputs ("@DTPOFF", file);
7826 break;
7827 case UNSPEC_GOTNTPOFF:
7828 if (TARGET_64BIT)
7829 fputs ("@GOTTPOFF(%rip)", file);
7830 else
7831 fputs ("@GOTNTPOFF", file);
7832 break;
7833 case UNSPEC_INDNTPOFF:
7834 fputs ("@INDNTPOFF", file);
7835 break;
7836 default:
7837 output_operand_lossage ("invalid UNSPEC as operand");
7838 break;
7840 break;
7842 default:
7843 output_operand_lossage ("invalid expression as operand");
7847 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7848 We need to emit DTP-relative relocations. */
7850 static void
7851 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7853 fputs (ASM_LONG, file);
7854 output_addr_const (file, x);
7855 fputs ("@DTPOFF", file);
7856 switch (size)
7858 case 4:
7859 break;
7860 case 8:
7861 fputs (", 0", file);
7862 break;
7863 default:
7864 gcc_unreachable ();
7868 /* In the name of slightly smaller debug output, and to cater to
7869 general assembler lossage, recognize PIC+GOTOFF and turn it back
7870 into a direct symbol reference.
7872 On Darwin, this is necessary to avoid a crash, because Darwin
7873 has a different PIC label for each routine but the DWARF debugging
7874 information is not associated with any particular routine, so it's
7875 necessary to remove references to the PIC label from RTL stored by
7876 the DWARF output code. */
7878 static rtx
7879 ix86_delegitimize_address (rtx orig_x)
7881 rtx x = orig_x;
7882 /* reg_addend is NULL or a multiple of some register. */
7883 rtx reg_addend = NULL_RTX;
7884 /* const_addend is NULL or a const_int. */
7885 rtx const_addend = NULL_RTX;
7886 /* This is the result, or NULL. */
7887 rtx result = NULL_RTX;
7889 if (MEM_P (x))
7890 x = XEXP (x, 0);
7892 if (TARGET_64BIT)
7894 if (GET_CODE (x) != CONST
7895 || GET_CODE (XEXP (x, 0)) != UNSPEC
7896 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7897 || !MEM_P (orig_x))
7898 return orig_x;
7899 return XVECEXP (XEXP (x, 0), 0, 0);
7902 if (GET_CODE (x) != PLUS
7903 || GET_CODE (XEXP (x, 1)) != CONST)
7904 return orig_x;
7906 if (REG_P (XEXP (x, 0))
7907 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7908 /* %ebx + GOT/GOTOFF */
7910 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7912 /* %ebx + %reg * scale + GOT/GOTOFF */
7913 reg_addend = XEXP (x, 0);
7914 if (REG_P (XEXP (reg_addend, 0))
7915 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7916 reg_addend = XEXP (reg_addend, 1);
7917 else if (REG_P (XEXP (reg_addend, 1))
7918 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7919 reg_addend = XEXP (reg_addend, 0);
7920 else
7921 return orig_x;
7922 if (!REG_P (reg_addend)
7923 && GET_CODE (reg_addend) != MULT
7924 && GET_CODE (reg_addend) != ASHIFT)
7925 return orig_x;
7927 else
7928 return orig_x;
7930 x = XEXP (XEXP (x, 1), 0);
7931 if (GET_CODE (x) == PLUS
7932 && CONST_INT_P (XEXP (x, 1)))
7934 const_addend = XEXP (x, 1);
7935 x = XEXP (x, 0);
7938 if (GET_CODE (x) == UNSPEC
7939 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7940 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7941 result = XVECEXP (x, 0, 0);
7943 if (TARGET_MACHO && darwin_local_data_pic (x)
7944 && !MEM_P (orig_x))
7945 result = XEXP (x, 0);
7947 if (! result)
7948 return orig_x;
7950 if (const_addend)
7951 result = gen_rtx_PLUS (Pmode, result, const_addend);
7952 if (reg_addend)
7953 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7954 return result;
7957 static void
7958 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7959 int fp, FILE *file)
7961 const char *suffix;
7963 if (mode == CCFPmode || mode == CCFPUmode)
7965 enum rtx_code second_code, bypass_code;
7966 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7967 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7968 code = ix86_fp_compare_code_to_integer (code);
7969 mode = CCmode;
7971 if (reverse)
7972 code = reverse_condition (code);
7974 switch (code)
7976 case EQ:
7977 suffix = "e";
7978 break;
7979 case NE:
7980 suffix = "ne";
7981 break;
7982 case GT:
7983 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7984 suffix = "g";
7985 break;
7986 case GTU:
7987 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7988 Those same assemblers have the same but opposite lossage on cmov. */
7989 gcc_assert (mode == CCmode);
7990 suffix = fp ? "nbe" : "a";
7991 break;
7992 case LT:
7993 switch (mode)
7995 case CCNOmode:
7996 case CCGOCmode:
7997 suffix = "s";
7998 break;
8000 case CCmode:
8001 case CCGCmode:
8002 suffix = "l";
8003 break;
8005 default:
8006 gcc_unreachable ();
8008 break;
8009 case LTU:
8010 gcc_assert (mode == CCmode);
8011 suffix = "b";
8012 break;
8013 case GE:
8014 switch (mode)
8016 case CCNOmode:
8017 case CCGOCmode:
8018 suffix = "ns";
8019 break;
8021 case CCmode:
8022 case CCGCmode:
8023 suffix = "ge";
8024 break;
8026 default:
8027 gcc_unreachable ();
8029 break;
8030 case GEU:
8031 /* ??? As above. */
8032 gcc_assert (mode == CCmode);
8033 suffix = fp ? "nb" : "ae";
8034 break;
8035 case LE:
8036 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8037 suffix = "le";
8038 break;
8039 case LEU:
8040 gcc_assert (mode == CCmode);
8041 suffix = "be";
8042 break;
8043 case UNORDERED:
8044 suffix = fp ? "u" : "p";
8045 break;
8046 case ORDERED:
8047 suffix = fp ? "nu" : "np";
8048 break;
8049 default:
8050 gcc_unreachable ();
8052 fputs (suffix, file);
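A condensed restatement of the plain-CCmode suffix choices encoded in the switch above, for reference when reading the callers; the table name is illustrative, and the CCNOmode/CCGOCmode sign-flag cases ("s"/"ns") are omitted.

/* Illustrative sketch: setcc/jcc/cmov suffixes for CCmode integer
   comparisons, signed vs. unsigned, as selected above.  */
struct cc_suffix_pair { const char *signed_sfx; const char *unsigned_sfx; };

static const struct cc_suffix_pair cc_suffixes[] =
{
  { "g",  "a"  },  /* >   (GT  vs. GTU) */
  { "ge", "ae" },  /* >=  (GE  vs. GEU) */
  { "l",  "b"  },  /* <   (LT  vs. LTU) */
  { "le", "be" },  /* <=  (LE  vs. LEU) */
};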
8055 /* Print the name of register X to FILE based on its machine mode and number.
8056 If CODE is 'w', pretend the mode is HImode.
8057 If CODE is 'b', pretend the mode is QImode.
8058 If CODE is 'k', pretend the mode is SImode.
8059 If CODE is 'q', pretend the mode is DImode.
8060 If CODE is 'h', pretend the reg is the 'high' byte register.
8061 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack reg. */
8063 void
8064 print_reg (rtx x, int code, FILE *file)
8066 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8067 && REGNO (x) != FRAME_POINTER_REGNUM
8068 && REGNO (x) != FLAGS_REG
8069 && REGNO (x) != FPSR_REG
8070 && REGNO (x) != FPCR_REG);
8072 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8073 putc ('%', file);
8075 if (code == 'w' || MMX_REG_P (x))
8076 code = 2;
8077 else if (code == 'b')
8078 code = 1;
8079 else if (code == 'k')
8080 code = 4;
8081 else if (code == 'q')
8082 code = 8;
8083 else if (code == 'y')
8084 code = 3;
8085 else if (code == 'h')
8086 code = 0;
8087 else
8088 code = GET_MODE_SIZE (GET_MODE (x));
8090 /* Irritatingly, AMD extended registers use a different naming convention
8091 from the normal registers. */
8092 if (REX_INT_REG_P (x))
8094 gcc_assert (TARGET_64BIT);
8095 switch (code)
8097 case 0:
8098 error ("extended registers have no high halves");
8099 break;
8100 case 1:
8101 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8102 break;
8103 case 2:
8104 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8105 break;
8106 case 4:
8107 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8108 break;
8109 case 8:
8110 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8111 break;
8112 default:
8113 error ("unsupported operand size for extended register");
8114 break;
8116 return;
8118 switch (code)
8120 case 3:
8121 if (STACK_TOP_P (x))
8123 fputs ("st(0)", file);
8124 break;
8126 /* FALLTHRU */
8127 case 8:
8128 case 4:
8129 case 12:
8130 if (! ANY_FP_REG_P (x))
8131 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8132 /* FALLTHRU */
8133 case 16:
8134 case 2:
8135 normal:
8136 fputs (hi_reg_name[REGNO (x)], file);
8137 break;
8138 case 1:
8139 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8140 goto normal;
8141 fputs (qi_reg_name[REGNO (x)], file);
8142 break;
8143 case 0:
8144 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8145 goto normal;
8146 fputs (qi_high_reg_name[REGNO (x)], file);
8147 break;
8148 default:
8149 gcc_unreachable ();
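As a reference for the REX branch above, a minimal stand-alone sketch of how the extended-register names are formed (%r8..%r15 take a size suffix rather than a different prefix letter). The function name is illustrative, not a GCC internal.

/* Illustrative sketch mirroring the fprintf calls above: REGNO_OFFSET
   0..7 corresponds to r8..r15, SIZE is the operand size in bytes.  */
#include <stdio.h>

void
format_rex_reg_name (char *buf, size_t bufsz, int regno_offset, int size)
{
  const char *suffix = size == 1 ? "b" : size == 2 ? "w" : size == 4 ? "d" : "";
  snprintf (buf, bufsz, "r%d%s", regno_offset + 8, suffix);
}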
8153 /* Locate some local-dynamic symbol still in use by this function
8154 so that we can print its name in some tls_local_dynamic_base
8155 pattern. */
8157 static const char *
8158 get_some_local_dynamic_name (void)
8160 rtx insn;
8162 if (cfun->machine->some_ld_name)
8163 return cfun->machine->some_ld_name;
8165 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8166 if (INSN_P (insn)
8167 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8168 return cfun->machine->some_ld_name;
8170 gcc_unreachable ();
8173 static int
8174 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8176 rtx x = *px;
8178 if (GET_CODE (x) == SYMBOL_REF
8179 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8181 cfun->machine->some_ld_name = XSTR (x, 0);
8182 return 1;
8185 return 0;
8188 /* Meaning of CODE:
8189 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8190 C -- print opcode suffix for set/cmov insn.
8191 c -- like C, but print reversed condition
8192 F,f -- likewise, but for floating-point.
8193 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8194 otherwise nothing
8195 R -- print the prefix for register names.
8196 z -- print the opcode suffix for the size of the current operand.
8197 * -- print a star (in certain assembler syntax)
8198 A -- print an absolute memory reference.
8199 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8200 s -- print a shift double count, followed by the assembler's argument
8201 delimiter.
8202 b -- print the QImode name of the register for the indicated operand.
8203 %b0 would print %al if operands[0] is reg 0.
8204 w -- likewise, print the HImode name of the register.
8205 k -- likewise, print the SImode name of the register.
8206 q -- likewise, print the DImode name of the register.
8207 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8208 y -- print "st(0)" instead of "st" as a register.
8209 D -- print condition for SSE cmp instruction.
8210 P -- if PIC, print an @PLT suffix.
8211 X -- don't print any sort of PIC '@' suffix for a symbol.
8212 & -- print some in-use local-dynamic symbol name.
8213 H -- print a memory address offset by 8; used for sse high-parts
8216 void
8217 print_operand (FILE *file, rtx x, int code)
8219 if (code)
8221 switch (code)
8223 case '*':
8224 if (ASSEMBLER_DIALECT == ASM_ATT)
8225 putc ('*', file);
8226 return;
8228 case '&':
8229 assemble_name (file, get_some_local_dynamic_name ());
8230 return;
8232 case 'A':
8233 switch (ASSEMBLER_DIALECT)
8235 case ASM_ATT:
8236 putc ('*', file);
8237 break;
8239 case ASM_INTEL:
8240 /* Intel syntax. For absolute addresses, registers should not
8241 be surrounded by brackets. */
8242 if (!REG_P (x))
8244 putc ('[', file);
8245 PRINT_OPERAND (file, x, 0);
8246 putc (']', file);
8247 return;
8249 break;
8251 default:
8252 gcc_unreachable ();
8255 PRINT_OPERAND (file, x, 0);
8256 return;
8259 case 'L':
8260 if (ASSEMBLER_DIALECT == ASM_ATT)
8261 putc ('l', file);
8262 return;
8264 case 'W':
8265 if (ASSEMBLER_DIALECT == ASM_ATT)
8266 putc ('w', file);
8267 return;
8269 case 'B':
8270 if (ASSEMBLER_DIALECT == ASM_ATT)
8271 putc ('b', file);
8272 return;
8274 case 'Q':
8275 if (ASSEMBLER_DIALECT == ASM_ATT)
8276 putc ('l', file);
8277 return;
8279 case 'S':
8280 if (ASSEMBLER_DIALECT == ASM_ATT)
8281 putc ('s', file);
8282 return;
8284 case 'T':
8285 if (ASSEMBLER_DIALECT == ASM_ATT)
8286 putc ('t', file);
8287 return;
8289 case 'z':
8290 /* 387 opcodes don't get size suffixes if the operands are
8291 registers. */
8292 if (STACK_REG_P (x))
8293 return;
8295 /* Likewise if using Intel opcodes. */
8296 if (ASSEMBLER_DIALECT == ASM_INTEL)
8297 return;
8299 /* The opcode size suffix is derived from the size of the operand. */
8300 switch (GET_MODE_SIZE (GET_MODE (x)))
8302 case 1:
8303 putc ('b', file);
8304 return;
8306 case 2:
8307 #ifdef HAVE_GAS_FILDS_FISTS
8308 putc ('s', file);
8309 #endif
8310 return;
8312 case 4:
8313 if (GET_MODE (x) == SFmode)
8315 putc ('s', file);
8316 return;
8318 else
8319 putc ('l', file);
8320 return;
8322 case 12:
8323 case 16:
8324 putc ('t', file);
8325 return;
8327 case 8:
8328 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8330 #ifdef GAS_MNEMONICS
8331 putc ('q', file);
8332 #else
8333 putc ('l', file);
8334 putc ('l', file);
8335 #endif
8337 else
8338 putc ('l', file);
8339 return;
8341 default:
8342 gcc_unreachable ();
8345 case 'b':
8346 case 'w':
8347 case 'k':
8348 case 'q':
8349 case 'h':
8350 case 'y':
8351 case 'X':
8352 case 'P':
8353 break;
8355 case 's':
8356 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8358 PRINT_OPERAND (file, x, 0);
8359 putc (',', file);
8361 return;
8363 case 'D':
8364 /* Little bit of braindamage here. The SSE compare instructions
8365 use completely different names for the comparisons than the
8366 fp conditional moves do. */
8367 switch (GET_CODE (x))
8369 case EQ:
8370 case UNEQ:
8371 fputs ("eq", file);
8372 break;
8373 case LT:
8374 case UNLT:
8375 fputs ("lt", file);
8376 break;
8377 case LE:
8378 case UNLE:
8379 fputs ("le", file);
8380 break;
8381 case UNORDERED:
8382 fputs ("unord", file);
8383 break;
8384 case NE:
8385 case LTGT:
8386 fputs ("neq", file);
8387 break;
8388 case UNGE:
8389 case GE:
8390 fputs ("nlt", file);
8391 break;
8392 case UNGT:
8393 case GT:
8394 fputs ("nle", file);
8395 break;
8396 case ORDERED:
8397 fputs ("ord", file);
8398 break;
8399 default:
8400 gcc_unreachable ();
8402 return;
8403 case 'O':
8404 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8405 if (ASSEMBLER_DIALECT == ASM_ATT)
8407 switch (GET_MODE (x))
8409 case HImode: putc ('w', file); break;
8410 case SImode:
8411 case SFmode: putc ('l', file); break;
8412 case DImode:
8413 case DFmode: putc ('q', file); break;
8414 default: gcc_unreachable ();
8416 putc ('.', file);
8418 #endif
8419 return;
8420 case 'C':
8421 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8422 return;
8423 case 'F':
8424 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8425 if (ASSEMBLER_DIALECT == ASM_ATT)
8426 putc ('.', file);
8427 #endif
8428 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8429 return;
8431 /* Like above, but reverse condition */
8432 case 'c':
8433 /* Check to see if argument to %c is really a constant
8434 and not a condition code which needs to be reversed. */
8435 if (!COMPARISON_P (x))
8437 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8438 return;
8440 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8441 return;
8442 case 'f':
8443 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8444 if (ASSEMBLER_DIALECT == ASM_ATT)
8445 putc ('.', file);
8446 #endif
8447 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8448 return;
8450 case 'H':
8451 /* It doesn't actually matter what mode we use here, as we're
8452 only going to use this for printing. */
8453 x = adjust_address_nv (x, DImode, 8);
8454 break;
8456 case '+':
8458 rtx x;
8460 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8461 return;
8463 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8464 if (x)
8466 int pred_val = INTVAL (XEXP (x, 0));
8468 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8469 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8471 int taken = pred_val > REG_BR_PROB_BASE / 2;
8472 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8474 /* Emit hints only where the default branch prediction
8475 heuristics would fail. */
8476 if (taken != cputaken)
8478 /* We use 3e (DS) prefix for taken branches and
8479 2e (CS) prefix for not taken branches. */
8480 if (taken)
8481 fputs ("ds ; ", file);
8482 else
8483 fputs ("cs ; ", file);
8487 return;
8489 default:
8490 output_operand_lossage ("invalid operand code '%c'", code);
8494 if (REG_P (x))
8495 print_reg (x, code, file);
8497 else if (MEM_P (x))
8499 /* No `byte ptr' prefix for call instructions. */
8500 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8502 const char * size;
8503 switch (GET_MODE_SIZE (GET_MODE (x)))
8505 case 1: size = "BYTE"; break;
8506 case 2: size = "WORD"; break;
8507 case 4: size = "DWORD"; break;
8508 case 8: size = "QWORD"; break;
8509 case 12: size = "XWORD"; break;
8510 case 16: size = "XMMWORD"; break;
8511 default:
8512 gcc_unreachable ();
8515 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8516 if (code == 'b')
8517 size = "BYTE";
8518 else if (code == 'w')
8519 size = "WORD";
8520 else if (code == 'k')
8521 size = "DWORD";
8523 fputs (size, file);
8524 fputs (" PTR ", file);
8527 x = XEXP (x, 0);
8528 /* Avoid (%rip) for call operands. */
8529 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8530 && !CONST_INT_P (x))
8531 output_addr_const (file, x);
8532 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8533 output_operand_lossage ("invalid constraints for operand");
8534 else
8535 output_address (x);
8538 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8540 REAL_VALUE_TYPE r;
8541 long l;
8543 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8544 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8546 if (ASSEMBLER_DIALECT == ASM_ATT)
8547 putc ('$', file);
8548 fprintf (file, "0x%08lx", l);
8551 /* These float cases don't actually occur as immediate operands. */
8552 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8554 char dstr[30];
8556 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8557 fprintf (file, "%s", dstr);
8560 else if (GET_CODE (x) == CONST_DOUBLE
8561 && GET_MODE (x) == XFmode)
8563 char dstr[30];
8565 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8566 fprintf (file, "%s", dstr);
8569 else
8571 /* We have patterns that allow zero sets of memory, for instance.
8572 In 64-bit mode, we should probably support all 8-byte vectors,
8573 since we can in fact encode that into an immediate. */
8574 if (GET_CODE (x) == CONST_VECTOR)
8576 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8577 x = const0_rtx;
8580 if (code != 'P')
8582 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8584 if (ASSEMBLER_DIALECT == ASM_ATT)
8585 putc ('$', file);
8587 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8588 || GET_CODE (x) == LABEL_REF)
8590 if (ASSEMBLER_DIALECT == ASM_ATT)
8591 putc ('$', file);
8592 else
8593 fputs ("OFFSET FLAT:", file);
8596 if (CONST_INT_P (x))
8597 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8598 else if (flag_pic)
8599 output_pic_addr_const (file, x, code);
8600 else
8601 output_addr_const (file, x);
8605 /* Print a memory operand whose address is ADDR. */
8607 void
8608 print_operand_address (FILE *file, rtx addr)
8610 struct ix86_address parts;
8611 rtx base, index, disp;
8612 int scale;
8613 int ok = ix86_decompose_address (addr, &parts);
8615 gcc_assert (ok);
8617 base = parts.base;
8618 index = parts.index;
8619 disp = parts.disp;
8620 scale = parts.scale;
8622 switch (parts.seg)
8624 case SEG_DEFAULT:
8625 break;
8626 case SEG_FS:
8627 case SEG_GS:
8628 if (USER_LABEL_PREFIX[0] == 0)
8629 putc ('%', file);
8630 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8631 break;
8632 default:
8633 gcc_unreachable ();
8636 if (!base && !index)
8638 /* A displacement-only address requires special attention. */
8640 if (CONST_INT_P (disp))
8642 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8644 if (USER_LABEL_PREFIX[0] == 0)
8645 putc ('%', file);
8646 fputs ("ds:", file);
8648 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8650 else if (flag_pic)
8651 output_pic_addr_const (file, disp, 0);
8652 else
8653 output_addr_const (file, disp);
8655 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
8656 if (TARGET_64BIT)
8658 if (GET_CODE (disp) == CONST
8659 && GET_CODE (XEXP (disp, 0)) == PLUS
8660 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8661 disp = XEXP (XEXP (disp, 0), 0);
8662 if (GET_CODE (disp) == LABEL_REF
8663 || (GET_CODE (disp) == SYMBOL_REF
8664 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8665 fputs ("(%rip)", file);
8668 else
8670 if (ASSEMBLER_DIALECT == ASM_ATT)
8672 if (disp)
8674 if (flag_pic)
8675 output_pic_addr_const (file, disp, 0);
8676 else if (GET_CODE (disp) == LABEL_REF)
8677 output_asm_label (disp);
8678 else
8679 output_addr_const (file, disp);
8682 putc ('(', file);
8683 if (base)
8684 print_reg (base, 0, file);
8685 if (index)
8687 putc (',', file);
8688 print_reg (index, 0, file);
8689 if (scale != 1)
8690 fprintf (file, ",%d", scale);
8692 putc (')', file);
8694 else
8696 rtx offset = NULL_RTX;
8698 if (disp)
8700 /* Pull out the offset of a symbol; print any symbol itself. */
8701 if (GET_CODE (disp) == CONST
8702 && GET_CODE (XEXP (disp, 0)) == PLUS
8703 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8705 offset = XEXP (XEXP (disp, 0), 1);
8706 disp = gen_rtx_CONST (VOIDmode,
8707 XEXP (XEXP (disp, 0), 0));
8710 if (flag_pic)
8711 output_pic_addr_const (file, disp, 0);
8712 else if (GET_CODE (disp) == LABEL_REF)
8713 output_asm_label (disp);
8714 else if (CONST_INT_P (disp))
8715 offset = disp;
8716 else
8717 output_addr_const (file, disp);
8720 putc ('[', file);
8721 if (base)
8723 print_reg (base, 0, file);
8724 if (offset)
8726 if (INTVAL (offset) >= 0)
8727 putc ('+', file);
8728 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8731 else if (offset)
8732 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8733 else
8734 putc ('0', file);
8736 if (index)
8738 putc ('+', file);
8739 print_reg (index, 0, file);
8740 if (scale != 1)
8741 fprintf (file, "*%d", scale);
8743 putc (']', file);
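To make the two dialects handled above concrete, a small stand-alone formatter that prints the same base/index/scale/displacement address in both syntaxes; it is purely illustrative and not part of the compiler.

/* Illustrative sketch: AT&T "disp(%base,%index,scale)" vs. Intel
   "[base+index*scale+disp]", the two layouts emitted above.  */
#include <stdio.h>

void
show_address_syntaxes (const char *base, const char *index, int scale, long disp)
{
  printf ("AT&T:  %ld(%%%s,%%%s,%d)\n", disp, base, index, scale);
  printf ("Intel: [%s+%s*%d+%ld]\n", base, index, scale, disp);
}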
8748 bool
8749 output_addr_const_extra (FILE *file, rtx x)
8751 rtx op;
8753 if (GET_CODE (x) != UNSPEC)
8754 return false;
8756 op = XVECEXP (x, 0, 0);
8757 switch (XINT (x, 1))
8759 case UNSPEC_GOTTPOFF:
8760 output_addr_const (file, op);
8761 /* FIXME: This might be @TPOFF in Sun ld. */
8762 fputs ("@GOTTPOFF", file);
8763 break;
8764 case UNSPEC_TPOFF:
8765 output_addr_const (file, op);
8766 fputs ("@TPOFF", file);
8767 break;
8768 case UNSPEC_NTPOFF:
8769 output_addr_const (file, op);
8770 if (TARGET_64BIT)
8771 fputs ("@TPOFF", file);
8772 else
8773 fputs ("@NTPOFF", file);
8774 break;
8775 case UNSPEC_DTPOFF:
8776 output_addr_const (file, op);
8777 fputs ("@DTPOFF", file);
8778 break;
8779 case UNSPEC_GOTNTPOFF:
8780 output_addr_const (file, op);
8781 if (TARGET_64BIT)
8782 fputs ("@GOTTPOFF(%rip)", file);
8783 else
8784 fputs ("@GOTNTPOFF", file);
8785 break;
8786 case UNSPEC_INDNTPOFF:
8787 output_addr_const (file, op);
8788 fputs ("@INDNTPOFF", file);
8789 break;
8791 default:
8792 return false;
8795 return true;
8798 /* Split one or more DImode RTL references into pairs of SImode
8799 references. The RTL can be REG, offsettable MEM, integer constant, or
8800 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8801 split and "num" is its length. lo_half and hi_half are output arrays
8802 that parallel "operands". */
8804 void
8805 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8807 while (num--)
8809 rtx op = operands[num];
8811 /* simplify_subreg refuses to split volatile memory addresses,
8812 but we still have to handle them. */
8813 if (MEM_P (op))
8815 lo_half[num] = adjust_address (op, SImode, 0);
8816 hi_half[num] = adjust_address (op, SImode, 4);
8818 else
8820 lo_half[num] = simplify_gen_subreg (SImode, op,
8821 GET_MODE (op) == VOIDmode
8822 ? DImode : GET_MODE (op), 0);
8823 hi_half[num] = simplify_gen_subreg (SImode, op,
8824 GET_MODE (op) == VOIDmode
8825 ? DImode : GET_MODE (op), 4);
8829 /* Split one or more TImode RTL references into pairs of DImode
8830 references. The RTL can be REG, offsettable MEM, integer constant, or
8831 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8832 split and "num" is its length. lo_half and hi_half are output arrays
8833 that parallel "operands". */
8835 void
8836 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8838 while (num--)
8840 rtx op = operands[num];
8842 /* simplify_subreg refuses to split volatile memory addresses, but we
8843 still have to handle them. */
8844 if (MEM_P (op))
8846 lo_half[num] = adjust_address (op, DImode, 0);
8847 hi_half[num] = adjust_address (op, DImode, 8);
8849 else
8851 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8852 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
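For the non-memory case, the subreg byte offsets used above (0 and 4 for DImode, 0 and 8 for TImode) select the little-endian low and high halves. A stand-alone arithmetic equivalent, assuming a 64-bit value, is shown below; the function name is illustrative only.

/* Illustrative sketch of what splitting a DImode value into SImode
   halves means on a little-endian target, as split_di does via subregs
   at byte offsets 0 and 4.  */
#include <stdint.h>

void
split_u64 (uint64_t value, uint32_t *lo_half, uint32_t *hi_half)
{
  *lo_half = (uint32_t) value;          /* bytes 0..3 */
  *hi_half = (uint32_t) (value >> 32);  /* bytes 4..7 */
}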
8857 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8858 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8859 is the expression of the binary operation. The output may either be
8860 emitted here, or returned to the caller, like all output_* functions.
8862 There is no guarantee that the operands are the same mode, as they
8863 might be within FLOAT or FLOAT_EXTEND expressions. */
8865 #ifndef SYSV386_COMPAT
8866 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8867 wants to fix the assemblers because that causes incompatibility
8868 with gcc. No-one wants to fix gcc because that causes
8869 incompatibility with assemblers... You can use the option of
8870 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8871 #define SYSV386_COMPAT 1
8872 #endif
8874 const char *
8875 output_387_binary_op (rtx insn, rtx *operands)
8877 static char buf[30];
8878 const char *p;
8879 const char *ssep;
8880 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8882 #ifdef ENABLE_CHECKING
8883 /* Even if we do not want to check the inputs, this documents the input
8884 constraints, which helps in understanding the following code. */
8885 if (STACK_REG_P (operands[0])
8886 && ((REG_P (operands[1])
8887 && REGNO (operands[0]) == REGNO (operands[1])
8888 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8889 || (REG_P (operands[2])
8890 && REGNO (operands[0]) == REGNO (operands[2])
8891 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8892 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8893 ; /* ok */
8894 else
8895 gcc_assert (is_sse);
8896 #endif
8898 switch (GET_CODE (operands[3]))
8900 case PLUS:
8901 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8902 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8903 p = "fiadd";
8904 else
8905 p = "fadd";
8906 ssep = "add";
8907 break;
8909 case MINUS:
8910 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8911 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8912 p = "fisub";
8913 else
8914 p = "fsub";
8915 ssep = "sub";
8916 break;
8918 case MULT:
8919 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8920 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8921 p = "fimul";
8922 else
8923 p = "fmul";
8924 ssep = "mul";
8925 break;
8927 case DIV:
8928 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8929 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8930 p = "fidiv";
8931 else
8932 p = "fdiv";
8933 ssep = "div";
8934 break;
8936 default:
8937 gcc_unreachable ();
8940 if (is_sse)
8942 strcpy (buf, ssep);
8943 if (GET_MODE (operands[0]) == SFmode)
8944 strcat (buf, "ss\t{%2, %0|%0, %2}");
8945 else
8946 strcat (buf, "sd\t{%2, %0|%0, %2}");
8947 return buf;
8949 strcpy (buf, p);
8951 switch (GET_CODE (operands[3]))
8953 case MULT:
8954 case PLUS:
8955 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8957 rtx temp = operands[2];
8958 operands[2] = operands[1];
8959 operands[1] = temp;
8962 /* We know operands[0] == operands[1]. */
8964 if (MEM_P (operands[2]))
8966 p = "%z2\t%2";
8967 break;
8970 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8972 if (STACK_TOP_P (operands[0]))
8973 /* How is it that we are storing to a dead operand[2]?
8974 Well, presumably operands[1] is dead too. We can't
8975 store the result to st(0) as st(0) gets popped on this
8976 instruction. Instead store to operands[2] (which I
8977 think has to be st(1)). st(1) will be popped later.
8978 gcc <= 2.8.1 didn't have this check and generated
8979 assembly code that the Unixware assembler rejected. */
8980 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8981 else
8982 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8983 break;
8986 if (STACK_TOP_P (operands[0]))
8987 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8988 else
8989 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8990 break;
8992 case MINUS:
8993 case DIV:
8994 if (MEM_P (operands[1]))
8996 p = "r%z1\t%1";
8997 break;
9000 if (MEM_P (operands[2]))
9002 p = "%z2\t%2";
9003 break;
9006 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9008 #if SYSV386_COMPAT
9009 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9010 derived assemblers, confusingly reverse the direction of
9011 the operation for fsub{r} and fdiv{r} when the
9012 destination register is not st(0). The Intel assembler
9013 doesn't have this brain damage. Read !SYSV386_COMPAT to
9014 figure out what the hardware really does. */
9015 if (STACK_TOP_P (operands[0]))
9016 p = "{p\t%0, %2|rp\t%2, %0}";
9017 else
9018 p = "{rp\t%2, %0|p\t%0, %2}";
9019 #else
9020 if (STACK_TOP_P (operands[0]))
9021 /* As above for fmul/fadd, we can't store to st(0). */
9022 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9023 else
9024 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9025 #endif
9026 break;
9029 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9031 #if SYSV386_COMPAT
9032 if (STACK_TOP_P (operands[0]))
9033 p = "{rp\t%0, %1|p\t%1, %0}";
9034 else
9035 p = "{p\t%1, %0|rp\t%0, %1}";
9036 #else
9037 if (STACK_TOP_P (operands[0]))
9038 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9039 else
9040 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9041 #endif
9042 break;
9045 if (STACK_TOP_P (operands[0]))
9047 if (STACK_TOP_P (operands[1]))
9048 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9049 else
9050 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9051 break;
9053 else if (STACK_TOP_P (operands[1]))
9055 #if SYSV386_COMPAT
9056 p = "{\t%1, %0|r\t%0, %1}";
9057 #else
9058 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9059 #endif
9061 else
9063 #if SYSV386_COMPAT
9064 p = "{r\t%2, %0|\t%0, %2}";
9065 #else
9066 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9067 #endif
9069 break;
9071 default:
9072 gcc_unreachable ();
9075 strcat (buf, p);
9076 return buf;
9079 /* Return needed mode for entity in optimize_mode_switching pass. */
9082 ix86_mode_needed (int entity, rtx insn)
9084 enum attr_i387_cw mode;
9086 /* The mode UNINITIALIZED is used to store the control word after a
9087 function call or ASM pattern. The mode ANY specifies that the function
9088 has no requirements on the control word and makes no changes to the
9089 bits we are interested in. */
9091 if (CALL_P (insn)
9092 || (NONJUMP_INSN_P (insn)
9093 && (asm_noperands (PATTERN (insn)) >= 0
9094 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9095 return I387_CW_UNINITIALIZED;
9097 if (recog_memoized (insn) < 0)
9098 return I387_CW_ANY;
9100 mode = get_attr_i387_cw (insn);
9102 switch (entity)
9104 case I387_TRUNC:
9105 if (mode == I387_CW_TRUNC)
9106 return mode;
9107 break;
9109 case I387_FLOOR:
9110 if (mode == I387_CW_FLOOR)
9111 return mode;
9112 break;
9114 case I387_CEIL:
9115 if (mode == I387_CW_CEIL)
9116 return mode;
9117 break;
9119 case I387_MASK_PM:
9120 if (mode == I387_CW_MASK_PM)
9121 return mode;
9122 break;
9124 default:
9125 gcc_unreachable ();
9128 return I387_CW_ANY;
9131 /* Output code to initialize the control word copies used by the trunc?f?i
9132 and rounding patterns. CURRENT_MODE is set to the current control word,
9133 while NEW_MODE is set to the new control word. */
9135 void
9136 emit_i387_cw_initialization (int mode)
9138 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9139 rtx new_mode;
9141 int slot;
9143 rtx reg = gen_reg_rtx (HImode);
9145 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9146 emit_move_insn (reg, copy_rtx (stored_mode));
9148 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9150 switch (mode)
9152 case I387_CW_TRUNC:
9153 /* round toward zero (truncate) */
9154 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9155 slot = SLOT_CW_TRUNC;
9156 break;
9158 case I387_CW_FLOOR:
9159 /* round down toward -oo */
9160 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9161 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9162 slot = SLOT_CW_FLOOR;
9163 break;
9165 case I387_CW_CEIL:
9166 /* round up toward +oo */
9167 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9168 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9169 slot = SLOT_CW_CEIL;
9170 break;
9172 case I387_CW_MASK_PM:
9173 /* mask precision exception for nearbyint() */
9174 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9175 slot = SLOT_CW_MASK_PM;
9176 break;
9178 default:
9179 gcc_unreachable ();
9182 else
9184 switch (mode)
9186 case I387_CW_TRUNC:
9187 /* round toward zero (truncate) */
9188 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9189 slot = SLOT_CW_TRUNC;
9190 break;
9192 case I387_CW_FLOOR:
9193 /* round down toward -oo */
9194 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9195 slot = SLOT_CW_FLOOR;
9196 break;
9198 case I387_CW_CEIL:
9199 /* round up toward +oo */
9200 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9201 slot = SLOT_CW_CEIL;
9202 break;
9204 case I387_CW_MASK_PM:
9205 /* mask precision exception for nearbyint() */
9206 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9207 slot = SLOT_CW_MASK_PM;
9208 break;
9210 default:
9211 gcc_unreachable ();
9215 gcc_assert (slot < MAX_386_STACK_LOCALS);
9217 new_mode = assign_386_stack_local (HImode, slot);
9218 emit_move_insn (new_mode, reg);
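The constants ORed and masked in above are the x87 control word's rounding-control field (bits 10-11) and the precision-exception mask bit (bit 5). A stand-alone sketch of the resulting control-word values, mirroring the andhi3/iorhi3 sequences emitted above; the helper names are illustrative only.

/* Illustrative sketch: x87 control word rounding-control values.
   RC = 00 nearest, 01 down, 10 up, 11 truncate; bit 5 masks the
   precision exception.  */
#include <stdint.h>

uint16_t
i387_cw_truncate (uint16_t cw)       { return cw | 0x0c00; }

uint16_t
i387_cw_floor (uint16_t cw)          { return (uint16_t) ((cw & ~0x0c00) | 0x0400); }

uint16_t
i387_cw_ceil (uint16_t cw)           { return (uint16_t) ((cw & ~0x0c00) | 0x0800); }

uint16_t
i387_cw_mask_precision (uint16_t cw) { return cw | 0x0020; }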
9221 /* Output code for INSN to convert a float to a signed int. OPERANDS
9222 are the insn operands. The output may be [HSD]Imode and the input
9223 operand may be [SDX]Fmode. */
9225 const char *
9226 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9228 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9229 int dimode_p = GET_MODE (operands[0]) == DImode;
9230 int round_mode = get_attr_i387_cw (insn);
9232 /* Jump through a hoop or two for DImode, since the hardware has no
9233 non-popping instruction. We used to do this a different way, but
9234 that was somewhat fragile and broke with post-reload splitters. */
9235 if ((dimode_p || fisttp) && !stack_top_dies)
9236 output_asm_insn ("fld\t%y1", operands);
9238 gcc_assert (STACK_TOP_P (operands[1]));
9239 gcc_assert (MEM_P (operands[0]));
9241 if (fisttp)
9242 output_asm_insn ("fisttp%z0\t%0", operands);
9243 else
9245 if (round_mode != I387_CW_ANY)
9246 output_asm_insn ("fldcw\t%3", operands);
9247 if (stack_top_dies || dimode_p)
9248 output_asm_insn ("fistp%z0\t%0", operands);
9249 else
9250 output_asm_insn ("fist%z0\t%0", operands);
9251 if (round_mode != I387_CW_ANY)
9252 output_asm_insn ("fldcw\t%2", operands);
9255 return "";
9258 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9259 have the values zero or one, indicates the ffreep insn's operand
9260 from the OPERANDS array. */
9262 static const char *
9263 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9265 if (TARGET_USE_FFREEP)
9266 #if HAVE_AS_IX86_FFREEP
9267 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9268 #else
9270 static char retval[] = ".word\t0xc_df";
9271 int regno = REGNO (operands[opno]);
9273 gcc_assert (FP_REGNO_P (regno));
9275 retval[9] = '0' + (regno - FIRST_STACK_REG);
9276 return retval;
9278 #endif
9280 return opno ? "fstp\t%y1" : "fstp\t%y0";
9284 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9285 should be used. UNORDERED_P is true when fucom should be used. */
9287 const char *
9288 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9290 int stack_top_dies;
9291 rtx cmp_op0, cmp_op1;
9292 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9294 if (eflags_p)
9296 cmp_op0 = operands[0];
9297 cmp_op1 = operands[1];
9299 else
9301 cmp_op0 = operands[1];
9302 cmp_op1 = operands[2];
9305 if (is_sse)
9307 if (GET_MODE (operands[0]) == SFmode)
9308 if (unordered_p)
9309 return "ucomiss\t{%1, %0|%0, %1}";
9310 else
9311 return "comiss\t{%1, %0|%0, %1}";
9312 else
9313 if (unordered_p)
9314 return "ucomisd\t{%1, %0|%0, %1}";
9315 else
9316 return "comisd\t{%1, %0|%0, %1}";
9319 gcc_assert (STACK_TOP_P (cmp_op0));
9321 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9323 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9325 if (stack_top_dies)
9327 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9328 return output_387_ffreep (operands, 1);
9330 else
9331 return "ftst\n\tfnstsw\t%0";
9334 if (STACK_REG_P (cmp_op1)
9335 && stack_top_dies
9336 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9337 && REGNO (cmp_op1) != FIRST_STACK_REG)
9339 /* If the top of the 387 stack dies, and the other operand
9340 is also a stack register that dies, then this must be a
9341 `fcompp' float compare. */
9343 if (eflags_p)
9345 /* There is no double popping fcomi variant. Fortunately,
9346 eflags is immune from the fstp's cc clobbering. */
9347 if (unordered_p)
9348 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9349 else
9350 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9351 return output_387_ffreep (operands, 0);
9353 else
9355 if (unordered_p)
9356 return "fucompp\n\tfnstsw\t%0";
9357 else
9358 return "fcompp\n\tfnstsw\t%0";
9361 else
9363 /* Encoded here as (eflags_p << 3) | (intmode << 2) | (unordered_p << 1) | stack_top_dies. */
9365 static const char * const alt[16] =
9367 "fcom%z2\t%y2\n\tfnstsw\t%0",
9368 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9369 "fucom%z2\t%y2\n\tfnstsw\t%0",
9370 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9372 "ficom%z2\t%y2\n\tfnstsw\t%0",
9373 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9374 NULL,
9375 NULL,
9377 "fcomi\t{%y1, %0|%0, %y1}",
9378 "fcomip\t{%y1, %0|%0, %y1}",
9379 "fucomi\t{%y1, %0|%0, %y1}",
9380 "fucomip\t{%y1, %0|%0, %y1}",
9382 NULL,
9383 NULL,
9384 NULL,
9385 NULL
9388 int mask;
9389 const char *ret;
9391 mask = eflags_p << 3;
9392 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9393 mask |= unordered_p << 1;
9394 mask |= stack_top_dies;
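/* Example: an fcomi-style compare (eflags_p set) that is unordered and
   whose stack top dies gives mask = 8 + 2 + 1 = 11, selecting the
   "fucomip" alternative above.  */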
9396 gcc_assert (mask < 16);
9397 ret = alt[mask];
9398 gcc_assert (ret);
9400 return ret;
9404 void
9405 ix86_output_addr_vec_elt (FILE *file, int value)
9407 const char *directive = ASM_LONG;
9409 #ifdef ASM_QUAD
9410 if (TARGET_64BIT)
9411 directive = ASM_QUAD;
9412 #else
9413 gcc_assert (!TARGET_64BIT);
9414 #endif
9416 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9419 void
9420 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9422 const char *directive = ASM_LONG;
9424 #ifdef ASM_QUAD
9425 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9426 directive = ASM_QUAD;
9427 #else
9428 gcc_assert (!TARGET_64BIT);
9429 #endif
9430 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9431 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9432 fprintf (file, "%s%s%d-%s%d\n",
9433 directive, LPREFIX, value, LPREFIX, rel);
9434 else if (HAVE_AS_GOTOFF_IN_DATA)
9435 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9436 #if TARGET_MACHO
9437 else if (TARGET_MACHO)
9439 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9440 machopic_output_function_base_name (file);
9441 fprintf (file, "\n");
9443 #endif
9444 else
9445 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9446 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9449 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9450 for the target. */
9452 void
9453 ix86_expand_clear (rtx dest)
9455 rtx tmp;
9457 /* We play register width games, which are only valid after reload. */
9458 gcc_assert (reload_completed);
9460 /* Avoid HImode and its attendant prefix byte. */
9461 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9462 dest = gen_rtx_REG (SImode, REGNO (dest));
9464 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9466 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9467 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9469 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9470 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9473 emit_insn (tmp);
9476 /* X is an unchanging MEM. If it is a constant pool reference, return
9477 the constant pool rtx, else NULL. */
9480 maybe_get_pool_constant (rtx x)
9482 x = ix86_delegitimize_address (XEXP (x, 0));
9484 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9485 return get_pool_constant (x);
9487 return NULL_RTX;
9490 void
9491 ix86_expand_move (enum machine_mode mode, rtx operands[])
9493 int strict = (reload_in_progress || reload_completed);
9494 rtx op0, op1;
9495 enum tls_model model;
9497 op0 = operands[0];
9498 op1 = operands[1];
9500 if (GET_CODE (op1) == SYMBOL_REF)
9502 model = SYMBOL_REF_TLS_MODEL (op1);
9503 if (model)
9505 op1 = legitimize_tls_address (op1, model, true);
9506 op1 = force_operand (op1, op0);
9507 if (op1 == op0)
9508 return;
9511 else if (GET_CODE (op1) == CONST
9512 && GET_CODE (XEXP (op1, 0)) == PLUS
9513 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9515 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9516 if (model)
9518 rtx addend = XEXP (XEXP (op1, 0), 1);
9519 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9520 op1 = force_operand (op1, NULL);
9521 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9522 op0, 1, OPTAB_DIRECT);
9523 if (op1 == op0)
9524 return;
9528 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9530 if (TARGET_MACHO && !TARGET_64BIT)
9532 #if TARGET_MACHO
9533 if (MACHOPIC_PURE)
9535 rtx temp = ((reload_in_progress
9536 || ((op0 && REG_P (op0))
9537 && mode == Pmode))
9538 ? op0 : gen_reg_rtx (Pmode));
9539 op1 = machopic_indirect_data_reference (op1, temp);
9540 op1 = machopic_legitimize_pic_address (op1, mode,
9541 temp == op1 ? 0 : temp);
9543 else if (MACHOPIC_INDIRECT)
9544 op1 = machopic_indirect_data_reference (op1, 0);
9545 if (op0 == op1)
9546 return;
9547 #endif
9549 else
9551 if (MEM_P (op0))
9552 op1 = force_reg (Pmode, op1);
9553 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9555 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9556 op1 = legitimize_pic_address (op1, reg);
9557 if (op0 == op1)
9558 return;
9562 else
9564 if (MEM_P (op0)
9565 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9566 || !push_operand (op0, mode))
9567 && MEM_P (op1))
9568 op1 = force_reg (mode, op1);
9570 if (push_operand (op0, mode)
9571 && ! general_no_elim_operand (op1, mode))
9572 op1 = copy_to_mode_reg (mode, op1);
9574 /* Force large constants in 64bit compilation into register
9575 to get them CSEed. */
9576 if (TARGET_64BIT && mode == DImode
9577 && immediate_operand (op1, mode)
9578 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9579 && !register_operand (op0, mode)
9580 && optimize && !reload_completed && !reload_in_progress)
9581 op1 = copy_to_mode_reg (mode, op1);
9583 if (FLOAT_MODE_P (mode))
9585 /* If we are loading a floating point constant to a register,
9586 force the value to memory now, since we'll get better code
9587 out the back end. */
9589 if (strict)
9591 else if (GET_CODE (op1) == CONST_DOUBLE)
9593 op1 = validize_mem (force_const_mem (mode, op1));
9594 if (!register_operand (op0, mode))
9596 rtx temp = gen_reg_rtx (mode);
9597 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9598 emit_move_insn (op0, temp);
9599 return;
9605 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9608 void
9609 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9611 rtx op0 = operands[0], op1 = operands[1];
9613 /* Force constants other than zero into memory. We do not know how
9614 the instructions used to build constants modify the upper 64 bits
9615 of the register; once we have that information we may be able
9616 to handle some of them more efficiently. */
9617 if ((reload_in_progress | reload_completed) == 0
9618 && register_operand (op0, mode)
9619 && CONSTANT_P (op1)
9620 && standard_sse_constant_p (op1) <= 0)
9621 op1 = validize_mem (force_const_mem (mode, op1));
9623 /* Make operand1 a register if it isn't already. */
9624 if (!no_new_pseudos
9625 && !register_operand (op0, mode)
9626 && !register_operand (op1, mode))
9628 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9629 return;
9632 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9635 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9636 straight to ix86_expand_vector_move. */
9637 /* Code generation for scalar reg-reg moves of single and double precision data:
9638 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9639 movaps reg, reg
9640 else
9641 movss reg, reg
9642 if (x86_sse_partial_reg_dependency == true)
9643 movapd reg, reg
9644 else
9645 movsd reg, reg
9647 Code generation for scalar loads of double precision data:
9648 if (x86_sse_split_regs == true)
9649 movlpd mem, reg (gas syntax)
9650 else
9651 movsd mem, reg
9653 Code generation for unaligned packed loads of single precision data
9654 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9655 if (x86_sse_unaligned_move_optimal)
9656 movups mem, reg
9658 if (x86_sse_partial_reg_dependency == true)
9660 xorps reg, reg
9661 movlps mem, reg
9662 movhps mem+8, reg
9664 else
9666 movlps mem, reg
9667 movhps mem+8, reg
9670 Code generation for unaligned packed loads of double precision data
9671 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9672 if (x86_sse_unaligned_move_optimal)
9673 movupd mem, reg
9675 if (x86_sse_split_regs == true)
9677 movlpd mem, reg
9678 movhpd mem+8, reg
9680 else
9682 movsd mem, reg
9683 movhpd mem+8, reg
9687 void
9688 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9690 rtx op0, op1, m;
9692 op0 = operands[0];
9693 op1 = operands[1];
9695 if (MEM_P (op1))
9697 /* If we're optimizing for size, movups is the smallest. */
9698 if (optimize_size)
9700 op0 = gen_lowpart (V4SFmode, op0);
9701 op1 = gen_lowpart (V4SFmode, op1);
9702 emit_insn (gen_sse_movups (op0, op1));
9703 return;
9706 /* ??? If we have typed data, then it would appear that using
9707 movdqu is the only way to get unaligned data loaded with
9708 integer type. */
9709 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9711 op0 = gen_lowpart (V16QImode, op0);
9712 op1 = gen_lowpart (V16QImode, op1);
9713 emit_insn (gen_sse2_movdqu (op0, op1));
9714 return;
9717 if (TARGET_SSE2 && mode == V2DFmode)
9719 rtx zero;
9721 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9723 op0 = gen_lowpart (V2DFmode, op0);
9724 op1 = gen_lowpart (V2DFmode, op1);
9725 emit_insn (gen_sse2_movupd (op0, op1));
9726 return;
9729 /* When SSE registers are split into halves, we can avoid
9730 writing to the top half twice. */
9731 if (TARGET_SSE_SPLIT_REGS)
9733 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9734 zero = op0;
9736 else
9738 /* ??? Not sure about the best option for the Intel chips.
9739 The following would seem to satisfy; the register is
9740 entirely cleared, breaking the dependency chain. We
9741 then store to the upper half, with a dependency depth
9742 of one. A rumor has it that Intel recommends two movsd
9743 followed by an unpacklpd, but this is unconfirmed. And
9744 given that the dependency depth of the unpacklpd would
9745 still be one, I'm not sure why this would be better. */
9746 zero = CONST0_RTX (V2DFmode);
9749 m = adjust_address (op1, DFmode, 0);
9750 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9751 m = adjust_address (op1, DFmode, 8);
9752 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9754 else
9756 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9758 op0 = gen_lowpart (V4SFmode, op0);
9759 op1 = gen_lowpart (V4SFmode, op1);
9760 emit_insn (gen_sse_movups (op0, op1));
9761 return;
9764 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9765 emit_move_insn (op0, CONST0_RTX (mode));
9766 else
9767 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9769 if (mode != V4SFmode)
9770 op0 = gen_lowpart (V4SFmode, op0);
9771 m = adjust_address (op1, V2SFmode, 0);
9772 emit_insn (gen_sse_loadlps (op0, op0, m));
9773 m = adjust_address (op1, V2SFmode, 8);
9774 emit_insn (gen_sse_loadhps (op0, op0, m));
9777 else if (MEM_P (op0))
9779 /* If we're optimizing for size, movups is the smallest. */
9780 if (optimize_size)
9782 op0 = gen_lowpart (V4SFmode, op0);
9783 op1 = gen_lowpart (V4SFmode, op1);
9784 emit_insn (gen_sse_movups (op0, op1));
9785 return;
9788 /* ??? Similar to above, only less clear because of quote
9789 typeless stores unquote. */
9790 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9791 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9793 op0 = gen_lowpart (V16QImode, op0);
9794 op1 = gen_lowpart (V16QImode, op1);
9795 emit_insn (gen_sse2_movdqu (op0, op1));
9796 return;
9799 if (TARGET_SSE2 && mode == V2DFmode)
9801 m = adjust_address (op0, DFmode, 0);
9802 emit_insn (gen_sse2_storelpd (m, op1));
9803 m = adjust_address (op0, DFmode, 8);
9804 emit_insn (gen_sse2_storehpd (m, op1));
9806 else
9808 if (mode != V4SFmode)
9809 op1 = gen_lowpart (V4SFmode, op1);
9810 m = adjust_address (op0, V2SFmode, 0);
9811 emit_insn (gen_sse_storelps (m, op1));
9812 m = adjust_address (op0, V2SFmode, 8);
9813 emit_insn (gen_sse_storehps (m, op1));
9816 else
9817 gcc_unreachable ();
9820 /* Expand a push in MODE. This is some mode for which we do not support
9821 proper push instructions, at least from the registers that we expect
9822 the value to live in. */
9824 void
9825 ix86_expand_push (enum machine_mode mode, rtx x)
9827 rtx tmp;
9829 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9830 GEN_INT (-GET_MODE_SIZE (mode)),
9831 stack_pointer_rtx, 1, OPTAB_DIRECT);
9832 if (tmp != stack_pointer_rtx)
9833 emit_move_insn (stack_pointer_rtx, tmp);
9835 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9836 emit_move_insn (tmp, x);
9839 /* Helper function of ix86_fixup_binary_operands to canonicalize
9840 operand order. Returns true if the operands should be swapped. */
9842 static bool
9843 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9844 rtx operands[])
9846 rtx dst = operands[0];
9847 rtx src1 = operands[1];
9848 rtx src2 = operands[2];
9850 /* If the operation is not commutative, we can't do anything. */
9851 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9852 return false;
9854 /* Highest priority is that src1 should match dst. */
9855 if (rtx_equal_p (dst, src1))
9856 return false;
9857 if (rtx_equal_p (dst, src2))
9858 return true;
9860 /* Next highest priority is that immediate constants come second. */
9861 if (immediate_operand (src2, mode))
9862 return false;
9863 if (immediate_operand (src1, mode))
9864 return true;
9866 /* Lowest priority is that memory references should come second. */
9867 if (MEM_P (src2))
9868 return false;
9869 if (MEM_P (src1))
9870 return true;
9872 return false;
9876 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9877 destination to use for the operation. If different from the true
9878 destination in operands[0], a copy operation will be required. */
9881 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9882 rtx operands[])
9884 rtx dst = operands[0];
9885 rtx src1 = operands[1];
9886 rtx src2 = operands[2];
9888 /* Canonicalize operand order. */
9889 if (ix86_swap_binary_operands_p (code, mode, operands))
9891 rtx temp = src1;
9892 src1 = src2;
9893 src2 = temp;
9896 /* Both source operands cannot be in memory. */
9897 if (MEM_P (src1) && MEM_P (src2))
9899 /* Optimization: Only read from memory once. */
9900 if (rtx_equal_p (src1, src2))
9902 src2 = force_reg (mode, src2);
9903 src1 = src2;
9905 else
9906 src2 = force_reg (mode, src2);
9909 /* If the destination is memory, and we do not have matching source
9910 operands, do things in registers. */
9911 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9912 dst = gen_reg_rtx (mode);
9914 /* Source 1 cannot be a constant. */
9915 if (CONSTANT_P (src1))
9916 src1 = force_reg (mode, src1);
9918 /* Source 1 cannot be a non-matching memory. */
9919 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9920 src1 = force_reg (mode, src1);
9922 operands[1] = src1;
9923 operands[2] = src2;
9924 return dst;
9927 /* Similarly, but assume that the destination has already been
9928 set up properly. */
9930 void
9931 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9932 enum machine_mode mode, rtx operands[])
9934 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9935 gcc_assert (dst == operands[0]);
9938 /* Attempt to expand a binary operator. Make the expansion closer to the
9939 actual machine than just general_operand, which will allow 3 separate
9940 memory references (one output, two input) in a single insn. */
9942 void
9943 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9944 rtx operands[])
9946 rtx src1, src2, dst, op, clob;
9948 dst = ix86_fixup_binary_operands (code, mode, operands);
9949 src1 = operands[1];
9950 src2 = operands[2];
9952 /* Emit the instruction. */
9954 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9955 if (reload_in_progress)
9957 /* Reload doesn't know about the flags register, and doesn't know that
9958 it doesn't want to clobber it. We can only do this with PLUS. */
9959 gcc_assert (code == PLUS);
9960 emit_insn (op);
9962 else
9964 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9965 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9968 /* Fix up the destination if needed. */
9969 if (dst != operands[0])
9970 emit_move_insn (operands[0], dst);
9973 /* Return TRUE or FALSE depending on whether the binary operator meets the
9974 appropriate constraints. */
9977 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9978 rtx operands[3])
9980 rtx dst = operands[0];
9981 rtx src1 = operands[1];
9982 rtx src2 = operands[2];
9984 /* Both source operands cannot be in memory. */
9985 if (MEM_P (src1) && MEM_P (src2))
9986 return 0;
9988 /* Canonicalize operand order for commutative operators. */
9989 if (ix86_swap_binary_operands_p (code, mode, operands))
9991 rtx temp = src1;
9992 src1 = src2;
9993 src2 = temp;
9996 /* If the destination is memory, we must have a matching source operand. */
9997 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9998 return 0;
10000 /* Source 1 cannot be a constant. */
10001 if (CONSTANT_P (src1))
10002 return 0;
10004 /* Source 1 cannot be a non-matching memory. */
10005 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10006 return 0;
10008 return 1;
10011 /* Attempt to expand a unary operator. Make the expansion closer to the
10012 actual machine than just general_operand, which will allow 2 separate
10013 memory references (one output, one input) in a single insn. */
10015 void
10016 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10017 rtx operands[])
10019 int matching_memory;
10020 rtx src, dst, op, clob;
10022 dst = operands[0];
10023 src = operands[1];
10025 /* If the destination is memory, and we do not have matching source
10026 operands, do things in registers. */
10027 matching_memory = 0;
10028 if (MEM_P (dst))
10030 if (rtx_equal_p (dst, src))
10031 matching_memory = 1;
10032 else
10033 dst = gen_reg_rtx (mode);
10036 /* When source operand is memory, destination must match. */
10037 if (MEM_P (src) && !matching_memory)
10038 src = force_reg (mode, src);
10040 /* Emit the instruction. */
10042 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10043 if (reload_in_progress || code == NOT)
10045 /* Reload doesn't know about the flags register, and doesn't know that
10046 it doesn't want to clobber it. */
10047 gcc_assert (code == NOT);
10048 emit_insn (op);
10050 else
10052 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10053 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10056 /* Fix up the destination if needed. */
10057 if (dst != operands[0])
10058 emit_move_insn (operands[0], dst);
10061 /* Return TRUE or FALSE depending on whether the unary operator meets the
10062 appropriate constraints. */
10065 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10066 enum machine_mode mode ATTRIBUTE_UNUSED,
10067 rtx operands[2] ATTRIBUTE_UNUSED)
10069 /* If one of operands is memory, source and destination must match. */
10070 if ((MEM_P (operands[0])
10071 || MEM_P (operands[1]))
10072 && ! rtx_equal_p (operands[0], operands[1]))
10073 return FALSE;
10074 return TRUE;
10077 /* Post-reload splitter for converting an SF or DFmode value in an
10078 SSE register into an unsigned SImode. */
10080 void
10081 ix86_split_convert_uns_si_sse (rtx operands[])
10083 enum machine_mode vecmode;
10084 rtx value, large, zero_or_two31, input, two31, x;
10086 large = operands[1];
10087 zero_or_two31 = operands[2];
10088 input = operands[3];
10089 two31 = operands[4];
10090 vecmode = GET_MODE (large);
10091 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10093 /* Load up the value into the low element. We must ensure that the other
10094 elements are valid floats -- zero is the easiest such value. */
10095 if (MEM_P (input))
10097 if (vecmode == V4SFmode)
10098 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10099 else
10100 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10102 else
10104 input = gen_rtx_REG (vecmode, REGNO (input));
10105 emit_move_insn (value, CONST0_RTX (vecmode));
10106 if (vecmode == V4SFmode)
10107 emit_insn (gen_sse_movss (value, value, input));
10108 else
10109 emit_insn (gen_sse2_movsd (value, value, input));
10112 emit_move_insn (large, two31);
10113 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10115 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10116 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10118 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10119 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10121 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10122 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10124 large = gen_rtx_REG (V4SImode, REGNO (large));
10125 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10127 x = gen_rtx_REG (V4SImode, REGNO (value));
10128 if (vecmode == V4SFmode)
10129 emit_insn (gen_sse2_cvttps2dq (x, value));
10130 else
10131 emit_insn (gen_sse2_cvttpd2dq (x, value));
10132 value = x;
10134 emit_insn (gen_xorv4si3 (value, value, large));
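/* In effect the sequence above computes, for the value x in the low element:
     (int) (x - (x >= 2^31 ? 2^31 : 0))  ^  (x >= 2^31 ? 0x80000000 : 0)
   i.e. inputs too large for a signed conversion have 2^31 subtracted first
   and the sign bit is patched back in by the final xor.  */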
10137 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10138 Expects the 64-bit DImode to be supplied in a pair of integral
10139 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10140 -mfpmath=sse, !optimize_size only. */
10142 void
10143 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10145 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10146 rtx int_xmm, fp_xmm;
10147 rtx biases, exponents;
10148 rtx x;
10150 int_xmm = gen_reg_rtx (V4SImode);
10151 if (TARGET_INTER_UNIT_MOVES)
10152 emit_insn (gen_movdi_to_sse (int_xmm, input));
10153 else if (TARGET_SSE_SPLIT_REGS)
10155 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10156 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10158 else
10160 x = gen_reg_rtx (V2DImode);
10161 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10162 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10165 x = gen_rtx_CONST_VECTOR (V4SImode,
10166 gen_rtvec (4, GEN_INT (0x43300000UL),
10167 GEN_INT (0x45300000UL),
10168 const0_rtx, const0_rtx));
10169 exponents = validize_mem (force_const_mem (V4SImode, x));
10171 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10172 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10174 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10175 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10176 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10177 (0x1.0p84 + double(fp_value_hi_xmm)).
10178 Note these exponents differ by 32. */
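/* Worked example: for the input 0x0000000100000005 (2^32 + 5) the low
   punpck'd double is 2^52 + 5 and the high one is 2^84 + 1*2^32; after the
   bias subtraction below they become 5 and 2^32, and the horizontal add
   recovers 2^32 + 5.  */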
10180 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10182 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10183 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10184 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10185 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10186 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10187 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10188 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10189 biases = validize_mem (force_const_mem (V2DFmode, biases));
10190 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10192 /* Add the upper and lower DFmode values together. */
10193 if (TARGET_SSE3)
10194 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10195 else
10197 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10198 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10199 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10202 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10205 /* Convert an unsigned SImode value into a DFmode. Only currently used
10206 for SSE, but applicable anywhere. */
10208 void
10209 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10211 REAL_VALUE_TYPE TWO31r;
10212 rtx x, fp;
10214 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10215 NULL, 1, OPTAB_DIRECT);
10217 fp = gen_reg_rtx (DFmode);
10218 emit_insn (gen_floatsidf2 (fp, x));
10220 real_ldexp (&TWO31r, &dconst1, 31);
10221 x = const_double_from_real_value (TWO31r, DFmode);
10223 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10224 if (x != target)
10225 emit_move_insn (target, x);
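/* The trick above: adding -2^31 in SImode wraps the unsigned input into the
   signed range, the signed SImode->DFmode conversion is then exact, and
   adding the 2^31 bias back in DFmode (which represents every 32-bit
   integer exactly) restores the original value.  */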
10228 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10229 32-bit mode; otherwise we have a direct convert instruction. */
10231 void
10232 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10234 REAL_VALUE_TYPE TWO32r;
10235 rtx fp_lo, fp_hi, x;
10237 fp_lo = gen_reg_rtx (DFmode);
10238 fp_hi = gen_reg_rtx (DFmode);
10240 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10242 real_ldexp (&TWO32r, &dconst1, 32);
10243 x = const_double_from_real_value (TWO32r, DFmode);
10244 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10246 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10248 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10249 0, OPTAB_DIRECT);
10250 if (x != target)
10251 emit_move_insn (target, x);
10254 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10255 For x86_32, -mfpmath=sse, !optimize_size only. */
10256 void
10257 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10259 REAL_VALUE_TYPE ONE16r;
10260 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10262 real_ldexp (&ONE16r, &dconst1, 16);
10263 x = const_double_from_real_value (ONE16r, SFmode);
10264 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10265 NULL, 0, OPTAB_DIRECT);
10266 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10267 NULL, 0, OPTAB_DIRECT);
10268 fp_hi = gen_reg_rtx (SFmode);
10269 fp_lo = gen_reg_rtx (SFmode);
10270 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10271 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10272 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10273 0, OPTAB_DIRECT);
10274 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10275 0, OPTAB_DIRECT);
10276 if (!rtx_equal_p (target, fp_hi))
10277 emit_move_insn (target, fp_hi);
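/* SFmode cannot represent every 32-bit integer, so the input is split into
   16-bit halves; each half converts to SFmode exactly, and hi * 2^16 + lo
   rounds only in the final addition, which should match a correctly rounded
   direct unsigned conversion.  */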
10280 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10281 then replicate the value for all elements of the vector
10282 register. */
10285 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10287 rtvec v;
10288 switch (mode)
10290 case SFmode:
10291 if (vect)
10292 v = gen_rtvec (4, value, value, value, value);
10293 else
10294 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10295 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10296 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10298 case DFmode:
10299 if (vect)
10300 v = gen_rtvec (2, value, value);
10301 else
10302 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10303 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10305 default:
10306 gcc_unreachable ();
10310 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10311 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10312 true, then replicate the mask for all elements of the vector register.
10313 If INVERT is true, then create a mask excluding the sign bit. */
10316 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10318 enum machine_mode vec_mode;
10319 HOST_WIDE_INT hi, lo;
10320 int shift = 63;
10321 rtx v;
10322 rtx mask;
10324 /* Find the sign bit, sign extended to 2*HWI. */
10325 if (mode == SFmode)
10326 lo = 0x80000000, hi = lo < 0;
10327 else if (HOST_BITS_PER_WIDE_INT >= 64)
10328 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10329 else
10330 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10332 if (invert)
10333 lo = ~lo, hi = ~hi;
10335 /* Force this value into the low part of a fp vector constant. */
10336 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10337 mask = gen_lowpart (mode, mask);
10339 v = ix86_build_const_vector (mode, vect, mask);
10340 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10341 return force_reg (vec_mode, v);
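/* The resulting vector holds 0x80000000 (SFmode) or 0x8000000000000000
   (DFmode) in the selected element(s), or the complement when INVERT is
   true; the absneg expander below ANDs with the inverted mask for ABS and
   XORs with the plain mask for NEG.  */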
10344 /* Generate code for floating point ABS or NEG. */
10346 void
10347 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10348 rtx operands[])
10350 rtx mask, set, use, clob, dst, src;
10351 bool matching_memory;
10352 bool use_sse = false;
10353 bool vector_mode = VECTOR_MODE_P (mode);
10354 enum machine_mode elt_mode = mode;
10356 if (vector_mode)
10358 elt_mode = GET_MODE_INNER (mode);
10359 use_sse = true;
10361 else if (TARGET_SSE_MATH)
10362 use_sse = SSE_FLOAT_MODE_P (mode);
10364 /* NEG and ABS performed with SSE use bitwise mask operations.
10365 Create the appropriate mask now. */
10366 if (use_sse)
10367 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10368 else
10369 mask = NULL_RTX;
10371 dst = operands[0];
10372 src = operands[1];
10374 /* If the destination is memory, and we don't have matching source
10375 operands or we're using the x87, do things in registers. */
10376 matching_memory = false;
10377 if (MEM_P (dst))
10379 if (use_sse && rtx_equal_p (dst, src))
10380 matching_memory = true;
10381 else
10382 dst = gen_reg_rtx (mode);
10384 if (MEM_P (src) && !matching_memory)
10385 src = force_reg (mode, src);
10387 if (vector_mode)
10389 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10390 set = gen_rtx_SET (VOIDmode, dst, set);
10391 emit_insn (set);
10393 else
10395 set = gen_rtx_fmt_e (code, mode, src);
10396 set = gen_rtx_SET (VOIDmode, dst, set);
10397 if (mask)
10399 use = gen_rtx_USE (VOIDmode, mask);
10400 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10401 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10402 gen_rtvec (3, set, use, clob)));
10404 else
10405 emit_insn (set);
10408 if (dst != operands[0])
10409 emit_move_insn (operands[0], dst);
10412 /* Expand a copysign operation. Special case operand 0 being a constant. */
10414 void
10415 ix86_expand_copysign (rtx operands[])
10417 enum machine_mode mode, vmode;
10418 rtx dest, op0, op1, mask, nmask;
10420 dest = operands[0];
10421 op0 = operands[1];
10422 op1 = operands[2];
10424 mode = GET_MODE (dest);
10425 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10427 if (GET_CODE (op0) == CONST_DOUBLE)
10429 rtvec v;
10431 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10432 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10434 if (op0 == CONST0_RTX (mode))
10435 op0 = CONST0_RTX (vmode);
10436 else
10438 if (mode == SFmode)
10439 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10440 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10441 else
10442 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10443 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10446 mask = ix86_build_signbit_mask (mode, 0, 0);
10448 if (mode == SFmode)
10449 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10450 else
10451 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10453 else
10455 nmask = ix86_build_signbit_mask (mode, 0, 1);
10456 mask = ix86_build_signbit_mask (mode, 0, 0);
10458 if (mode == SFmode)
10459 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10460 else
10461 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10465 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10466 be a constant, and so has already been expanded into a vector constant. */
10468 void
10469 ix86_split_copysign_const (rtx operands[])
10471 enum machine_mode mode, vmode;
10472 rtx dest, op0, op1, mask, x;
10474 dest = operands[0];
10475 op0 = operands[1];
10476 op1 = operands[2];
10477 mask = operands[3];
10479 mode = GET_MODE (dest);
10480 vmode = GET_MODE (mask);
10482 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10483 x = gen_rtx_AND (vmode, dest, mask);
10484 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10486 if (op0 != CONST0_RTX (vmode))
10488 x = gen_rtx_IOR (vmode, dest, op0);
10489 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10493 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10494 so we have to do two masks. */
10496 void
10497 ix86_split_copysign_var (rtx operands[])
10499 enum machine_mode mode, vmode;
10500 rtx dest, scratch, op0, op1, mask, nmask, x;
10502 dest = operands[0];
10503 scratch = operands[1];
10504 op0 = operands[2];
10505 op1 = operands[3];
10506 nmask = operands[4];
10507 mask = operands[5];
10509 mode = GET_MODE (dest);
10510 vmode = GET_MODE (mask);
10512 if (rtx_equal_p (op0, op1))
10514 /* Shouldn't happen often (it's useless, obviously), but when it does
10515 we'd generate incorrect code if we continue below. */
10516 emit_move_insn (dest, op0);
10517 return;
10520 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10522 gcc_assert (REGNO (op1) == REGNO (scratch));
10524 x = gen_rtx_AND (vmode, scratch, mask);
10525 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10527 dest = mask;
10528 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10529 x = gen_rtx_NOT (vmode, dest);
10530 x = gen_rtx_AND (vmode, x, op0);
10531 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10533 else
10535 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10537 x = gen_rtx_AND (vmode, scratch, mask);
10539 else /* alternative 2,4 */
10541 gcc_assert (REGNO (mask) == REGNO (scratch));
10542 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10543 x = gen_rtx_AND (vmode, scratch, op1);
10545 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10547 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10549 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10550 x = gen_rtx_AND (vmode, dest, nmask);
10552 else /* alternative 3,4 */
10554 gcc_assert (REGNO (nmask) == REGNO (dest));
10555 dest = nmask;
10556 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10557 x = gen_rtx_AND (vmode, dest, op0);
10559 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10562 x = gen_rtx_IOR (vmode, dest, scratch);
10563 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10566 /* Return TRUE or FALSE depending on whether the first SET in INSN
10567 has source and destination with matching CC modes, and that the
10568 CC mode is at least as constrained as REQ_MODE. */
10571 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10573 rtx set;
10574 enum machine_mode set_mode;
10576 set = PATTERN (insn);
10577 if (GET_CODE (set) == PARALLEL)
10578 set = XVECEXP (set, 0, 0);
10579 gcc_assert (GET_CODE (set) == SET);
10580 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10582 set_mode = GET_MODE (SET_DEST (set));
10583 switch (set_mode)
10585 case CCNOmode:
10586 if (req_mode != CCNOmode
10587 && (req_mode != CCmode
10588 || XEXP (SET_SRC (set), 1) != const0_rtx))
10589 return 0;
10590 break;
10591 case CCmode:
10592 if (req_mode == CCGCmode)
10593 return 0;
10594 /* FALLTHRU */
10595 case CCGCmode:
10596 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10597 return 0;
10598 /* FALLTHRU */
10599 case CCGOCmode:
10600 if (req_mode == CCZmode)
10601 return 0;
10602 /* FALLTHRU */
10603 case CCZmode:
10604 break;
10606 default:
10607 gcc_unreachable ();
10610 return (GET_MODE (SET_SRC (set)) == set_mode);
10613 /* Generate insn patterns to do an integer compare of OPERANDS. */
10615 static rtx
10616 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10618 enum machine_mode cmpmode;
10619 rtx tmp, flags;
10621 cmpmode = SELECT_CC_MODE (code, op0, op1);
10622 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10624 /* This is very simple, but making the interface the same as in the
10625 FP case makes the rest of the code easier. */
10626 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10627 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10629 /* Return the test that should be put into the flags user, i.e.
10630 the bcc, scc, or cmov instruction. */
10631 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10634 /* Figure out whether to use ordered or unordered fp comparisons.
10635 Return the appropriate mode to use. */
10637 enum machine_mode
10638 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10640 /* ??? In order to make all comparisons reversible, we do all comparisons
10641 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10642 all forms trapping and nontrapping comparisons, we can make inequality
10643 comparisons trapping again, since it results in better code when using
10644 FCOM based compares. */
10645 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10648 enum machine_mode
10649 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10651 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10652 return ix86_fp_compare_mode (code);
10653 switch (code)
10655 /* Only zero flag is needed. */
10656 case EQ: /* ZF=0 */
10657 case NE: /* ZF!=0 */
10658 return CCZmode;
10659 /* Codes needing carry flag. */
10660 case GEU: /* CF=0 */
10661 case GTU: /* CF=0 & ZF=0 */
10662 case LTU: /* CF=1 */
10663 case LEU: /* CF=1 | ZF=1 */
10664 return CCmode;
10665 /* Codes possibly doable only with sign flag when
10666 comparing against zero. */
10667 case GE: /* SF=OF or SF=0 */
10668 case LT: /* SF<>OF or SF=1 */
10669 if (op1 == const0_rtx)
10670 return CCGOCmode;
10671 else
10672 /* For other cases Carry flag is not required. */
10673 return CCGCmode;
10674 /* Codes doable only with sign flag when comparing
10675 against zero, but we have no jump instruction for it,
10676 so we need to use relational tests against overflow,
10677 which thus needs to be zero. */
10678 case GT: /* ZF=0 & SF=OF */
10679 case LE: /* ZF=1 | SF<>OF */
10680 if (op1 == const0_rtx)
10681 return CCNOmode;
10682 else
10683 return CCGCmode;
10684 /* The strcmp pattern does (use flags) and combine may ask us for a proper
10685 mode. */
10686 case USE:
10687 return CCmode;
10688 default:
10689 gcc_unreachable ();
10693 /* Return the fixed registers used for condition codes. */
10695 static bool
10696 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10698 *p1 = FLAGS_REG;
10699 *p2 = FPSR_REG;
10700 return true;
10703 /* If two condition code modes are compatible, return a condition code
10704 mode which is compatible with both. Otherwise, return
10705 VOIDmode. */
10707 static enum machine_mode
10708 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10710 if (m1 == m2)
10711 return m1;
10713 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10714 return VOIDmode;
10716 if ((m1 == CCGCmode && m2 == CCGOCmode)
10717 || (m1 == CCGOCmode && m2 == CCGCmode))
10718 return CCGCmode;
10720 switch (m1)
10722 default:
10723 gcc_unreachable ();
10725 case CCmode:
10726 case CCGCmode:
10727 case CCGOCmode:
10728 case CCNOmode:
10729 case CCZmode:
10730 switch (m2)
10732 default:
10733 return VOIDmode;
10735 case CCmode:
10736 case CCGCmode:
10737 case CCGOCmode:
10738 case CCNOmode:
10739 case CCZmode:
10740 return CCmode;
10743 case CCFPmode:
10744 case CCFPUmode:
10745 /* These are only compatible with themselves, which we already
10746 checked above. */
10747 return VOIDmode;
10751 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10754 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10756 enum rtx_code swapped_code = swap_condition (code);
10757 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10758 || (ix86_fp_comparison_cost (swapped_code)
10759 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10762 /* Swap, force into registers, or otherwise massage the two operands
10763 to a fp comparison. The operands are updated in place; the new
10764 comparison code is returned. */
10766 static enum rtx_code
10767 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10769 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10770 rtx op0 = *pop0, op1 = *pop1;
10771 enum machine_mode op_mode = GET_MODE (op0);
10772 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10774 /* All of the unordered compare instructions only work on registers.
10775 The same is true of the fcomi compare instructions. The XFmode
10776 compare instructions require registers except when comparing
10777 against zero or when converting operand 1 from fixed point to
10778 floating point. */
10780 if (!is_sse
10781 && (fpcmp_mode == CCFPUmode
10782 || (op_mode == XFmode
10783 && ! (standard_80387_constant_p (op0) == 1
10784 || standard_80387_constant_p (op1) == 1)
10785 && GET_CODE (op1) != FLOAT)
10786 || ix86_use_fcomi_compare (code)))
10788 op0 = force_reg (op_mode, op0);
10789 op1 = force_reg (op_mode, op1);
10791 else
10793 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10794 things around if they appear profitable, otherwise force op0
10795 into a register. */
10797 if (standard_80387_constant_p (op0) == 0
10798 || (MEM_P (op0)
10799 && ! (standard_80387_constant_p (op1) == 0
10800 || MEM_P (op1))))
10802 rtx tmp;
10803 tmp = op0, op0 = op1, op1 = tmp;
10804 code = swap_condition (code);
10807 if (!REG_P (op0))
10808 op0 = force_reg (op_mode, op0);
10810 if (CONSTANT_P (op1))
10812 int tmp = standard_80387_constant_p (op1);
10813 if (tmp == 0)
10814 op1 = validize_mem (force_const_mem (op_mode, op1));
10815 else if (tmp == 1)
10817 if (TARGET_CMOVE)
10818 op1 = force_reg (op_mode, op1);
10820 else
10821 op1 = force_reg (op_mode, op1);
10825 /* Try to rearrange the comparison to make it cheaper. */
10826 if (ix86_fp_comparison_cost (code)
10827 > ix86_fp_comparison_cost (swap_condition (code))
10828 && (REG_P (op1) || !no_new_pseudos))
10830 rtx tmp;
10831 tmp = op0, op0 = op1, op1 = tmp;
10832 code = swap_condition (code);
10833 if (!REG_P (op0))
10834 op0 = force_reg (op_mode, op0);
10837 *pop0 = op0;
10838 *pop1 = op1;
10839 return code;
10842 /* Convert comparison codes we use to represent FP comparison to integer
10843 code that will result in proper branch. Return UNKNOWN if no such code
10844 is available. */
10846 enum rtx_code
10847 ix86_fp_compare_code_to_integer (enum rtx_code code)
10849 switch (code)
10851 case GT:
10852 return GTU;
10853 case GE:
10854 return GEU;
10855 case ORDERED:
10856 case UNORDERED:
10857 return code;
10858 break;
10859 case UNEQ:
10860 return EQ;
10861 break;
10862 case UNLT:
10863 return LTU;
10864 break;
10865 case UNLE:
10866 return LEU;
10867 break;
10868 case LTGT:
10869 return NE;
10870 break;
10871 default:
10872 return UNKNOWN;
10876 /* Split comparison code CODE into comparisons we can do using branch
10877 instructions. BYPASS_CODE is the comparison code for the branch that will
10878 branch around FIRST_CODE and SECOND_CODE. If one of the branches
10879 is not required, its code is set to UNKNOWN.
10880 We never require more than two branches. */
10882 void
10883 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10884 enum rtx_code *first_code,
10885 enum rtx_code *second_code)
10887 *first_code = code;
10888 *bypass_code = UNKNOWN;
10889 *second_code = UNKNOWN;
10891 /* The fcomi comparison sets flags as follows:
10893 cmp ZF PF CF
10894 > 0 0 0
10895 < 0 0 1
10896 = 1 0 0
10897 un 1 1 1 */
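/* These are the same ZF/CF settings an unsigned integer compare produces,
   so the ordered codes can be tested with the unsigned branch conditions
   (see ix86_fp_compare_code_to_integer above).  PF is the only flag that
   distinguishes the unordered case, hence the UNORDERED bypass or second
   branch chosen below for codes that would otherwise misfire on NaNs.  */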
10899 switch (code)
10901 case GT: /* GTU - CF=0 & ZF=0 */
10902 case GE: /* GEU - CF=0 */
10903 case ORDERED: /* PF=0 */
10904 case UNORDERED: /* PF=1 */
10905 case UNEQ: /* EQ - ZF=1 */
10906 case UNLT: /* LTU - CF=1 */
10907 case UNLE: /* LEU - CF=1 | ZF=1 */
10908 case LTGT: /* EQ - ZF=0 */
10909 break;
10910 case LT: /* LTU - CF=1 - fails on unordered */
10911 *first_code = UNLT;
10912 *bypass_code = UNORDERED;
10913 break;
10914 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10915 *first_code = UNLE;
10916 *bypass_code = UNORDERED;
10917 break;
10918 case EQ: /* EQ - ZF=1 - fails on unordered */
10919 *first_code = UNEQ;
10920 *bypass_code = UNORDERED;
10921 break;
10922 case NE: /* NE - ZF=0 - fails on unordered */
10923 *first_code = LTGT;
10924 *second_code = UNORDERED;
10925 break;
10926 case UNGE: /* GEU - CF=0 - fails on unordered */
10927 *first_code = GE;
10928 *second_code = UNORDERED;
10929 break;
10930 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10931 *first_code = GT;
10932 *second_code = UNORDERED;
10933 break;
10934 default:
10935 gcc_unreachable ();
10937 if (!TARGET_IEEE_FP)
10939 *second_code = UNKNOWN;
10940 *bypass_code = UNKNOWN;
10944 /* Return cost of a comparison done using fcom + arithmetic operations on AX.
10945 All following functions use the number of instructions as a cost metric.
10946 In the future this should be tweaked to compute bytes for optimize_size and
10947 take into account the performance of various instructions on various CPUs. */
10948 static int
10949 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10951 if (!TARGET_IEEE_FP)
10952 return 4;
10953 /* The cost of code output by ix86_expand_fp_compare. */
10954 switch (code)
10956 case UNLE:
10957 case UNLT:
10958 case LTGT:
10959 case GT:
10960 case GE:
10961 case UNORDERED:
10962 case ORDERED:
10963 case UNEQ:
10964 return 4;
10965 break;
10966 case LT:
10967 case NE:
10968 case EQ:
10969 case UNGE:
10970 return 5;
10971 break;
10972 case LE:
10973 case UNGT:
10974 return 6;
10975 break;
10976 default:
10977 gcc_unreachable ();
10981 /* Return cost of comparison done using fcomi operation.
10982 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10983 static int
10984 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10986 enum rtx_code bypass_code, first_code, second_code;
10987 /* Return arbitrarily high cost when instruction is not supported - this
10988 prevents gcc from using it. */
10989 if (!TARGET_CMOVE)
10990 return 1024;
10991 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10992 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10995 /* Return cost of comparison done using sahf operation.
10996 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10997 static int
10998 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11000 enum rtx_code bypass_code, first_code, second_code;
11001 /* Return an arbitrarily high cost when the instruction is not preferred - this
11002 prevents gcc from using it. */
11003 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11004 return 1024;
11005 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11006 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11009 /* Compute cost of the comparison done using any method.
11010 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11011 static int
11012 ix86_fp_comparison_cost (enum rtx_code code)
11014 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11015 int min;
11017 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11018 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11020 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11021 if (min > sahf_cost)
11022 min = sahf_cost;
11023 if (min > fcomi_cost)
11024 min = fcomi_cost;
11025 return min;
11028 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11030 static rtx
11031 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11032 rtx *second_test, rtx *bypass_test)
11034 enum machine_mode fpcmp_mode, intcmp_mode;
11035 rtx tmp, tmp2;
11036 int cost = ix86_fp_comparison_cost (code);
11037 enum rtx_code bypass_code, first_code, second_code;
11039 fpcmp_mode = ix86_fp_compare_mode (code);
11040 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11042 if (second_test)
11043 *second_test = NULL_RTX;
11044 if (bypass_test)
11045 *bypass_test = NULL_RTX;
11047 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11049 /* Do fcomi/sahf based test when profitable. */
11050 if ((TARGET_CMOVE || TARGET_SAHF)
11051 && (bypass_code == UNKNOWN || bypass_test)
11052 && (second_code == UNKNOWN || second_test)
11053 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11055 if (TARGET_CMOVE)
11057 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11058 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11059 tmp);
11060 emit_insn (tmp);
11062 else
11064 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11065 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11066 if (!scratch)
11067 scratch = gen_reg_rtx (HImode);
11068 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11069 emit_insn (gen_x86_sahf_1 (scratch));
11072 /* The FP codes work out to act like unsigned. */
11073 intcmp_mode = fpcmp_mode;
11074 code = first_code;
11075 if (bypass_code != UNKNOWN)
11076 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11077 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11078 const0_rtx);
11079 if (second_code != UNKNOWN)
11080 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11081 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11082 const0_rtx);
11084 else
11086 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11087 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11088 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11089 if (!scratch)
11090 scratch = gen_reg_rtx (HImode);
11091 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11093 /* In the unordered case, we have to check C2 for NaN's, which
11094 doesn't happen to work out to anything nice combination-wise.
11095 So do some bit twiddling on the value we've got in AH to come
11096 up with an appropriate set of condition codes. */
11098 intcmp_mode = CCNOmode;
11099 switch (code)
11101 case GT:
11102 case UNGT:
11103 if (code == GT || !TARGET_IEEE_FP)
11105 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11106 code = EQ;
11108 else
11110 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11111 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11112 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11113 intcmp_mode = CCmode;
11114 code = GEU;
11116 break;
11117 case LT:
11118 case UNLT:
11119 if (code == LT && TARGET_IEEE_FP)
11121 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11122 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11123 intcmp_mode = CCmode;
11124 code = EQ;
11126 else
11128 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11129 code = NE;
11131 break;
11132 case GE:
11133 case UNGE:
11134 if (code == GE || !TARGET_IEEE_FP)
11136 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11137 code = EQ;
11139 else
11141 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11142 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11143 GEN_INT (0x01)));
11144 code = NE;
11146 break;
11147 case LE:
11148 case UNLE:
11149 if (code == LE && TARGET_IEEE_FP)
11151 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11152 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11153 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11154 intcmp_mode = CCmode;
11155 code = LTU;
11157 else
11159 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11160 code = NE;
11162 break;
11163 case EQ:
11164 case UNEQ:
11165 if (code == EQ && TARGET_IEEE_FP)
11167 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11168 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11169 intcmp_mode = CCmode;
11170 code = EQ;
11172 else
11174 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11175 code = NE;
11176 break;
11178 break;
11179 case NE:
11180 case LTGT:
11181 if (code == NE && TARGET_IEEE_FP)
11183 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11184 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11185 GEN_INT (0x40)));
11186 code = NE;
11188 else
11190 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11191 code = EQ;
11193 break;
11195 case UNORDERED:
11196 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11197 code = NE;
11198 break;
11199 case ORDERED:
11200 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11201 code = EQ;
11202 break;
11204 default:
11205 gcc_unreachable ();
11209 /* Return the test that should be put into the flags user, i.e.
11210 the bcc, scc, or cmov instruction. */
11211 return gen_rtx_fmt_ee (code, VOIDmode,
11212 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11213 const0_rtx);
11217 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11219 rtx op0, op1, ret;
11220 op0 = ix86_compare_op0;
11221 op1 = ix86_compare_op1;
11223 if (second_test)
11224 *second_test = NULL_RTX;
11225 if (bypass_test)
11226 *bypass_test = NULL_RTX;
11228 if (ix86_compare_emitted)
11230 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11231 ix86_compare_emitted = NULL_RTX;
11233 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11234 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11235 second_test, bypass_test);
11236 else
11237 ret = ix86_expand_int_compare (code, op0, op1);
11239 return ret;
11242 /* Return true if the CODE will result in nontrivial jump sequence. */
11243 bool
11244 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11246 enum rtx_code bypass_code, first_code, second_code;
11247 if (!TARGET_CMOVE)
11248 return true;
11249 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11250 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11253 void
11254 ix86_expand_branch (enum rtx_code code, rtx label)
11256 rtx tmp;
11258 /* If we have emitted a compare insn, go straight to simple.
11259 ix86_expand_compare won't emit anything if ix86_compare_emitted
11260 is non-NULL. */
11261 if (ix86_compare_emitted)
11262 goto simple;
11264 switch (GET_MODE (ix86_compare_op0))
11266 case QImode:
11267 case HImode:
11268 case SImode:
11269 simple:
11270 tmp = ix86_expand_compare (code, NULL, NULL);
11271 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11272 gen_rtx_LABEL_REF (VOIDmode, label),
11273 pc_rtx);
11274 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11275 return;
11277 case SFmode:
11278 case DFmode:
11279 case XFmode:
11281 rtvec vec;
11282 int use_fcomi;
11283 enum rtx_code bypass_code, first_code, second_code;
11285 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11286 &ix86_compare_op1);
11288 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11290 /* Check whether we will use the natural sequence with one jump. If
11291 so, we can expand the jump early. Otherwise delay expansion by
11292 creating a compound insn so as not to confuse the optimizers. */
11293 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11294 && TARGET_CMOVE)
11296 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11297 gen_rtx_LABEL_REF (VOIDmode, label),
11298 pc_rtx, NULL_RTX, NULL_RTX);
11300 else
11302 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11303 ix86_compare_op0, ix86_compare_op1);
11304 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11305 gen_rtx_LABEL_REF (VOIDmode, label),
11306 pc_rtx);
11307 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11309 use_fcomi = ix86_use_fcomi_compare (code);
11310 vec = rtvec_alloc (3 + !use_fcomi);
11311 RTVEC_ELT (vec, 0) = tmp;
11312 RTVEC_ELT (vec, 1)
11313 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11314 RTVEC_ELT (vec, 2)
11315 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11316 if (! use_fcomi)
11317 RTVEC_ELT (vec, 3)
11318 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11320 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11322 return;
11325 case DImode:
11326 if (TARGET_64BIT)
11327 goto simple;
11328 case TImode:
11329 /* Expand DImode branch into multiple compare+branch. */
11331 rtx lo[2], hi[2], label2;
11332 enum rtx_code code1, code2, code3;
11333 enum machine_mode submode;
11335 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11337 tmp = ix86_compare_op0;
11338 ix86_compare_op0 = ix86_compare_op1;
11339 ix86_compare_op1 = tmp;
11340 code = swap_condition (code);
11342 if (GET_MODE (ix86_compare_op0) == DImode)
11344 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11345 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11346 submode = SImode;
11348 else
11350 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11351 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11352 submode = DImode;
11355 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11356 avoid two branches. This costs one extra insn, so disable when
11357 optimizing for size. */
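   /* For example, a DImode "a == b" on a 32-bit target becomes roughly:
	  t = (hi(a) ^ hi(b)) | (lo(a) ^ lo(b));
	  if (t == 0) goto label;
      with the branch re-expanded on T against zero.  */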
11359 if ((code == EQ || code == NE)
11360 && (!optimize_size
11361 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11363 rtx xor0, xor1;
11365 xor1 = hi[0];
11366 if (hi[1] != const0_rtx)
11367 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11368 NULL_RTX, 0, OPTAB_WIDEN);
11370 xor0 = lo[0];
11371 if (lo[1] != const0_rtx)
11372 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11373 NULL_RTX, 0, OPTAB_WIDEN);
11375 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11376 NULL_RTX, 0, OPTAB_WIDEN);
11378 ix86_compare_op0 = tmp;
11379 ix86_compare_op1 = const0_rtx;
11380 ix86_expand_branch (code, label);
11381 return;
11384 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11385 op1 is a constant and the low word is zero, then we can just
11386 examine the high word. */
11388 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11389 switch (code)
11391 case LT: case LTU: case GE: case GEU:
11392 ix86_compare_op0 = hi[0];
11393 ix86_compare_op1 = hi[1];
11394 ix86_expand_branch (code, label);
11395 return;
11396 default:
11397 break;
11400 /* Otherwise, we need two or three jumps. */
11402 label2 = gen_label_rtx ();
11404 code1 = code;
11405 code2 = swap_condition (code);
11406 code3 = unsigned_condition (code);
11408 switch (code)
11410 case LT: case GT: case LTU: case GTU:
11411 break;
11413 case LE: code1 = LT; code2 = GT; break;
11414 case GE: code1 = GT; code2 = LT; break;
11415 case LEU: code1 = LTU; code2 = GTU; break;
11416 case GEU: code1 = GTU; code2 = LTU; break;
11418 case EQ: code1 = UNKNOWN; code2 = NE; break;
11419 case NE: code2 = UNKNOWN; break;
11421 default:
11422 gcc_unreachable ();
11426 * a < b =>
11427 * if (hi(a) < hi(b)) goto true;
11428 * if (hi(a) > hi(b)) goto false;
11429 * if (lo(a) < lo(b)) goto true;
11430 * false:
11433 ix86_compare_op0 = hi[0];
11434 ix86_compare_op1 = hi[1];
11436 if (code1 != UNKNOWN)
11437 ix86_expand_branch (code1, label);
11438 if (code2 != UNKNOWN)
11439 ix86_expand_branch (code2, label2);
11441 ix86_compare_op0 = lo[0];
11442 ix86_compare_op1 = lo[1];
11443 ix86_expand_branch (code3, label);
11445 if (code2 != UNKNOWN)
11446 emit_label (label2);
11447 return;
11450 default:
11451 gcc_unreachable ();
11455 /* Split branch based on floating point condition. */
11456 void
11457 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11458 rtx target1, rtx target2, rtx tmp, rtx pushed)
11460 rtx second, bypass;
11461 rtx label = NULL_RTX;
11462 rtx condition;
11463 int bypass_probability = -1, second_probability = -1, probability = -1;
11464 rtx i;
11466 if (target2 != pc_rtx)
11468 rtx tmp = target2;
11469 code = reverse_condition_maybe_unordered (code);
11470 target2 = target1;
11471 target1 = tmp;
11474 condition = ix86_expand_fp_compare (code, op1, op2,
11475 tmp, &second, &bypass);
11477 /* Remove pushed operand from stack. */
11478 if (pushed)
11479 ix86_free_from_memory (GET_MODE (pushed));
11481 if (split_branch_probability >= 0)
11483 /* Distribute the probabilities across the jumps.
11484 Assume that BYPASS and SECOND always test
11485 for UNORDERED. */
11486 probability = split_branch_probability;
11488 /* A value of 1 is low enough that there is no need to update the
11489 probability. Later we may run some experiments and see
11490 whether unordered values are more frequent in practice. */
11491 if (bypass)
11492 bypass_probability = 1;
11493 if (second)
11494 second_probability = 1;
11496 if (bypass != NULL_RTX)
11498 label = gen_label_rtx ();
11499 i = emit_jump_insn (gen_rtx_SET
11500 (VOIDmode, pc_rtx,
11501 gen_rtx_IF_THEN_ELSE (VOIDmode,
11502 bypass,
11503 gen_rtx_LABEL_REF (VOIDmode,
11504 label),
11505 pc_rtx)));
11506 if (bypass_probability >= 0)
11507 REG_NOTES (i)
11508 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11509 GEN_INT (bypass_probability),
11510 REG_NOTES (i));
11512 i = emit_jump_insn (gen_rtx_SET
11513 (VOIDmode, pc_rtx,
11514 gen_rtx_IF_THEN_ELSE (VOIDmode,
11515 condition, target1, target2)));
11516 if (probability >= 0)
11517 REG_NOTES (i)
11518 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11519 GEN_INT (probability),
11520 REG_NOTES (i));
11521 if (second != NULL_RTX)
11523 i = emit_jump_insn (gen_rtx_SET
11524 (VOIDmode, pc_rtx,
11525 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11526 target2)));
11527 if (second_probability >= 0)
11528 REG_NOTES (i)
11529 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11530 GEN_INT (second_probability),
11531 REG_NOTES (i));
11533 if (label != NULL_RTX)
11534 emit_label (label);
11538 ix86_expand_setcc (enum rtx_code code, rtx dest)
11540 rtx ret, tmp, tmpreg, equiv;
11541 rtx second_test, bypass_test;
11543 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11544 return 0; /* FAIL */
11546 gcc_assert (GET_MODE (dest) == QImode);
11548 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11549 PUT_MODE (ret, QImode);
11551 tmp = dest;
11552 tmpreg = dest;
11554 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11555 if (bypass_test || second_test)
11557 rtx test = second_test;
11558 int bypass = 0;
11559 rtx tmp2 = gen_reg_rtx (QImode);
11560 if (bypass_test)
11562 gcc_assert (!second_test);
11563 test = bypass_test;
11564 bypass = 1;
11565 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11567 PUT_MODE (test, QImode);
11568 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11570 if (bypass)
11571 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11572 else
11573 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11576 /* Attach a REG_EQUAL note describing the comparison result. */
11577 if (ix86_compare_op0 && ix86_compare_op1)
11579 equiv = simplify_gen_relational (code, QImode,
11580 GET_MODE (ix86_compare_op0),
11581 ix86_compare_op0, ix86_compare_op1);
11582 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11585 return 1; /* DONE */
11588 /* Expand a comparison setting or clearing the carry flag. Return true when
11589 successful and set *POP to the comparison operation. */
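/* For instance, an unsigned "a < b" maps directly onto the carry flag set by
   "cmp a, b"; equality and signed forms are first rewritten below (e.g.
   a == 0 becomes (unsigned) a < 1) so that only LTU/GEU remain.  */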
11590 static bool
11591 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11593 enum machine_mode mode =
11594 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11596 /* Do not handle DImode compares that go through the special path. Also we can't
11597 deal with FP compares yet. This is possible to add. */
11598 if (mode == (TARGET_64BIT ? TImode : DImode))
11599 return false;
11600 if (FLOAT_MODE_P (mode))
11602 rtx second_test = NULL, bypass_test = NULL;
11603 rtx compare_op, compare_seq;
11605 /* Shortcut: the following common codes never translate into carry-flag compares. */
11606 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11607 || code == ORDERED || code == UNORDERED)
11608 return false;
11610 /* These comparisons require zero flag; swap operands so they won't. */
11611 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11612 && !TARGET_IEEE_FP)
11614 rtx tmp = op0;
11615 op0 = op1;
11616 op1 = tmp;
11617 code = swap_condition (code);
11620 /* Try to expand the comparison and verify that we end up with a carry-flag
11621 based comparison. This fails to be true only when we decide to expand the
11622 comparison using arithmetic, which is not a very common scenario. */
11623 start_sequence ();
11624 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11625 &second_test, &bypass_test);
11626 compare_seq = get_insns ();
11627 end_sequence ();
11629 if (second_test || bypass_test)
11630 return false;
11631 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11632 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11633 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11634 else
11635 code = GET_CODE (compare_op);
11636 if (code != LTU && code != GEU)
11637 return false;
11638 emit_insn (compare_seq);
11639 *pop = compare_op;
11640 return true;
11642 if (!INTEGRAL_MODE_P (mode))
11643 return false;
11644 switch (code)
11646 case LTU:
11647 case GEU:
11648 break;
11650 /* Convert a==0 into (unsigned)a<1. */
11651 case EQ:
11652 case NE:
11653 if (op1 != const0_rtx)
11654 return false;
11655 op1 = const1_rtx;
11656 code = (code == EQ ? LTU : GEU);
11657 break;
11659 /* Convert a>b into b<a or a>=b-1. */
11660 case GTU:
11661 case LEU:
11662 if (CONST_INT_P (op1))
11664 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11665 /* Bail out on overflow. We could still swap the operands, but that
11666 would force loading the constant into a register. */
11667 if (op1 == const0_rtx
11668 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11669 return false;
11670 code = (code == GTU ? GEU : LTU);
11672 else
11674 rtx tmp = op1;
11675 op1 = op0;
11676 op0 = tmp;
11677 code = (code == GTU ? LTU : GEU);
11679 break;
11681 /* Convert a>=0 into (unsigned)a<0x80000000. */
11682 case LT:
11683 case GE:
11684 if (mode == DImode || op1 != const0_rtx)
11685 return false;
11686 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11687 code = (code == LT ? GEU : LTU);
11688 break;
11689 case LE:
11690 case GT:
11691 if (mode == DImode || op1 != constm1_rtx)
11692 return false;
11693 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11694 code = (code == LE ? GEU : LTU);
11695 break;
11697 default:
11698 return false;
11700 /* Swapping operands may cause a constant to appear as the first operand. */
11701 if (!nonimmediate_operand (op0, VOIDmode))
11703 if (no_new_pseudos)
11704 return false;
11705 op0 = force_reg (mode, op0);
11707 ix86_compare_op0 = op0;
11708 ix86_compare_op1 = op1;
11709 *pop = ix86_expand_compare (code, NULL, NULL);
11710 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11711 return true;
11715 ix86_expand_int_movcc (rtx operands[])
11717 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11718 rtx compare_seq, compare_op;
11719 rtx second_test, bypass_test;
11720 enum machine_mode mode = GET_MODE (operands[0]);
11721 bool sign_bit_compare_p = false;
11723 start_sequence ();
11724 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11725 compare_seq = get_insns ();
11726 end_sequence ();
11728 compare_code = GET_CODE (compare_op);
11730 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11731 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11732 sign_bit_compare_p = true;
11734 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11735 HImode insns, we'd be swallowed in word prefix ops. */
11737 if ((mode != HImode || TARGET_FAST_PREFIX)
11738 && (mode != (TARGET_64BIT ? TImode : DImode))
11739 && CONST_INT_P (operands[2])
11740 && CONST_INT_P (operands[3]))
11742 rtx out = operands[0];
11743 HOST_WIDE_INT ct = INTVAL (operands[2]);
11744 HOST_WIDE_INT cf = INTVAL (operands[3]);
11745 HOST_WIDE_INT diff;
11747 diff = ct - cf;
11748 /* Sign bit compares are better done using shifts than by using
11749 sbb. */
11750 if (sign_bit_compare_p
11751 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11752 ix86_compare_op1, &compare_op))
11754 /* Detect overlap between destination and compare sources. */
11755 rtx tmp = out;
11757 if (!sign_bit_compare_p)
11759 bool fpcmp = false;
11761 compare_code = GET_CODE (compare_op);
11763 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11764 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11766 fpcmp = true;
11767 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11770 /* To simplify the rest of the code, restrict to the GEU case. */
11771 if (compare_code == LTU)
11773 HOST_WIDE_INT tmp = ct;
11774 ct = cf;
11775 cf = tmp;
11776 compare_code = reverse_condition (compare_code);
11777 code = reverse_condition (code);
11779 else
11781 if (fpcmp)
11782 PUT_CODE (compare_op,
11783 reverse_condition_maybe_unordered
11784 (GET_CODE (compare_op)));
11785 else
11786 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11788 diff = ct - cf;
11790 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11791 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11792 tmp = gen_reg_rtx (mode);
11794 if (mode == DImode)
11795 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11796 else
11797 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11799 else
11801 if (code == GT || code == GE)
11802 code = reverse_condition (code);
11803 else
11805 HOST_WIDE_INT tmp = ct;
11806 ct = cf;
11807 cf = tmp;
11808 diff = ct - cf;
11810 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11811 ix86_compare_op1, VOIDmode, 0, -1);
11814 if (diff == 1)
11817 * cmpl op0,op1
11818 * sbbl dest,dest
11819 * [addl dest, ct]
11821 * Size 5 - 8.
11823 if (ct)
11824 tmp = expand_simple_binop (mode, PLUS,
11825 tmp, GEN_INT (ct),
11826 copy_rtx (tmp), 1, OPTAB_DIRECT);
11828 else if (cf == -1)
11831 * cmpl op0,op1
11832 * sbbl dest,dest
11833 * orl $ct, dest
11835 * Size 8.
11837 tmp = expand_simple_binop (mode, IOR,
11838 tmp, GEN_INT (ct),
11839 copy_rtx (tmp), 1, OPTAB_DIRECT);
11841 else if (diff == -1 && ct)
11844 * cmpl op0,op1
11845 * sbbl dest,dest
11846 * notl dest
11847 * [addl dest, cf]
11849 * Size 8 - 11.
11851 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11852 if (cf)
11853 tmp = expand_simple_binop (mode, PLUS,
11854 copy_rtx (tmp), GEN_INT (cf),
11855 copy_rtx (tmp), 1, OPTAB_DIRECT);
11857 else
11860 * cmpl op0,op1
11861 * sbbl dest,dest
11862 * [notl dest]
11863 * andl cf - ct, dest
11864 * [addl dest, ct]
11866 * Size 8 - 11.
11869 if (cf == 0)
11871 cf = ct;
11872 ct = 0;
11873 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11876 tmp = expand_simple_binop (mode, AND,
11877 copy_rtx (tmp),
11878 gen_int_mode (cf - ct, mode),
11879 copy_rtx (tmp), 1, OPTAB_DIRECT);
11880 if (ct)
11881 tmp = expand_simple_binop (mode, PLUS,
11882 copy_rtx (tmp), GEN_INT (ct),
11883 copy_rtx (tmp), 1, OPTAB_DIRECT);
11886 if (!rtx_equal_p (tmp, out))
11887 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11889 return 1; /* DONE */
11892 if (diff < 0)
11894 HOST_WIDE_INT tmp;
11895 tmp = ct, ct = cf, cf = tmp;
11896 diff = -diff;
11897 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11899 /* We may be reversing an unordered compare to a normal compare, which
11900 is not valid in general (we may convert a non-trapping condition
11901 to a trapping one), however on i386 we currently emit all
11902 comparisons unordered. */
11903 compare_code = reverse_condition_maybe_unordered (compare_code);
11904 code = reverse_condition_maybe_unordered (code);
11906 else
11908 compare_code = reverse_condition (compare_code);
11909 code = reverse_condition (code);
11913 compare_code = UNKNOWN;
11914 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11915 && CONST_INT_P (ix86_compare_op1))
11917 if (ix86_compare_op1 == const0_rtx
11918 && (code == LT || code == GE))
11919 compare_code = code;
11920 else if (ix86_compare_op1 == constm1_rtx)
11922 if (code == LE)
11923 compare_code = LT;
11924 else if (code == GT)
11925 compare_code = GE;
11929 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11930 if (compare_code != UNKNOWN
11931 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11932 && (cf == -1 || ct == -1))
11934 /* If the lea code below could be used, only optimize
11935 if it results in a 2-insn sequence. */
11937 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11938 || diff == 3 || diff == 5 || diff == 9)
11939 || (compare_code == LT && ct == -1)
11940 || (compare_code == GE && cf == -1))
11943 * notl op1 (if necessary)
11944 * sarl $31, op1
11945 * orl cf, op1
11947 if (ct != -1)
11949 cf = ct;
11950 ct = -1;
11951 code = reverse_condition (code);
11954 out = emit_store_flag (out, code, ix86_compare_op0,
11955 ix86_compare_op1, VOIDmode, 0, -1);
11957 out = expand_simple_binop (mode, IOR,
11958 out, GEN_INT (cf),
11959 out, 1, OPTAB_DIRECT);
11960 if (out != operands[0])
11961 emit_move_insn (operands[0], out);
11963 return 1; /* DONE */
11968 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11969 || diff == 3 || diff == 5 || diff == 9)
11970 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11971 && (mode != DImode
11972 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11975 * xorl dest,dest
11976 * cmpl op1,op2
11977 * setcc dest
11978 * lea cf(dest*(ct-cf)),dest
11980 * Size 14.
11982 * This also catches the degenerate setcc-only case.
11985 rtx tmp;
11986 int nops;
11988 out = emit_store_flag (out, code, ix86_compare_op0,
11989 ix86_compare_op1, VOIDmode, 0, 1);
11991 nops = 0;
11992 /* On x86_64 the lea instruction operates on Pmode, so we need
11993 to get the arithmetic done in the proper mode to match. */
11994 if (diff == 1)
11995 tmp = copy_rtx (out);
11996 else
11998 rtx out1;
11999 out1 = copy_rtx (out);
12000 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12001 nops++;
12002 if (diff & 1)
12004 tmp = gen_rtx_PLUS (mode, tmp, out1);
12005 nops++;
12008 if (cf != 0)
12010 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12011 nops++;
12013 if (!rtx_equal_p (tmp, out))
12015 if (nops == 1)
12016 out = force_operand (tmp, copy_rtx (out));
12017 else
12018 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12020 if (!rtx_equal_p (out, operands[0]))
12021 emit_move_insn (operands[0], copy_rtx (out));
12023 return 1; /* DONE */
12027 * General case: Jumpful:
12028 * xorl dest,dest cmpl op1, op2
12029 * cmpl op1, op2 movl ct, dest
12030 * setcc dest jcc 1f
12031 * decl dest movl cf, dest
12032 * andl (cf-ct),dest 1:
12033 * addl ct,dest
12035 * Size 20. Size 14.
12037 * This is reasonably steep, but branch mispredict costs are
12038 * high on modern cpus, so consider failing only if optimizing
12039 * for space.
12042 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12043 && BRANCH_COST >= 2)
12045 if (cf == 0)
12047 cf = ct;
12048 ct = 0;
12049 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
12050 /* We may be reversing an unordered compare to a normal compare,
12051 which is not valid in general (we may convert a non-trapping
12052 condition to a trapping one), however on i386 we currently
12053 emit all comparisons unordered. */
12054 code = reverse_condition_maybe_unordered (code);
12055 else
12057 code = reverse_condition (code);
12058 if (compare_code != UNKNOWN)
12059 compare_code = reverse_condition (compare_code);
12063 if (compare_code != UNKNOWN)
12065 /* notl op1 (if needed)
12066 sarl $31, op1
12067 andl (cf-ct), op1
12068 addl ct, op1
12070 For x < 0 (resp. x <= -1) there will be no notl,
12071 so if possible swap the constants to get rid of the
12072 complement.
12073 True/false will be -1/0 while code below (store flag
12074 followed by decrement) is 0/-1, so the constants need
12075 to be exchanged once more. */
12077 if (compare_code == GE || !cf)
12079 code = reverse_condition (code);
12080 compare_code = LT;
12082 else
12084 HOST_WIDE_INT tmp = cf;
12085 cf = ct;
12086 ct = tmp;
12089 out = emit_store_flag (out, code, ix86_compare_op0,
12090 ix86_compare_op1, VOIDmode, 0, -1);
12092 else
12094 out = emit_store_flag (out, code, ix86_compare_op0,
12095 ix86_compare_op1, VOIDmode, 0, 1);
12097 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12098 copy_rtx (out), 1, OPTAB_DIRECT);
12101 out = expand_simple_binop (mode, AND, copy_rtx (out),
12102 gen_int_mode (cf - ct, mode),
12103 copy_rtx (out), 1, OPTAB_DIRECT);
12104 if (ct)
12105 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12106 copy_rtx (out), 1, OPTAB_DIRECT);
12107 if (!rtx_equal_p (out, operands[0]))
12108 emit_move_insn (operands[0], copy_rtx (out));
12110 return 1; /* DONE */
12114 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12116 /* Try a few things more with specific constants and a variable. */
12118 optab op;
12119 rtx var, orig_out, out, tmp;
12121 if (BRANCH_COST <= 2)
12122 return 0; /* FAIL */
12124 /* If one of the two operands is an interesting constant, load a
12125 constant with the above and mask it in with a logical operation. */
12127 if (CONST_INT_P (operands[2]))
12129 var = operands[3];
12130 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12131 operands[3] = constm1_rtx, op = and_optab;
12132 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12133 operands[3] = const0_rtx, op = ior_optab;
12134 else
12135 return 0; /* FAIL */
12137 else if (CONST_INT_P (operands[3]))
12139 var = operands[2];
12140 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12141 operands[2] = constm1_rtx, op = and_optab;
12142 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12143 operands[2] = const0_rtx, op = ior_optab;
12144 else
12145 return 0; /* FAIL */
12147 else
12148 return 0; /* FAIL */
12150 orig_out = operands[0];
12151 tmp = gen_reg_rtx (mode);
12152 operands[0] = tmp;
12154 /* Recurse to get the constant loaded. */
12155 if (ix86_expand_int_movcc (operands) == 0)
12156 return 0; /* FAIL */
12158 /* Mask in the interesting variable. */
12159 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12160 OPTAB_WIDEN);
12161 if (!rtx_equal_p (out, orig_out))
12162 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12164 return 1; /* DONE */
12168 * For comparison with above,
12170 * movl cf,dest
12171 * movl ct,tmp
12172 * cmpl op1,op2
12173 * cmovcc tmp,dest
12175 * Size 15.
12178 if (! nonimmediate_operand (operands[2], mode))
12179 operands[2] = force_reg (mode, operands[2]);
12180 if (! nonimmediate_operand (operands[3], mode))
12181 operands[3] = force_reg (mode, operands[3]);
12183 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12185 rtx tmp = gen_reg_rtx (mode);
12186 emit_move_insn (tmp, operands[3]);
12187 operands[3] = tmp;
12189 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12191 rtx tmp = gen_reg_rtx (mode);
12192 emit_move_insn (tmp, operands[2]);
12193 operands[2] = tmp;
12196 if (! register_operand (operands[2], VOIDmode)
12197 && (mode == QImode
12198 || ! register_operand (operands[3], VOIDmode)))
12199 operands[2] = force_reg (mode, operands[2]);
12201 if (mode == QImode
12202 && ! register_operand (operands[3], VOIDmode))
12203 operands[3] = force_reg (mode, operands[3]);
12205 emit_insn (compare_seq);
12206 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12207 gen_rtx_IF_THEN_ELSE (mode,
12208 compare_op, operands[2],
12209 operands[3])));
12210 if (bypass_test)
12211 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12212 gen_rtx_IF_THEN_ELSE (mode,
12213 bypass_test,
12214 copy_rtx (operands[3]),
12215 copy_rtx (operands[0]))));
12216 if (second_test)
12217 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12218 gen_rtx_IF_THEN_ELSE (mode,
12219 second_test,
12220 copy_rtx (operands[2]),
12221 copy_rtx (operands[0]))));
12223 return 1; /* DONE */
12226 /* Swap, force into registers, or otherwise massage the two operands
12227 to an sse comparison with a mask result. Thus we differ a bit from
12228 ix86_prepare_fp_compare_args which expects to produce a flags result.
12230 The DEST operand exists to help determine whether to commute commutative
12231 operators. The POP0/POP1 operands are updated in place. The new
12232 comparison code is returned, or UNKNOWN if not implementable. */
12234 static enum rtx_code
12235 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12236 rtx *pop0, rtx *pop1)
12238 rtx tmp;
12240 switch (code)
12242 case LTGT:
12243 case UNEQ:
12244 /* We have no LTGT as an operator. We could implement it with
12245 NE & ORDERED, but this requires an extra temporary. It's
12246 not clear that it's worth it. */
12247 return UNKNOWN;
12249 case LT:
12250 case LE:
12251 case UNGT:
12252 case UNGE:
12253 /* These are supported directly. */
12254 break;
12256 case EQ:
12257 case NE:
12258 case UNORDERED:
12259 case ORDERED:
12260 /* For commutative operators, try to canonicalize the destination
12261 operand to be first in the comparison - this helps reload to
12262 avoid extra moves. */
12263 if (!dest || !rtx_equal_p (dest, *pop1))
12264 break;
12265 /* FALLTHRU */
12267 case GE:
12268 case GT:
12269 case UNLE:
12270 case UNLT:
12271 /* These are not supported directly. Swap the comparison operands
12272 to transform into something that is supported. */
12273 tmp = *pop0;
12274 *pop0 = *pop1;
12275 *pop1 = tmp;
12276 code = swap_condition (code);
12277 break;
12279 default:
12280 gcc_unreachable ();
12283 return code;
12286 /* Detect conditional moves that exactly match min/max operational
12287 semantics. Note that this is IEEE safe, as long as we don't
12288 interchange the operands.
12290 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12291 and TRUE if the operation is successful and instructions are emitted. */
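/* For example, with -ffinite-math-only and -funsafe-math-optimizations,
   "dest = (a < b) ? a : b" is recognized here and emitted as a single SMIN
   (minss/minsd-style) operation, and "dest = (a < b) ? b : a" as SMAX;
   otherwise an UNSPEC form is used so that the operands keep their original
   order, since the SSE min/max instructions are not commutative with respect
   to NaNs and signed zeros.  */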
12293 static bool
12294 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12295 rtx cmp_op1, rtx if_true, rtx if_false)
12297 enum machine_mode mode;
12298 bool is_min;
12299 rtx tmp;
12301 if (code == LT)
12303 else if (code == UNGE)
12305 tmp = if_true;
12306 if_true = if_false;
12307 if_false = tmp;
12309 else
12310 return false;
12312 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12313 is_min = true;
12314 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12315 is_min = false;
12316 else
12317 return false;
12319 mode = GET_MODE (dest);
12321 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12322 but MODE may be a vector mode and thus not appropriate. */
12323 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12325 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12326 rtvec v;
12328 if_true = force_reg (mode, if_true);
12329 v = gen_rtvec (2, if_true, if_false);
12330 tmp = gen_rtx_UNSPEC (mode, v, u);
12332 else
12334 code = is_min ? SMIN : SMAX;
12335 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12338 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12339 return true;
12342 /* Expand an sse vector comparison. Return the register with the result. */
12344 static rtx
12345 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12346 rtx op_true, rtx op_false)
12348 enum machine_mode mode = GET_MODE (dest);
12349 rtx x;
12351 cmp_op0 = force_reg (mode, cmp_op0);
12352 if (!nonimmediate_operand (cmp_op1, mode))
12353 cmp_op1 = force_reg (mode, cmp_op1);
12355 if (optimize
12356 || reg_overlap_mentioned_p (dest, op_true)
12357 || reg_overlap_mentioned_p (dest, op_false))
12358 dest = gen_reg_rtx (mode);
12360 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12361 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12363 return dest;
12366 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12367 operations. This is used for both scalar and vector conditional moves. */
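/* In the general case this computes dest = (cmp & op_true) | (~cmp & op_false),
   relying on CMP being an all-ones/all-zeros mask; the special cases below
   drop the unnecessary half when one arm is the zero constant.  */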
12369 static void
12370 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12372 enum machine_mode mode = GET_MODE (dest);
12373 rtx t2, t3, x;
12375 if (op_false == CONST0_RTX (mode))
12377 op_true = force_reg (mode, op_true);
12378 x = gen_rtx_AND (mode, cmp, op_true);
12379 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12381 else if (op_true == CONST0_RTX (mode))
12383 op_false = force_reg (mode, op_false);
12384 x = gen_rtx_NOT (mode, cmp);
12385 x = gen_rtx_AND (mode, x, op_false);
12386 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12388 else
12390 op_true = force_reg (mode, op_true);
12391 op_false = force_reg (mode, op_false);
12393 t2 = gen_reg_rtx (mode);
12394 if (optimize)
12395 t3 = gen_reg_rtx (mode);
12396 else
12397 t3 = dest;
12399 x = gen_rtx_AND (mode, op_true, cmp);
12400 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12402 x = gen_rtx_NOT (mode, cmp);
12403 x = gen_rtx_AND (mode, x, op_false);
12404 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12406 x = gen_rtx_IOR (mode, t3, t2);
12407 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12411 /* Expand a floating-point conditional move. Return true if successful. */
12414 ix86_expand_fp_movcc (rtx operands[])
12416 enum machine_mode mode = GET_MODE (operands[0]);
12417 enum rtx_code code = GET_CODE (operands[1]);
12418 rtx tmp, compare_op, second_test, bypass_test;
12420 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12422 enum machine_mode cmode;
12424 /* Since we've no cmove for sse registers, don't force bad register
12425 allocation just to gain access to it. Deny movcc when the
12426 comparison mode doesn't match the move mode. */
12427 cmode = GET_MODE (ix86_compare_op0);
12428 if (cmode == VOIDmode)
12429 cmode = GET_MODE (ix86_compare_op1);
12430 if (cmode != mode)
12431 return 0;
12433 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12434 &ix86_compare_op0,
12435 &ix86_compare_op1);
12436 if (code == UNKNOWN)
12437 return 0;
12439 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12440 ix86_compare_op1, operands[2],
12441 operands[3]))
12442 return 1;
12444 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12445 ix86_compare_op1, operands[2], operands[3]);
12446 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12447 return 1;
12450 /* The floating point conditional move instructions don't directly
12451 support conditions resulting from a signed integer comparison. */
12453 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12455 /* The floating point conditional move instructions don't directly
12456 support signed integer comparisons. */
12458 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12460 gcc_assert (!second_test && !bypass_test);
12461 tmp = gen_reg_rtx (QImode);
12462 ix86_expand_setcc (code, tmp);
12463 code = NE;
12464 ix86_compare_op0 = tmp;
12465 ix86_compare_op1 = const0_rtx;
12466 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12468 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12470 tmp = gen_reg_rtx (mode);
12471 emit_move_insn (tmp, operands[3]);
12472 operands[3] = tmp;
12474 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12476 tmp = gen_reg_rtx (mode);
12477 emit_move_insn (tmp, operands[2]);
12478 operands[2] = tmp;
12481 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12482 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12483 operands[2], operands[3])));
12484 if (bypass_test)
12485 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12486 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12487 operands[3], operands[0])));
12488 if (second_test)
12489 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12490 gen_rtx_IF_THEN_ELSE (mode, second_test,
12491 operands[2], operands[0])));
12493 return 1;
12496 /* Expand a floating-point vector conditional move; a vcond operation
12497 rather than a movcc operation. */
12499 bool
12500 ix86_expand_fp_vcond (rtx operands[])
12502 enum rtx_code code = GET_CODE (operands[3]);
12503 rtx cmp;
12505 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12506 &operands[4], &operands[5]);
12507 if (code == UNKNOWN)
12508 return false;
12510 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12511 operands[5], operands[1], operands[2]))
12512 return true;
12514 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12515 operands[1], operands[2]);
12516 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12517 return true;
12520 /* Expand a signed integral vector conditional move. */
12522 bool
12523 ix86_expand_int_vcond (rtx operands[])
12525 enum machine_mode mode = GET_MODE (operands[0]);
12526 enum rtx_code code = GET_CODE (operands[3]);
12527 bool negate = false;
12528 rtx x, cop0, cop1;
12530 cop0 = operands[4];
12531 cop1 = operands[5];
12533 /* Canonicalize the comparison to EQ, GT, GTU. */
12534 switch (code)
12536 case EQ:
12537 case GT:
12538 case GTU:
12539 break;
12541 case NE:
12542 case LE:
12543 case LEU:
12544 code = reverse_condition (code);
12545 negate = true;
12546 break;
12548 case GE:
12549 case GEU:
12550 code = reverse_condition (code);
12551 negate = true;
12552 /* FALLTHRU */
12554 case LT:
12555 case LTU:
12556 code = swap_condition (code);
12557 x = cop0, cop0 = cop1, cop1 = x;
12558 break;
12560 default:
12561 gcc_unreachable ();
12564 /* Unsigned parallel compare is not supported by the hardware. Play some
12565 tricks to turn this into a signed comparison against 0. */
12566 if (code == GTU)
12568 cop0 = force_reg (mode, cop0);
12570 switch (mode)
12572 case V4SImode:
12574 rtx t1, t2, mask;
12576 /* Perform a parallel modulo subtraction. */
12577 t1 = gen_reg_rtx (mode);
12578 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12580 /* Extract the original sign bit of op0. */
12581 mask = GEN_INT (-0x80000000);
12582 mask = gen_rtx_CONST_VECTOR (mode,
12583 gen_rtvec (4, mask, mask, mask, mask));
12584 mask = force_reg (mode, mask);
12585 t2 = gen_reg_rtx (mode);
12586 emit_insn (gen_andv4si3 (t2, cop0, mask));
12588 /* XOR it back into the result of the subtraction. This results
12589 in the sign bit set iff we saw unsigned underflow. */
12590 x = gen_reg_rtx (mode);
12591 emit_insn (gen_xorv4si3 (x, t1, t2));
12593 code = GT;
12595 break;
12597 case V16QImode:
12598 case V8HImode:
12599 /* Perform a parallel unsigned saturating subtraction. */
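   /* op0 -us op1 is zero exactly when op0 <=u op1, so the EQ test against
      zero below, combined with flipping NEGATE, implements the unsigned
      greater-than comparison.  */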
12600 x = gen_reg_rtx (mode);
12601 emit_insn (gen_rtx_SET (VOIDmode, x,
12602 gen_rtx_US_MINUS (mode, cop0, cop1)));
12604 code = EQ;
12605 negate = !negate;
12606 break;
12608 default:
12609 gcc_unreachable ();
12612 cop0 = x;
12613 cop1 = CONST0_RTX (mode);
12616 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12617 operands[1+negate], operands[2-negate]);
12619 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12620 operands[2-negate]);
12621 return true;
12624 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12625 true if we should do zero extension, else sign extension. HIGH_P is
12626 true if we want the N/2 high elements, else the low elements. */
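/* For example, zero-extending the low half of a V8HImode operand to V4SImode
   interleaves each 16-bit element with a zero word; viewed in the wider mode
   (little endian), every 32-bit lane then holds the zero-extended value.
   For sign extension the interleave partner is instead a mask that is all
   ones for negative elements, computed by comparing 0 > element.  */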
12628 void
12629 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12631 enum machine_mode imode = GET_MODE (operands[1]);
12632 rtx (*unpack)(rtx, rtx, rtx);
12633 rtx se, dest;
12635 switch (imode)
12637 case V16QImode:
12638 if (high_p)
12639 unpack = gen_vec_interleave_highv16qi;
12640 else
12641 unpack = gen_vec_interleave_lowv16qi;
12642 break;
12643 case V8HImode:
12644 if (high_p)
12645 unpack = gen_vec_interleave_highv8hi;
12646 else
12647 unpack = gen_vec_interleave_lowv8hi;
12648 break;
12649 case V4SImode:
12650 if (high_p)
12651 unpack = gen_vec_interleave_highv4si;
12652 else
12653 unpack = gen_vec_interleave_lowv4si;
12654 break;
12655 default:
12656 gcc_unreachable ();
12659 dest = gen_lowpart (imode, operands[0]);
12661 if (unsigned_p)
12662 se = force_reg (imode, CONST0_RTX (imode));
12663 else
12664 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12665 operands[1], pc_rtx, pc_rtx);
12667 emit_insn (unpack (dest, operands[1], se));
12670 /* Expand conditional increment or decrement using adc/sbb instructions.
12671 The default case using setcc followed by the conditional move can be
12672 done by generic code. */
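/* For example, "x = y + (a < b)" with an unsigned comparison can be emitted
   as a compare that leaves the result in the carry flag followed by an
   add-with-carry of zero, avoiding the setcc/cmov sequence entirely.  */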
12674 ix86_expand_int_addcc (rtx operands[])
12676 enum rtx_code code = GET_CODE (operands[1]);
12677 rtx compare_op;
12678 rtx val = const0_rtx;
12679 bool fpcmp = false;
12680 enum machine_mode mode = GET_MODE (operands[0]);
12682 if (operands[3] != const1_rtx
12683 && operands[3] != constm1_rtx)
12684 return 0;
12685 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12686 ix86_compare_op1, &compare_op))
12687 return 0;
12688 code = GET_CODE (compare_op);
12690 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12691 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12693 fpcmp = true;
12694 code = ix86_fp_compare_code_to_integer (code);
12697 if (code != LTU)
12699 val = constm1_rtx;
12700 if (fpcmp)
12701 PUT_CODE (compare_op,
12702 reverse_condition_maybe_unordered
12703 (GET_CODE (compare_op)));
12704 else
12705 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12707 PUT_MODE (compare_op, mode);
12709 /* Construct either adc or sbb insn. */
12710 if ((code == LTU) == (operands[3] == constm1_rtx))
12712 switch (GET_MODE (operands[0]))
12714 case QImode:
12715 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12716 break;
12717 case HImode:
12718 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12719 break;
12720 case SImode:
12721 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12722 break;
12723 case DImode:
12724 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12725 break;
12726 default:
12727 gcc_unreachable ();
12730 else
12732 switch (GET_MODE (operands[0]))
12734 case QImode:
12735 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12736 break;
12737 case HImode:
12738 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12739 break;
12740 case SImode:
12741 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12742 break;
12743 case DImode:
12744 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12745 break;
12746 default:
12747 gcc_unreachable ();
12750 return 1; /* DONE */
12754 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12755 works for floating point parameters and non-offsettable memories.
12756 For pushes, it returns just stack offsets; the values will be saved
12757 in the right order. At most three parts are generated. */
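/* For example, on a 32-bit target a DFmode operand is returned as two SImode
   parts and an XFmode operand as three, while in 64-bit mode an XFmode
   operand becomes one DImode part plus one SImode part.  */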
12759 static int
12760 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12762 int size;
12764 if (!TARGET_64BIT)
12765 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12766 else
12767 size = (GET_MODE_SIZE (mode) + 4) / 8;
12769 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12770 gcc_assert (size >= 2 && size <= 3);
12772 /* Optimize constant pool references into immediates. This is used by fp
12773 moves, which force all constants to memory to allow combining. */
12774 if (MEM_P (operand) && MEM_READONLY_P (operand))
12776 rtx tmp = maybe_get_pool_constant (operand);
12777 if (tmp)
12778 operand = tmp;
12781 if (MEM_P (operand) && !offsettable_memref_p (operand))
12783 /* The only non-offsettable memories we handle are pushes. */
12784 int ok = push_operand (operand, VOIDmode);
12786 gcc_assert (ok);
12788 operand = copy_rtx (operand);
12789 PUT_MODE (operand, Pmode);
12790 parts[0] = parts[1] = parts[2] = operand;
12791 return size;
12794 if (GET_CODE (operand) == CONST_VECTOR)
12796 enum machine_mode imode = int_mode_for_mode (mode);
12797 /* Caution: if we looked through a constant pool memory above,
12798 the operand may actually have a different mode now. That's
12799 ok, since we want to pun this all the way back to an integer. */
12800 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12801 gcc_assert (operand != NULL);
12802 mode = imode;
12805 if (!TARGET_64BIT)
12807 if (mode == DImode)
12808 split_di (&operand, 1, &parts[0], &parts[1]);
12809 else
12811 if (REG_P (operand))
12813 gcc_assert (reload_completed);
12814 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12815 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12816 if (size == 3)
12817 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12819 else if (offsettable_memref_p (operand))
12821 operand = adjust_address (operand, SImode, 0);
12822 parts[0] = operand;
12823 parts[1] = adjust_address (operand, SImode, 4);
12824 if (size == 3)
12825 parts[2] = adjust_address (operand, SImode, 8);
12827 else if (GET_CODE (operand) == CONST_DOUBLE)
12829 REAL_VALUE_TYPE r;
12830 long l[4];
12832 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12833 switch (mode)
12835 case XFmode:
12836 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12837 parts[2] = gen_int_mode (l[2], SImode);
12838 break;
12839 case DFmode:
12840 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12841 break;
12842 default:
12843 gcc_unreachable ();
12845 parts[1] = gen_int_mode (l[1], SImode);
12846 parts[0] = gen_int_mode (l[0], SImode);
12848 else
12849 gcc_unreachable ();
12852 else
12854 if (mode == TImode)
12855 split_ti (&operand, 1, &parts[0], &parts[1]);
12856 if (mode == XFmode || mode == TFmode)
12858 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12859 if (REG_P (operand))
12861 gcc_assert (reload_completed);
12862 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12863 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12865 else if (offsettable_memref_p (operand))
12867 operand = adjust_address (operand, DImode, 0);
12868 parts[0] = operand;
12869 parts[1] = adjust_address (operand, upper_mode, 8);
12871 else if (GET_CODE (operand) == CONST_DOUBLE)
12873 REAL_VALUE_TYPE r;
12874 long l[4];
12876 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12877 real_to_target (l, &r, mode);
12879 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12880 if (HOST_BITS_PER_WIDE_INT >= 64)
12881 parts[0]
12882 = gen_int_mode
12883 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12884 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12885 DImode);
12886 else
12887 parts[0] = immed_double_const (l[0], l[1], DImode);
12889 if (upper_mode == SImode)
12890 parts[1] = gen_int_mode (l[2], SImode);
12891 else if (HOST_BITS_PER_WIDE_INT >= 64)
12892 parts[1]
12893 = gen_int_mode
12894 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12895 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12896 DImode);
12897 else
12898 parts[1] = immed_double_const (l[2], l[3], DImode);
12900 else
12901 gcc_unreachable ();
12905 return size;
12908 /* Emit insns to perform a move or push of DI, DF, and XF values.
12909 All required insns are emitted here directly; nothing is returned
12910 to the caller. Operands 2-4 contain the input values
12911 in the correct order; operands 5-7 contain the output values. */
12913 void
12914 ix86_split_long_move (rtx operands[])
12916 rtx part[2][3];
12917 int nparts;
12918 int push = 0;
12919 int collisions = 0;
12920 enum machine_mode mode = GET_MODE (operands[0]);
12922 /* The DFmode expanders may ask us to move a double.
12923 For a 64-bit target this is a single move. By hiding the fact
12924 here we simplify the i386.md splitters. */
12925 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12927 /* Optimize constant pool references into immediates. This is used by
12928 fp moves, which force all constants to memory to allow combining. */
12930 if (MEM_P (operands[1])
12931 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12932 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12933 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12934 if (push_operand (operands[0], VOIDmode))
12936 operands[0] = copy_rtx (operands[0]);
12937 PUT_MODE (operands[0], Pmode);
12939 else
12940 operands[0] = gen_lowpart (DImode, operands[0]);
12941 operands[1] = gen_lowpart (DImode, operands[1]);
12942 emit_move_insn (operands[0], operands[1]);
12943 return;
12946 /* The only non-offsettable memory we handle is a push. */
12947 if (push_operand (operands[0], VOIDmode))
12948 push = 1;
12949 else
12950 gcc_assert (!MEM_P (operands[0])
12951 || offsettable_memref_p (operands[0]));
12953 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12954 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12956 /* When emitting a push, take care of source operands on the stack. */
12957 if (push && MEM_P (operands[1])
12958 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12960 if (nparts == 3)
12961 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12962 XEXP (part[1][2], 0));
12963 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12964 XEXP (part[1][1], 0));
12967 /* We need to do the copy in the right order in case an address register
12968 of the source overlaps the destination. */
12969 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12971 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12972 collisions++;
12973 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12974 collisions++;
12975 if (nparts == 3
12976 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12977 collisions++;
12979 /* Collision in the middle part can be handled by reordering. */
12980 if (collisions == 1 && nparts == 3
12981 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12983 rtx tmp;
12984 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12985 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12988 /* If there are more collisions, we can't handle them by reordering.
12989 Do an lea to the last part and use only one colliding move. */
12990 else if (collisions > 1)
12992 rtx base;
12994 collisions = 1;
12996 base = part[0][nparts - 1];
12998 /* Handle the case when the last part isn't valid for lea.
12999 Happens in 64-bit mode storing the 12-byte XFmode. */
13000 if (GET_MODE (base) != Pmode)
13001 base = gen_rtx_REG (Pmode, REGNO (base));
13003 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13004 part[1][0] = replace_equiv_address (part[1][0], base);
13005 part[1][1] = replace_equiv_address (part[1][1],
13006 plus_constant (base, UNITS_PER_WORD));
13007 if (nparts == 3)
13008 part[1][2] = replace_equiv_address (part[1][2],
13009 plus_constant (base, 8));
13013 if (push)
13015 if (!TARGET_64BIT)
13017 if (nparts == 3)
13019 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13020 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13021 emit_move_insn (part[0][2], part[1][2]);
13024 else
13026 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
13027 register, it is OK - we will just use the larger counterpart. We also
13028 retype memories - these come from an attempt to avoid a REX prefix on
13029 moving of the second half of a TFmode value. */
13030 if (GET_MODE (part[1][1]) == SImode)
13032 switch (GET_CODE (part[1][1]))
13034 case MEM:
13035 part[1][1] = adjust_address (part[1][1], DImode, 0);
13036 break;
13038 case REG:
13039 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13040 break;
13042 default:
13043 gcc_unreachable ();
13046 if (GET_MODE (part[1][0]) == SImode)
13047 part[1][0] = part[1][1];
13050 emit_move_insn (part[0][1], part[1][1]);
13051 emit_move_insn (part[0][0], part[1][0]);
13052 return;
13055 /* Choose the correct order so as not to overwrite the source before it is copied. */
13056 if ((REG_P (part[0][0])
13057 && REG_P (part[1][1])
13058 && (REGNO (part[0][0]) == REGNO (part[1][1])
13059 || (nparts == 3
13060 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13061 || (collisions > 0
13062 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13064 if (nparts == 3)
13066 operands[2] = part[0][2];
13067 operands[3] = part[0][1];
13068 operands[4] = part[0][0];
13069 operands[5] = part[1][2];
13070 operands[6] = part[1][1];
13071 operands[7] = part[1][0];
13073 else
13075 operands[2] = part[0][1];
13076 operands[3] = part[0][0];
13077 operands[5] = part[1][1];
13078 operands[6] = part[1][0];
13081 else
13083 if (nparts == 3)
13085 operands[2] = part[0][0];
13086 operands[3] = part[0][1];
13087 operands[4] = part[0][2];
13088 operands[5] = part[1][0];
13089 operands[6] = part[1][1];
13090 operands[7] = part[1][2];
13092 else
13094 operands[2] = part[0][0];
13095 operands[3] = part[0][1];
13096 operands[5] = part[1][0];
13097 operands[6] = part[1][1];
13101 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13102 if (optimize_size)
13104 if (CONST_INT_P (operands[5])
13105 && operands[5] != const0_rtx
13106 && REG_P (operands[2]))
13108 if (CONST_INT_P (operands[6])
13109 && INTVAL (operands[6]) == INTVAL (operands[5]))
13110 operands[6] = operands[2];
13112 if (nparts == 3
13113 && CONST_INT_P (operands[7])
13114 && INTVAL (operands[7]) == INTVAL (operands[5]))
13115 operands[7] = operands[2];
13118 if (nparts == 3
13119 && CONST_INT_P (operands[6])
13120 && operands[6] != const0_rtx
13121 && REG_P (operands[3])
13122 && CONST_INT_P (operands[7])
13123 && INTVAL (operands[7]) == INTVAL (operands[6]))
13124 operands[7] = operands[3];
13127 emit_move_insn (operands[2], operands[5]);
13128 emit_move_insn (operands[3], operands[6]);
13129 if (nparts == 3)
13130 emit_move_insn (operands[4], operands[7]);
13132 return;
13135 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13136 left shift by a constant, either using a single shift or
13137 a sequence of add instructions. */
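/* For example, a left shift by 2 may be emitted as two "add reg, reg"
   instructions when twice the add cost does not exceed the cost of a shift
   by a constant; a shift by 1 is always emitted as a single add.  */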
13139 static void
13140 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13142 if (count == 1)
13144 emit_insn ((mode == DImode
13145 ? gen_addsi3
13146 : gen_adddi3) (operand, operand, operand));
13148 else if (!optimize_size
13149 && count * ix86_cost->add <= ix86_cost->shift_const)
13151 int i;
13152 for (i=0; i<count; i++)
13154 emit_insn ((mode == DImode
13155 ? gen_addsi3
13156 : gen_adddi3) (operand, operand, operand));
13159 else
13160 emit_insn ((mode == DImode
13161 ? gen_ashlsi3
13162 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13165 void
13166 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13168 rtx low[2], high[2];
13169 int count;
13170 const int single_width = mode == DImode ? 32 : 64;
13172 if (CONST_INT_P (operands[2]))
13174 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13175 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13177 if (count >= single_width)
13179 emit_move_insn (high[0], low[1]);
13180 emit_move_insn (low[0], const0_rtx);
13182 if (count > single_width)
13183 ix86_expand_ashl_const (high[0], count - single_width, mode);
13185 else
13187 if (!rtx_equal_p (operands[0], operands[1]))
13188 emit_move_insn (operands[0], operands[1]);
13189 emit_insn ((mode == DImode
13190 ? gen_x86_shld_1
13191 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13192 ix86_expand_ashl_const (low[0], count, mode);
13194 return;
13197 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13199 if (operands[1] == const1_rtx)
13201 /* Assuming we've chosen QImode-capable registers, 1 << N
13202 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13203 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13205 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13207 ix86_expand_clear (low[0]);
13208 ix86_expand_clear (high[0]);
13209 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13211 d = gen_lowpart (QImode, low[0]);
13212 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13213 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13214 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13216 d = gen_lowpart (QImode, high[0]);
13217 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13218 s = gen_rtx_NE (QImode, flags, const0_rtx);
13219 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13222 /* Otherwise, we can get the same results by manually performing
13223 a bit extract operation on bit 5/6, and then performing the two
13224 shifts. The two methods of getting 0/1 into low/high are exactly
13225 the same size. Avoiding the shift in the bit extract case helps
13226 pentium4 a bit; no one else seems to care much either way. */
13227 else
13229 rtx x;
13231 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13232 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13233 else
13234 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13235 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13237 emit_insn ((mode == DImode
13238 ? gen_lshrsi3
13239 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13240 emit_insn ((mode == DImode
13241 ? gen_andsi3
13242 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13243 emit_move_insn (low[0], high[0]);
13244 emit_insn ((mode == DImode
13245 ? gen_xorsi3
13246 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13249 emit_insn ((mode == DImode
13250 ? gen_ashlsi3
13251 : gen_ashldi3) (low[0], low[0], operands[2]));
13252 emit_insn ((mode == DImode
13253 ? gen_ashlsi3
13254 : gen_ashldi3) (high[0], high[0], operands[2]));
13255 return;
13258 if (operands[1] == constm1_rtx)
13260 /* For -1 << N, we can avoid the shld instruction, because we
13261 know that we're shifting 0...31/63 ones into a -1. */
13262 emit_move_insn (low[0], constm1_rtx);
13263 if (optimize_size)
13264 emit_move_insn (high[0], low[0]);
13265 else
13266 emit_move_insn (high[0], constm1_rtx);
13268 else
13270 if (!rtx_equal_p (operands[0], operands[1]))
13271 emit_move_insn (operands[0], operands[1]);
13273 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13274 emit_insn ((mode == DImode
13275 ? gen_x86_shld_1
13276 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13279 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13281 if (TARGET_CMOVE && scratch)
13283 ix86_expand_clear (scratch);
13284 emit_insn ((mode == DImode
13285 ? gen_x86_shift_adj_1
13286 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13288 else
13289 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13292 void
13293 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13295 rtx low[2], high[2];
13296 int count;
13297 const int single_width = mode == DImode ? 32 : 64;
13299 if (CONST_INT_P (operands[2]))
13301 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13302 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13304 if (count == single_width * 2 - 1)
13306 emit_move_insn (high[0], high[1]);
13307 emit_insn ((mode == DImode
13308 ? gen_ashrsi3
13309 : gen_ashrdi3) (high[0], high[0],
13310 GEN_INT (single_width - 1)));
13311 emit_move_insn (low[0], high[0]);
13314 else if (count >= single_width)
13316 emit_move_insn (low[0], high[1]);
13317 emit_move_insn (high[0], low[0]);
13318 emit_insn ((mode == DImode
13319 ? gen_ashrsi3
13320 : gen_ashrdi3) (high[0], high[0],
13321 GEN_INT (single_width - 1)));
13322 if (count > single_width)
13323 emit_insn ((mode == DImode
13324 ? gen_ashrsi3
13325 : gen_ashrdi3) (low[0], low[0],
13326 GEN_INT (count - single_width)));
13328 else
13330 if (!rtx_equal_p (operands[0], operands[1]))
13331 emit_move_insn (operands[0], operands[1]);
13332 emit_insn ((mode == DImode
13333 ? gen_x86_shrd_1
13334 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13335 emit_insn ((mode == DImode
13336 ? gen_ashrsi3
13337 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13340 else
13342 if (!rtx_equal_p (operands[0], operands[1]))
13343 emit_move_insn (operands[0], operands[1]);
13345 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13347 emit_insn ((mode == DImode
13348 ? gen_x86_shrd_1
13349 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13350 emit_insn ((mode == DImode
13351 ? gen_ashrsi3
13352 : gen_ashrdi3) (high[0], high[0], operands[2]));
13354 if (TARGET_CMOVE && scratch)
13356 emit_move_insn (scratch, high[0]);
13357 emit_insn ((mode == DImode
13358 ? gen_ashrsi3
13359 : gen_ashrdi3) (scratch, scratch,
13360 GEN_INT (single_width - 1)));
13361 emit_insn ((mode == DImode
13362 ? gen_x86_shift_adj_1
13363 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13364 scratch));
13366 else
13367 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13371 void
13372 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13374 rtx low[2], high[2];
13375 int count;
13376 const int single_width = mode == DImode ? 32 : 64;
13378 if (CONST_INT_P (operands[2]))
13380 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13381 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13383 if (count >= single_width)
13385 emit_move_insn (low[0], high[1]);
13386 ix86_expand_clear (high[0]);
13388 if (count > single_width)
13389 emit_insn ((mode == DImode
13390 ? gen_lshrsi3
13391 : gen_lshrdi3) (low[0], low[0],
13392 GEN_INT (count - single_width)));
13394 else
13396 if (!rtx_equal_p (operands[0], operands[1]))
13397 emit_move_insn (operands[0], operands[1]);
13398 emit_insn ((mode == DImode
13399 ? gen_x86_shrd_1
13400 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13401 emit_insn ((mode == DImode
13402 ? gen_lshrsi3
13403 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13406 else
13408 if (!rtx_equal_p (operands[0], operands[1]))
13409 emit_move_insn (operands[0], operands[1]);
13411 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13413 emit_insn ((mode == DImode
13414 ? gen_x86_shrd_1
13415 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13416 emit_insn ((mode == DImode
13417 ? gen_lshrsi3
13418 : gen_lshrdi3) (high[0], high[0], operands[2]));
13420 /* Heh. By reversing the arguments, we can reuse this pattern. */
13421 if (TARGET_CMOVE && scratch)
13423 ix86_expand_clear (scratch);
13424 emit_insn ((mode == DImode
13425 ? gen_x86_shift_adj_1
13426 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13427 scratch));
13429 else
13430 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13434 /* Predict that the just-emitted jump instruction is taken with probability PROB. */
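/* For example, predict_jump (REG_BR_PROB_BASE * 90 / 100) attaches a
   REG_BR_PROB note saying the branch is taken roughly 90% of the time;
   the string-operation expanders below use this to bias their prologue
   and epilogue guards.  */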
13435 static void
13436 predict_jump (int prob)
13438 rtx insn = get_last_insn ();
13439 gcc_assert (JUMP_P (insn));
13440 REG_NOTES (insn)
13441 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13442 GEN_INT (prob),
13443 REG_NOTES (insn));
13446 /* Helper function for the string operations below. Test whether VARIABLE
13447 is aligned to VALUE bytes. If so, jump to the returned label. */
13448 static rtx
13449 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13451 rtx label = gen_label_rtx ();
13452 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13453 if (GET_MODE (variable) == DImode)
13454 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13455 else
13456 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13457 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13458 1, label);
13459 if (epilogue)
13460 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13461 else
13462 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13463 return label;
13466 /* Decrease COUNTREG by VALUE. */
13467 static void
13468 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13470 if (GET_MODE (countreg) == DImode)
13471 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13472 else
13473 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13476 /* Zero-extend EXP, which may be in SImode, to a Pmode register. */
13478 ix86_zero_extend_to_Pmode (rtx exp)
13480 rtx r;
13481 if (GET_MODE (exp) == VOIDmode)
13482 return force_reg (Pmode, exp);
13483 if (GET_MODE (exp) == Pmode)
13484 return copy_to_mode_reg (Pmode, exp);
13485 r = gen_reg_rtx (Pmode);
13486 emit_insn (gen_zero_extendsidi2 (r, exp));
13487 return r;
13490 /* Divide COUNTREG by SCALE. */
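/* For instance, with SCALE == 4 a constant count of 37 is folded to
   GEN_INT (9), while a register count is shifted right by
   exact_log2 (4) == 2; the discarded low bits are left for the
   caller's epilogue code to handle.  */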
13491 static rtx
13492 scale_counter (rtx countreg, int scale)
13494 rtx sc;
13495 rtx piece_size_mask;
13497 if (scale == 1)
13498 return countreg;
13499 if (CONST_INT_P (countreg))
13500 return GEN_INT (INTVAL (countreg) / scale);
13501 gcc_assert (REG_P (countreg));
13503 piece_size_mask = GEN_INT (scale - 1);
13504 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13505 GEN_INT (exact_log2 (scale)),
13506 NULL, 1, OPTAB_DIRECT);
13507 return sc;
13510 /* Return the mode for the memcpy/memset loop counter. Prefer SImode over DImode
13511 for constant loop counts. */
13513 static enum machine_mode
13514 counter_mode (rtx count_exp)
13516 if (GET_MODE (count_exp) != VOIDmode)
13517 return GET_MODE (count_exp);
13518 if (GET_CODE (count_exp) != CONST_INT)
13519 return Pmode;
13520 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13521 return DImode;
13522 return SImode;
13525 /* When SRCPTR is non-NULL, output a simple loop to move the memory pointed
13526 to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
13527 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13528 equivalent loop to set the memory to VALUE (supposed to be in MODE).
13530 The size is rounded down to a whole multiple of the chunk size moved at once.
13531 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
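/* A rough sketch of the code emitted below, for the move case with a
   chunk of MODE size S unrolled U times (not literal source):

       size = count & ~(S * U - 1);
       if (size == 0) goto out;        // guard emitted only for byte chunks
       iter = 0;
       do
         {
           copy S * U bytes from src + iter to dest + iter;
           iter += S * U;
         }
       while (iter < size);
       dest += iter;  src += iter;
     out:

   The set case stores VALUE instead of copying; the expansion also adds
   the branch-probability notes seen below.  */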
13534 static void
13535 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13536 rtx destptr, rtx srcptr, rtx value,
13537 rtx count, enum machine_mode mode, int unroll,
13538 int expected_size)
13540 rtx out_label, top_label, iter, tmp;
13541 enum machine_mode iter_mode = counter_mode (count);
13542 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13543 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13544 rtx size;
13545 rtx x_addr;
13546 rtx y_addr;
13547 int i;
13549 top_label = gen_label_rtx ();
13550 out_label = gen_label_rtx ();
13551 iter = gen_reg_rtx (iter_mode);
13553 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13554 NULL, 1, OPTAB_DIRECT);
13555 /* Those two should combine. */
13556 if (piece_size == const1_rtx)
13558 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13559 true, out_label);
13560 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13562 emit_move_insn (iter, const0_rtx);
13564 emit_label (top_label);
13566 tmp = convert_modes (Pmode, iter_mode, iter, true);
13567 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13568 destmem = change_address (destmem, mode, x_addr);
13570 if (srcmem)
13572 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13573 srcmem = change_address (srcmem, mode, y_addr);
13575 /* When unrolling for chips that reorder memory reads and writes,
13576 we can save registers by using a single temporary.
13577 Also, using 4 temporaries is overkill in 32-bit mode. */
13578 if (!TARGET_64BIT && 0)
13580 for (i = 0; i < unroll; i++)
13582 if (i)
13584 destmem =
13585 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13586 srcmem =
13587 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13589 emit_move_insn (destmem, srcmem);
13592 else
13594 rtx tmpreg[4];
13595 gcc_assert (unroll <= 4);
13596 for (i = 0; i < unroll; i++)
13598 tmpreg[i] = gen_reg_rtx (mode);
13599 if (i)
13601 srcmem =
13602 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13604 emit_move_insn (tmpreg[i], srcmem);
13606 for (i = 0; i < unroll; i++)
13608 if (i)
13610 destmem =
13611 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13613 emit_move_insn (destmem, tmpreg[i]);
13617 else
13618 for (i = 0; i < unroll; i++)
13620 if (i)
13621 destmem =
13622 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13623 emit_move_insn (destmem, value);
13626 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13627 true, OPTAB_LIB_WIDEN);
13628 if (tmp != iter)
13629 emit_move_insn (iter, tmp);
13631 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13632 true, top_label);
13633 if (expected_size != -1)
13635 expected_size /= GET_MODE_SIZE (mode) * unroll;
13636 if (expected_size == 0)
13637 predict_jump (0);
13638 else if (expected_size > REG_BR_PROB_BASE)
13639 predict_jump (REG_BR_PROB_BASE - 1);
13640 else
13641 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13643 else
13644 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13645 iter = ix86_zero_extend_to_Pmode (iter);
13646 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13647 true, OPTAB_LIB_WIDEN);
13648 if (tmp != destptr)
13649 emit_move_insn (destptr, tmp);
13650 if (srcptr)
13652 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13653 true, OPTAB_LIB_WIDEN);
13654 if (tmp != srcptr)
13655 emit_move_insn (srcptr, tmp);
13657 emit_label (out_label);
13660 /* Output "rep; mov" instruction.
13661 Arguments have the same meaning as for the previous function. */
13662 static void
13663 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13664 rtx destptr, rtx srcptr,
13665 rtx count,
13666 enum machine_mode mode)
13668 rtx destexp;
13669 rtx srcexp;
13670 rtx countreg;
13672 /* If the size is known, it is shorter to use rep movs. */
13673 if (mode == QImode && CONST_INT_P (count)
13674 && !(INTVAL (count) & 3))
13675 mode = SImode;
13677 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13678 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13679 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13680 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13681 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13682 if (mode != QImode)
13684 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13685 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13686 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13687 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13688 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13689 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13691 else
13693 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13694 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13696 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13697 destexp, srcexp));
13700 /* Output "rep; stos" instruction.
13701 Arguments have the same meaning as for the previous function. */
13702 static void
13703 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13704 rtx count,
13705 enum machine_mode mode)
13707 rtx destexp;
13708 rtx countreg;
13710 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13711 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13712 value = force_reg (mode, gen_lowpart (mode, value));
13713 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13714 if (mode != QImode)
13716 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13717 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13718 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13720 else
13721 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13722 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13725 static void
13726 emit_strmov (rtx destmem, rtx srcmem,
13727 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13729 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13730 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13731 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13734 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
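/* For a known count the tail is expanded bit by bit: e.g. on x86-64 a
   remaining count of 13 (binary 1101) emits one DImode, one SImode and
   one QImode move.  For an unknown count, each power-of-two chunk is
   instead guarded by a runtime test of the corresponding bit of COUNT
   via ix86_expand_aligntest.  */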
13735 static void
13736 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13737 rtx destptr, rtx srcptr, rtx count, int max_size)
13739 rtx src, dest;
13740 if (CONST_INT_P (count))
13742 HOST_WIDE_INT countval = INTVAL (count);
13743 int offset = 0;
13745 if ((countval & 0x10) && max_size > 16)
13747 if (TARGET_64BIT)
13749 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13750 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13752 else
13753 gcc_unreachable ();
13754 offset += 16;
13756 if ((countval & 0x08) && max_size > 8)
13758 if (TARGET_64BIT)
13759 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13760 else
13762 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13763 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13765 offset += 8;
13767 if ((countval & 0x04) && max_size > 4)
13769 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13770 offset += 4;
13772 if ((countval & 0x02) && max_size > 2)
13774 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13775 offset += 2;
13777 if ((countval & 0x01) && max_size > 1)
13779 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13780 offset += 1;
13782 return;
13784 if (max_size > 8)
13786 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13787 count, 1, OPTAB_DIRECT);
13788 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13789 count, QImode, 1, 4);
13790 return;
13793 /* When there are stringops, we can cheaply increase dest and src pointers.
13794 Otherwise we save code size by maintaining an offset (zero is readily
13795 available from preceding rep operation) and using x86 addressing modes.
13797 if (TARGET_SINGLE_STRINGOP)
13799 if (max_size > 4)
13801 rtx label = ix86_expand_aligntest (count, 4, true);
13802 src = change_address (srcmem, SImode, srcptr);
13803 dest = change_address (destmem, SImode, destptr);
13804 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13805 emit_label (label);
13806 LABEL_NUSES (label) = 1;
13808 if (max_size > 2)
13810 rtx label = ix86_expand_aligntest (count, 2, true);
13811 src = change_address (srcmem, HImode, srcptr);
13812 dest = change_address (destmem, HImode, destptr);
13813 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13814 emit_label (label);
13815 LABEL_NUSES (label) = 1;
13817 if (max_size > 1)
13819 rtx label = ix86_expand_aligntest (count, 1, true);
13820 src = change_address (srcmem, QImode, srcptr);
13821 dest = change_address (destmem, QImode, destptr);
13822 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13823 emit_label (label);
13824 LABEL_NUSES (label) = 1;
13827 else
13829 rtx offset = force_reg (Pmode, const0_rtx);
13830 rtx tmp;
13832 if (max_size > 4)
13834 rtx label = ix86_expand_aligntest (count, 4, true);
13835 src = change_address (srcmem, SImode, srcptr);
13836 dest = change_address (destmem, SImode, destptr);
13837 emit_move_insn (dest, src);
13838 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13839 true, OPTAB_LIB_WIDEN);
13840 if (tmp != offset)
13841 emit_move_insn (offset, tmp);
13842 emit_label (label);
13843 LABEL_NUSES (label) = 1;
13845 if (max_size > 2)
13847 rtx label = ix86_expand_aligntest (count, 2, true);
13848 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13849 src = change_address (srcmem, HImode, tmp);
13850 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13851 dest = change_address (destmem, HImode, tmp);
13852 emit_move_insn (dest, src);
13853 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13854 true, OPTAB_LIB_WIDEN);
13855 if (tmp != offset)
13856 emit_move_insn (offset, tmp);
13857 emit_label (label);
13858 LABEL_NUSES (label) = 1;
13860 if (max_size > 1)
13862 rtx label = ix86_expand_aligntest (count, 1, true);
13863 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13864 src = change_address (srcmem, QImode, tmp);
13865 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13866 dest = change_address (destmem, QImode, tmp);
13867 emit_move_insn (dest, src);
13868 emit_label (label);
13869 LABEL_NUSES (label) = 1;
13874 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13875 static void
13876 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13877 rtx count, int max_size)
13879 count =
13880 expand_simple_binop (counter_mode (count), AND, count,
13881 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13882 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13883 gen_lowpart (QImode, value), count, QImode,
13884 1, max_size / 2);
13887 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13888 static void
13889 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13891 rtx dest;
13893 if (CONST_INT_P (count))
13895 HOST_WIDE_INT countval = INTVAL (count);
13896 int offset = 0;
13898 if ((countval & 0x10) && max_size > 16)
13900 if (TARGET_64BIT)
13902 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13903 emit_insn (gen_strset (destptr, dest, value));
13904 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13905 emit_insn (gen_strset (destptr, dest, value));
13907 else
13908 gcc_unreachable ();
13909 offset += 16;
13911 if ((countval & 0x08) && max_size > 8)
13913 if (TARGET_64BIT)
13915 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13916 emit_insn (gen_strset (destptr, dest, value));
13918 else
13920 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13921 emit_insn (gen_strset (destptr, dest, value));
13922 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13923 emit_insn (gen_strset (destptr, dest, value));
13925 offset += 8;
13927 if ((countval & 0x04) && max_size > 4)
13929 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13930 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13931 offset += 4;
13933 if ((countval & 0x02) && max_size > 2)
13935 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13936 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13937 offset += 2;
13939 if ((countval & 0x01) && max_size > 1)
13941 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13942 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13943 offset += 1;
13945 return;
13947 if (max_size > 32)
13949 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13950 return;
13952 if (max_size > 16)
13954 rtx label = ix86_expand_aligntest (count, 16, true);
13955 if (TARGET_64BIT)
13957 dest = change_address (destmem, DImode, destptr);
13958 emit_insn (gen_strset (destptr, dest, value));
13959 emit_insn (gen_strset (destptr, dest, value));
13961 else
13963 dest = change_address (destmem, SImode, destptr);
13964 emit_insn (gen_strset (destptr, dest, value));
13965 emit_insn (gen_strset (destptr, dest, value));
13966 emit_insn (gen_strset (destptr, dest, value));
13967 emit_insn (gen_strset (destptr, dest, value));
13969 emit_label (label);
13970 LABEL_NUSES (label) = 1;
13972 if (max_size > 8)
13974 rtx label = ix86_expand_aligntest (count, 8, true);
13975 if (TARGET_64BIT)
13977 dest = change_address (destmem, DImode, destptr);
13978 emit_insn (gen_strset (destptr, dest, value));
13980 else
13982 dest = change_address (destmem, SImode, destptr);
13983 emit_insn (gen_strset (destptr, dest, value));
13984 emit_insn (gen_strset (destptr, dest, value));
13986 emit_label (label);
13987 LABEL_NUSES (label) = 1;
13989 if (max_size > 4)
13991 rtx label = ix86_expand_aligntest (count, 4, true);
13992 dest = change_address (destmem, SImode, destptr);
13993 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13994 emit_label (label);
13995 LABEL_NUSES (label) = 1;
13997 if (max_size > 2)
13999 rtx label = ix86_expand_aligntest (count, 2, true);
14000 dest = change_address (destmem, HImode, destptr);
14001 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14002 emit_label (label);
14003 LABEL_NUSES (label) = 1;
14005 if (max_size > 1)
14007 rtx label = ix86_expand_aligntest (count, 1, true);
14008 dest = change_address (destmem, QImode, destptr);
14009 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14010 emit_label (label);
14011 LABEL_NUSES (label) = 1;
14015 /* Copy enough bytes from SRC to DEST to raise the alignment of DEST, known
14016 to be aligned to ALIGN, up to DESIRED_ALIGNMENT. */
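/* For example, raising ALIGN == 1 to DESIRED_ALIGNMENT == 8 emits up to
   three guarded copies: a 1-byte copy if DESTPTR is odd, then a 2-byte
   copy if it is not yet 4-byte aligned, then a 4-byte copy if it is not
   yet 8-byte aligned, adjusting COUNT after each one.  */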
14017 static void
14018 expand_movmem_prologue (rtx destmem, rtx srcmem,
14019 rtx destptr, rtx srcptr, rtx count,
14020 int align, int desired_alignment)
14022 if (align <= 1 && desired_alignment > 1)
14024 rtx label = ix86_expand_aligntest (destptr, 1, false);
14025 srcmem = change_address (srcmem, QImode, srcptr);
14026 destmem = change_address (destmem, QImode, destptr);
14027 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14028 ix86_adjust_counter (count, 1);
14029 emit_label (label);
14030 LABEL_NUSES (label) = 1;
14032 if (align <= 2 && desired_alignment > 2)
14034 rtx label = ix86_expand_aligntest (destptr, 2, false);
14035 srcmem = change_address (srcmem, HImode, srcptr);
14036 destmem = change_address (destmem, HImode, destptr);
14037 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14038 ix86_adjust_counter (count, 2);
14039 emit_label (label);
14040 LABEL_NUSES (label) = 1;
14042 if (align <= 4 && desired_alignment > 4)
14044 rtx label = ix86_expand_aligntest (destptr, 4, false);
14045 srcmem = change_address (srcmem, SImode, srcptr);
14046 destmem = change_address (destmem, SImode, destptr);
14047 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14048 ix86_adjust_counter (count, 4);
14049 emit_label (label);
14050 LABEL_NUSES (label) = 1;
14052 gcc_assert (desired_alignment <= 8);
14055 /* Store enough bytes at DEST to raise the alignment of DEST, known to be
14056 aligned to ALIGN, up to DESIRED_ALIGNMENT. */
14057 static void
14058 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14059 int align, int desired_alignment)
14061 if (align <= 1 && desired_alignment > 1)
14063 rtx label = ix86_expand_aligntest (destptr, 1, false);
14064 destmem = change_address (destmem, QImode, destptr);
14065 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14066 ix86_adjust_counter (count, 1);
14067 emit_label (label);
14068 LABEL_NUSES (label) = 1;
14070 if (align <= 2 && desired_alignment > 2)
14072 rtx label = ix86_expand_aligntest (destptr, 2, false);
14073 destmem = change_address (destmem, HImode, destptr);
14074 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14075 ix86_adjust_counter (count, 2);
14076 emit_label (label);
14077 LABEL_NUSES (label) = 1;
14079 if (align <= 4 && desired_alignment > 4)
14081 rtx label = ix86_expand_aligntest (destptr, 4, false);
14082 destmem = change_address (destmem, SImode, destptr);
14083 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14084 ix86_adjust_counter (count, 4);
14085 emit_label (label);
14086 LABEL_NUSES (label) = 1;
14088 gcc_assert (desired_alignment <= 8);
14091 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
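/* Illustrative example (the table entries are hypothetical; the real
   limits come from the active cost table): given size entries
   {256, loop}, {8192, rep_prefix_4_byte}, {-1, libcall} and
   expected_size == 1000, the scan below stops at the second entry and
   returns rep_prefix_4_byte, while an unknown size falls back to
   algs->unknown_size.  */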
14092 static enum stringop_alg
14093 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14094 int *dynamic_check)
14096 const struct stringop_algs * algs;
14098 *dynamic_check = -1;
14099 if (memset)
14100 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14101 else
14102 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14103 if (stringop_alg != no_stringop)
14104 return stringop_alg;
14105 /* rep; movq or rep; movl is the smallest variant. */
14106 else if (optimize_size)
14108 if (!count || (count & 3))
14109 return rep_prefix_1_byte;
14110 else
14111 return rep_prefix_4_byte;
14113 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
14115 else if (expected_size != -1 && expected_size < 4)
14116 return loop_1_byte;
14117 else if (expected_size != -1)
14119 unsigned int i;
14120 enum stringop_alg alg = libcall;
14121 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14123 gcc_assert (algs->size[i].max);
14124 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14126 if (algs->size[i].alg != libcall)
14127 alg = algs->size[i].alg;
14128 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14129 last non-libcall inline algorithm. */
14130 if (TARGET_INLINE_ALL_STRINGOPS)
14132 /* When the current size is best to be copied by a libcall,
14133 but we are still forced to inline, run the heuristic below
14134 that will pick code for medium-sized blocks. */
14135 if (alg != libcall)
14136 return alg;
14137 break;
14139 else
14140 return algs->size[i].alg;
14143 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14145 /* When asked to inline the call anyway, try to pick a meaningful choice.
14146 We look for the maximal size of block that is faster to copy by hand and
14147 take blocks of at most that size, guessing that the average size will
14148 be roughly half of the block.
14150 If this turns out to be bad, we might simply specify the preferred
14151 choice in ix86_costs. */
14152 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14153 && algs->unknown_size == libcall)
14155 int max = -1;
14156 enum stringop_alg alg;
14157 int i;
14159 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14160 if (algs->size[i].alg != libcall && algs->size[i].alg)
14161 max = algs->size[i].max;
14162 if (max == -1)
14163 max = 4096;
14164 alg = decide_alg (count, max / 2, memset, dynamic_check);
14165 gcc_assert (*dynamic_check == -1);
14166 gcc_assert (alg != libcall);
14167 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14168 *dynamic_check = max;
14169 return alg;
14171 return algs->unknown_size;
14174 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14175 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
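/* E.g. the loop and unrolled_loop algorithms ask for Pmode-sized (4 or
   8 byte) alignment and rep_prefix_8_byte asks for 8; the result is
   then raised to at least ALIGN, and dropped back to ALIGN when
   optimizing for size or when the block is expected to be shorter than
   4 bytes, since an alignment prologue would not pay off there.  */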
14176 static int
14177 decide_alignment (int align,
14178 enum stringop_alg alg,
14179 int expected_size)
14181 int desired_align = 0;
14182 switch (alg)
14184 case no_stringop:
14185 gcc_unreachable ();
14186 case loop:
14187 case unrolled_loop:
14188 desired_align = GET_MODE_SIZE (Pmode);
14189 break;
14190 case rep_prefix_8_byte:
14191 desired_align = 8;
14192 break;
14193 case rep_prefix_4_byte:
14194 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14195 copying a whole cache line at once. */
14196 if (TARGET_PENTIUMPRO)
14197 desired_align = 8;
14198 else
14199 desired_align = 4;
14200 break;
14201 case rep_prefix_1_byte:
14202 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14203 copying a whole cache line at once. */
14204 if (TARGET_PENTIUMPRO)
14205 desired_align = 8;
14206 else
14207 desired_align = 1;
14208 break;
14209 case loop_1_byte:
14210 desired_align = 1;
14211 break;
14212 case libcall:
14213 return 0;
14216 if (optimize_size)
14217 desired_align = 1;
14218 if (desired_align < align)
14219 desired_align = align;
14220 if (expected_size != -1 && expected_size < 4)
14221 desired_align = align;
14222 return desired_align;
14225 /* Return the smallest power of 2 greater than VAL. */
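/* The result is strictly greater than VAL: e.g. 5 -> 8 but also 8 -> 16,
   since the loop keeps doubling while ret <= val.  */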
14226 static int
14227 smallest_pow2_greater_than (int val)
14229 int ret = 1;
14230 while (ret <= val)
14231 ret <<= 1;
14232 return ret;
14235 /* Expand string move (memcpy) operation. Use i386 string operations when
14236 profitable. expand_clrmem contains similar code. The code depends upon
14237 architecture, block size and alignment, but always has the same
14238 overall structure:
14240 1) Prologue guard: Conditional that jumps up to epilogues for small
14241 blocks that can be handled by epilogue alone. This is faster but
14242 also needed for correctness, since the prologue assumes the block is larger
14243 than the desired alignment.
14245 Optional dynamic check for size and libcall for large
14246 blocks is emitted here too, with -minline-stringops-dynamically.
14248 2) Prologue: copy first few bytes in order to get destination aligned
14249 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14250 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14251 We emit either a jump tree for power-of-two sized blocks, or a byte loop.
14253 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14254 with specified algorithm.
14256 4) Epilogue: code copying tail of the block that is too small to be
14257 handled by main body (or up to size guarded by prologue guard). */
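/* Illustrative example: on x86-64, when the cost tables select
   unrolled_loop for a copy of unknown length, size_needed below becomes
   4 * GET_MODE_SIZE (Pmode) == 32 and desired_align is 8, so the
   expansion consists of the small-block guard, an up-to-7-byte
   alignment prologue, a main loop moving 32 bytes per iteration and an
   epilogue for the remaining tail of fewer than 32 bytes.  */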
14260 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14261 rtx expected_align_exp, rtx expected_size_exp)
14263 rtx destreg;
14264 rtx srcreg;
14265 rtx label = NULL;
14266 rtx tmp;
14267 rtx jump_around_label = NULL;
14268 HOST_WIDE_INT align = 1;
14269 unsigned HOST_WIDE_INT count = 0;
14270 HOST_WIDE_INT expected_size = -1;
14271 int size_needed = 0, epilogue_size_needed;
14272 int desired_align = 0;
14273 enum stringop_alg alg;
14274 int dynamic_check;
14276 if (CONST_INT_P (align_exp))
14277 align = INTVAL (align_exp);
14278 /* i386 can do misaligned access at a reasonably increased cost. */
14279 if (CONST_INT_P (expected_align_exp)
14280 && INTVAL (expected_align_exp) > align)
14281 align = INTVAL (expected_align_exp);
14282 if (CONST_INT_P (count_exp))
14283 count = expected_size = INTVAL (count_exp);
14284 if (CONST_INT_P (expected_size_exp) && count == 0)
14285 expected_size = INTVAL (expected_size_exp);
14287 /* Step 0: Decide on preferred algorithm, desired alignment and
14288 size of chunks to be copied by main loop. */
14290 alg = decide_alg (count, expected_size, false, &dynamic_check);
14291 desired_align = decide_alignment (align, alg, expected_size);
14293 if (!TARGET_ALIGN_STRINGOPS)
14294 align = desired_align;
14296 if (alg == libcall)
14297 return 0;
14298 gcc_assert (alg != no_stringop);
14299 if (!count)
14300 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14301 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14302 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14303 switch (alg)
14305 case libcall:
14306 case no_stringop:
14307 gcc_unreachable ();
14308 case loop:
14309 size_needed = GET_MODE_SIZE (Pmode);
14310 break;
14311 case unrolled_loop:
14312 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14313 break;
14314 case rep_prefix_8_byte:
14315 size_needed = 8;
14316 break;
14317 case rep_prefix_4_byte:
14318 size_needed = 4;
14319 break;
14320 case rep_prefix_1_byte:
14321 case loop_1_byte:
14322 size_needed = 1;
14323 break;
14326 epilogue_size_needed = size_needed;
14328 /* Step 1: Prologue guard. */
14330 /* Alignment code needs count to be in register. */
14331 if (CONST_INT_P (count_exp) && desired_align > align)
14333 enum machine_mode mode = SImode;
14334 if (TARGET_64BIT && (count & ~0xffffffff))
14335 mode = DImode;
14336 count_exp = force_reg (mode, count_exp);
14338 gcc_assert (desired_align >= 1 && align >= 1);
14340 /* Ensure that alignment prologue won't copy past end of block. */
14341 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14343 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14344 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14345 Make sure it is power of 2. */
14346 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14348 label = gen_label_rtx ();
14349 emit_cmp_and_jump_insns (count_exp,
14350 GEN_INT (epilogue_size_needed),
14351 LTU, 0, counter_mode (count_exp), 1, label);
14352 if (GET_CODE (count_exp) == CONST_INT)
14354 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14355 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14356 else
14357 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14359 /* Emit code to decide at runtime whether a library call or inline code
14360 should be used. */
14361 if (dynamic_check != -1)
14363 rtx hot_label = gen_label_rtx ();
14364 jump_around_label = gen_label_rtx ();
14365 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14366 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14367 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14368 emit_block_move_via_libcall (dst, src, count_exp, false);
14369 emit_jump (jump_around_label);
14370 emit_label (hot_label);
14373 /* Step 2: Alignment prologue. */
14375 if (desired_align > align)
14377 /* Except for the first move in epilogue, we no longer know
14378 the constant offset in the aliasing info. It does not seem worth
14379 the pain to maintain it for the first move, so throw away
14380 the info early. */
14381 src = change_address (src, BLKmode, srcreg);
14382 dst = change_address (dst, BLKmode, destreg);
14383 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14384 desired_align);
14386 if (label && size_needed == 1)
14388 emit_label (label);
14389 LABEL_NUSES (label) = 1;
14390 label = NULL;
14393 /* Step 3: Main loop. */
14395 switch (alg)
14397 case libcall:
14398 case no_stringop:
14399 gcc_unreachable ();
14400 case loop_1_byte:
14401 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14402 count_exp, QImode, 1, expected_size);
14403 break;
14404 case loop:
14405 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14406 count_exp, Pmode, 1, expected_size);
14407 break;
14408 case unrolled_loop:
14409 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14410 registers for 4 temporaries anyway. */
14411 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14412 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14413 expected_size);
14414 break;
14415 case rep_prefix_8_byte:
14416 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14417 DImode);
14418 break;
14419 case rep_prefix_4_byte:
14420 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14421 SImode);
14422 break;
14423 case rep_prefix_1_byte:
14424 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14425 QImode);
14426 break;
14428 /* Properly adjust the offsets of the src and dest memory for aliasing. */
14429 if (CONST_INT_P (count_exp))
14431 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14432 (count / size_needed) * size_needed);
14433 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14434 (count / size_needed) * size_needed);
14436 else
14438 src = change_address (src, BLKmode, srcreg);
14439 dst = change_address (dst, BLKmode, destreg);
14442 /* Step 4: Epilogue to copy the remaining bytes. */
14444 if (label)
14446 /* When the main loop is done, COUNT_EXP might hold original count,
14447 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14448 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14449 bytes. Compensate if needed. */
14451 if (size_needed < epilogue_size_needed)
14453 tmp =
14454 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14455 GEN_INT (size_needed - 1), count_exp, 1,
14456 OPTAB_DIRECT);
14457 if (tmp != count_exp)
14458 emit_move_insn (count_exp, tmp);
14460 emit_label (label);
14461 LABEL_NUSES (label) = 1;
14464 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14465 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14466 epilogue_size_needed);
14467 if (jump_around_label)
14468 emit_label (jump_around_label);
14469 return 1;
14472 /* Helper function for memset. For the QImode value 0xXY, produce
14473 0xXYXYXYXY of the width specified by MODE. This is essentially
14474 a * 0x01010101, but we can do slightly better than
14475 synth_mult by unwinding the sequence by hand on CPUs with
14476 slow multiply. */
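/* Worked example for the constant path below: promoting the QImode
   value 0xab gives 0xabab after v |= v << 8, 0xabababab after
   v |= v << 16, and 0xabababababababab after the final DImode step
   v |= (v << 16) << 16.  Non-constant values are built up the same way
   with insv or shift-and-IOR sequences, or by multiplying with
   promote_duplicated_reg (mode, const1_rtx) (i.e. 0x01010101) when the
   cost tables say that is cheaper.  */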
14477 static rtx
14478 promote_duplicated_reg (enum machine_mode mode, rtx val)
14480 enum machine_mode valmode = GET_MODE (val);
14481 rtx tmp;
14482 int nops = mode == DImode ? 3 : 2;
14484 gcc_assert (mode == SImode || mode == DImode);
14485 if (val == const0_rtx)
14486 return copy_to_mode_reg (mode, const0_rtx);
14487 if (CONST_INT_P (val))
14489 HOST_WIDE_INT v = INTVAL (val) & 255;
14491 v |= v << 8;
14492 v |= v << 16;
14493 if (mode == DImode)
14494 v |= (v << 16) << 16;
14495 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14498 if (valmode == VOIDmode)
14499 valmode = QImode;
14500 if (valmode != QImode)
14501 val = gen_lowpart (QImode, val);
14502 if (mode == QImode)
14503 return val;
14504 if (!TARGET_PARTIAL_REG_STALL)
14505 nops--;
14506 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14507 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14508 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14509 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14511 rtx reg = convert_modes (mode, QImode, val, true);
14512 tmp = promote_duplicated_reg (mode, const1_rtx);
14513 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14514 OPTAB_DIRECT);
14516 else
14518 rtx reg = convert_modes (mode, QImode, val, true);
14520 if (!TARGET_PARTIAL_REG_STALL)
14521 if (mode == SImode)
14522 emit_insn (gen_movsi_insv_1 (reg, reg));
14523 else
14524 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14525 else
14527 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14528 NULL, 1, OPTAB_DIRECT);
14529 reg =
14530 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14532 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14533 NULL, 1, OPTAB_DIRECT);
14534 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14535 if (mode == SImode)
14536 return reg;
14537 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14538 NULL, 1, OPTAB_DIRECT);
14539 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14540 return reg;
14544 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
14545 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
14546 prologue raising the alignment from ALIGN to DESIRED_ALIGN. */
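/* E.g. a main loop working in 8-byte chunks on x86-64 (SIZE_NEEDED == 8)
   gets the value widened all the way to DImode, while a plain byte loop
   with no alignment prologue (SIZE_NEEDED == 1, DESIRED_ALIGN == ALIGN)
   leaves VAL unpromoted.  */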
14547 static rtx
14548 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14550 rtx promoted_val;
14552 if (TARGET_64BIT
14553 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14554 promoted_val = promote_duplicated_reg (DImode, val);
14555 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14556 promoted_val = promote_duplicated_reg (SImode, val);
14557 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14558 promoted_val = promote_duplicated_reg (HImode, val);
14559 else
14560 promoted_val = val;
14562 return promoted_val;
14565 /* Expand string clear operation (bzero). Use i386 string operations when
14566 profitable. See expand_movmem comment for explanation of individual
14567 steps performed. */
14569 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14570 rtx expected_align_exp, rtx expected_size_exp)
14572 rtx destreg;
14573 rtx label = NULL;
14574 rtx tmp;
14575 rtx jump_around_label = NULL;
14576 HOST_WIDE_INT align = 1;
14577 unsigned HOST_WIDE_INT count = 0;
14578 HOST_WIDE_INT expected_size = -1;
14579 int size_needed = 0, epilogue_size_needed;
14580 int desired_align = 0;
14581 enum stringop_alg alg;
14582 rtx promoted_val = NULL;
14583 bool force_loopy_epilogue = false;
14584 int dynamic_check;
14586 if (CONST_INT_P (align_exp))
14587 align = INTVAL (align_exp);
14588 /* i386 can do misaligned access at a reasonably increased cost. */
14589 if (CONST_INT_P (expected_align_exp)
14590 && INTVAL (expected_align_exp) > align)
14591 align = INTVAL (expected_align_exp);
14592 if (CONST_INT_P (count_exp))
14593 count = expected_size = INTVAL (count_exp);
14594 if (CONST_INT_P (expected_size_exp) && count == 0)
14595 expected_size = INTVAL (expected_size_exp);
14597 /* Step 0: Decide on preferred algorithm, desired alignment and
14598 size of chunks to be copied by main loop. */
14600 alg = decide_alg (count, expected_size, true, &dynamic_check);
14601 desired_align = decide_alignment (align, alg, expected_size);
14603 if (!TARGET_ALIGN_STRINGOPS)
14604 align = desired_align;
14606 if (alg == libcall)
14607 return 0;
14608 gcc_assert (alg != no_stringop);
14609 if (!count)
14610 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14611 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14612 switch (alg)
14614 case libcall:
14615 case no_stringop:
14616 gcc_unreachable ();
14617 case loop:
14618 size_needed = GET_MODE_SIZE (Pmode);
14619 break;
14620 case unrolled_loop:
14621 size_needed = GET_MODE_SIZE (Pmode) * 4;
14622 break;
14623 case rep_prefix_8_byte:
14624 size_needed = 8;
14625 break;
14626 case rep_prefix_4_byte:
14627 size_needed = 4;
14628 break;
14629 case rep_prefix_1_byte:
14630 case loop_1_byte:
14631 size_needed = 1;
14632 break;
14634 epilogue_size_needed = size_needed;
14636 /* Step 1: Prologue guard. */
14638 /* Alignment code needs count to be in register. */
14639 if (CONST_INT_P (count_exp) && desired_align > align)
14641 enum machine_mode mode = SImode;
14642 if (TARGET_64BIT && (count & ~0xffffffff))
14643 mode = DImode;
14644 count_exp = force_reg (mode, count_exp);
14646 /* Do the cheap promotion to allow better CSE across the
14647 main loop and epilogue (i.e., one load of the big constant in
14648 front of all the code). */
14649 if (CONST_INT_P (val_exp))
14650 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14651 desired_align, align);
14652 /* Ensure that alignment prologue won't copy past end of block. */
14653 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14655 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14656 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14657 Make sure it is power of 2. */
14658 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14660 /* To improve performance of small blocks, we jump around the VAL
14661 promotion code. This means that if the promoted VAL is not constant,
14662 we might not use it in the epilogue and have to use the byte
14663 loop variant. */
14664 if (epilogue_size_needed > 2 && !promoted_val)
14665 force_loopy_epilogue = true;
14666 label = gen_label_rtx ();
14667 emit_cmp_and_jump_insns (count_exp,
14668 GEN_INT (epilogue_size_needed),
14669 LTU, 0, counter_mode (count_exp), 1, label);
14670 if (GET_CODE (count_exp) == CONST_INT)
14672 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14673 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14674 else
14675 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14677 if (dynamic_check != -1)
14679 rtx hot_label = gen_label_rtx ();
14680 jump_around_label = gen_label_rtx ();
14681 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14682 LEU, 0, counter_mode (count_exp), 1, hot_label);
14683 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14684 set_storage_via_libcall (dst, count_exp, val_exp, false);
14685 emit_jump (jump_around_label);
14686 emit_label (hot_label);
14689 /* Step 2: Alignment prologue. */
14691 /* Do the expensive promotion once we branched off the small blocks. */
14692 if (!promoted_val)
14693 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14694 desired_align, align);
14695 gcc_assert (desired_align >= 1 && align >= 1);
14697 if (desired_align > align)
14699 /* Except for the first move in epilogue, we no longer know
14700 the constant offset in the aliasing info. It does not seem worth
14701 the pain to maintain it for the first move, so throw away
14702 the info early. */
14703 dst = change_address (dst, BLKmode, destreg);
14704 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14705 desired_align);
14707 if (label && size_needed == 1)
14709 emit_label (label);
14710 LABEL_NUSES (label) = 1;
14711 label = NULL;
14714 /* Step 3: Main loop. */
14716 switch (alg)
14718 case libcall:
14719 case no_stringop:
14720 gcc_unreachable ();
14721 case loop_1_byte:
14722 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14723 count_exp, QImode, 1, expected_size);
14724 break;
14725 case loop:
14726 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14727 count_exp, Pmode, 1, expected_size);
14728 break;
14729 case unrolled_loop:
14730 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14731 count_exp, Pmode, 4, expected_size);
14732 break;
14733 case rep_prefix_8_byte:
14734 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14735 DImode);
14736 break;
14737 case rep_prefix_4_byte:
14738 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14739 SImode);
14740 break;
14741 case rep_prefix_1_byte:
14742 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14743 QImode);
14744 break;
14746 /* Properly adjust the offset of the dest memory for aliasing. */
14747 if (CONST_INT_P (count_exp))
14748 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14749 (count / size_needed) * size_needed);
14750 else
14751 dst = change_address (dst, BLKmode, destreg);
14753 /* Step 4: Epilogue to copy the remaining bytes. */
14755 if (label)
14757 /* When the main loop is done, COUNT_EXP might hold original count,
14758 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14759 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14760 bytes. Compensate if needed. */
14762 if (size_needed < desired_align - align)
14764 tmp =
14765 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14766 GEN_INT (size_needed - 1), count_exp, 1,
14767 OPTAB_DIRECT);
14768 size_needed = desired_align - align + 1;
14769 if (tmp != count_exp)
14770 emit_move_insn (count_exp, tmp);
14772 emit_label (label);
14773 LABEL_NUSES (label) = 1;
14775 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14777 if (force_loopy_epilogue)
14778 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14779 size_needed);
14780 else
14781 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14782 size_needed);
14784 if (jump_around_label)
14785 emit_label (jump_around_label);
14786 return 1;
14789 /* Expand strlen. */
14791 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14793 rtx addr, scratch1, scratch2, scratch3, scratch4;
14795 /* The generic case of the strlen expander is long. Avoid expanding it
14796 unless TARGET_INLINE_ALL_STRINGOPS. */
14798 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14799 && !TARGET_INLINE_ALL_STRINGOPS
14800 && !optimize_size
14801 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14802 return 0;
14804 addr = force_reg (Pmode, XEXP (src, 0));
14805 scratch1 = gen_reg_rtx (Pmode);
14807 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14808 && !optimize_size)
14810 /* Well it seems that some optimizer does not combine a call like
14811 foo(strlen(bar), strlen(bar));
14812 when the move and the subtraction are done here. It does calculate
14813 the length just once when these instructions are done inside of
14814 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14815 often used and I use one fewer register for the lifetime of
14816 output_strlen_unroll() this is better. */
14818 emit_move_insn (out, addr);
14820 ix86_expand_strlensi_unroll_1 (out, src, align);
14822 /* strlensi_unroll_1 returns the address of the zero at the end of
14823 the string, like memchr(), so compute the length by subtracting
14824 the start address. */
14825 if (TARGET_64BIT)
14826 emit_insn (gen_subdi3 (out, out, addr));
14827 else
14828 emit_insn (gen_subsi3 (out, out, addr));
14830 else
14832 rtx unspec;
14833 scratch2 = gen_reg_rtx (Pmode);
14834 scratch3 = gen_reg_rtx (Pmode);
14835 scratch4 = force_reg (Pmode, constm1_rtx);
14837 emit_move_insn (scratch3, addr);
14838 eoschar = force_reg (QImode, eoschar);
14840 src = replace_equiv_address_nv (src, scratch3);
14842 /* If .md starts supporting :P, this can be done in .md. */
14843 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14844 scratch4), UNSPEC_SCAS);
14845 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14846 if (TARGET_64BIT)
14848 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14849 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14851 else
14853 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14854 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14857 return 1;
14860 /* Expand the appropriate insns for doing strlen if not just doing
14861 repnz; scasb
14863 out = result, initialized with the start address
14864 align_rtx = alignment of the address.
14865 scratch = scratch register, initialized with the start address when
14866 not aligned, otherwise undefined
14868 This is just the body. It needs the initializations mentioned above and
14869 some address computing at the end. These things are done in i386.md. */
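/* Sketch of the code emitted below (not literal source):

       up to three guarded byte compares bring OUT to 4-byte alignment,
       each jumping out as soon as the terminator is found;
       do
         {
           word = *(unsigned int *) out;  out += 4;
         }
       while (((word - 0x01010101) & ~word & 0x80808080) == 0);
       then OUT is adjusted, with cmov or a short branch plus a
       carry-based subtract, so that it points at the zero byte itself,
       which is what the caller's subtraction of the start address
       expects.  */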
14871 static void
14872 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14874 int align;
14875 rtx tmp;
14876 rtx align_2_label = NULL_RTX;
14877 rtx align_3_label = NULL_RTX;
14878 rtx align_4_label = gen_label_rtx ();
14879 rtx end_0_label = gen_label_rtx ();
14880 rtx mem;
14881 rtx tmpreg = gen_reg_rtx (SImode);
14882 rtx scratch = gen_reg_rtx (SImode);
14883 rtx cmp;
14885 align = 0;
14886 if (CONST_INT_P (align_rtx))
14887 align = INTVAL (align_rtx);
14889 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14891 /* Is there a known alignment and is it less than 4? */
14892 if (align < 4)
14894 rtx scratch1 = gen_reg_rtx (Pmode);
14895 emit_move_insn (scratch1, out);
14896 /* Is there a known alignment and is it not 2? */
14897 if (align != 2)
14899 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14900 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14902 /* Leave just the 3 lower bits. */
14903 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14904 NULL_RTX, 0, OPTAB_WIDEN);
14906 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14907 Pmode, 1, align_4_label);
14908 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14909 Pmode, 1, align_2_label);
14910 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14911 Pmode, 1, align_3_label);
14913 else
14915 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14916 check whether it is aligned to a 4-byte boundary. */
14918 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14919 NULL_RTX, 0, OPTAB_WIDEN);
14921 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14922 Pmode, 1, align_4_label);
14925 mem = change_address (src, QImode, out);
14927 /* Now compare the bytes. */
14929 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14930 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14931 QImode, 1, end_0_label);
14933 /* Increment the address. */
14934 if (TARGET_64BIT)
14935 emit_insn (gen_adddi3 (out, out, const1_rtx));
14936 else
14937 emit_insn (gen_addsi3 (out, out, const1_rtx));
14939 /* Not needed with an alignment of 2 */
14940 if (align != 2)
14942 emit_label (align_2_label);
14944 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14945 end_0_label);
14947 if (TARGET_64BIT)
14948 emit_insn (gen_adddi3 (out, out, const1_rtx));
14949 else
14950 emit_insn (gen_addsi3 (out, out, const1_rtx));
14952 emit_label (align_3_label);
14955 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14956 end_0_label);
14958 if (TARGET_64BIT)
14959 emit_insn (gen_adddi3 (out, out, const1_rtx));
14960 else
14961 emit_insn (gen_addsi3 (out, out, const1_rtx));
14964 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14965 align this loop; it only makes the code larger and does not help
14966 to speed it up. */
14967 emit_label (align_4_label);
14969 mem = change_address (src, SImode, out);
14970 emit_move_insn (scratch, mem);
14971 if (TARGET_64BIT)
14972 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14973 else
14974 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14976 /* This formula yields a nonzero result iff one of the bytes is zero.
14977 This saves three branches inside loop and many cycles. */
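/* The value computed into TMPREG is (x - 0x01010101) & ~x & 0x80808080,
   where x is the word just loaded into SCRATCH.  A zero byte wraps to
   0xff when 0x01 is subtracted, setting bit 7 of that byte both in
   x - 0x01010101 and in ~x.  If no byte of x is zero, the subtraction
   never borrows across bytes, and any byte whose bit 7 survives it had
   bit 7 set in x already, which ~x masks off, so the result is 0.
   E.g. x = 0x12003456 gives 0x10ff3355 & 0xedffcba9 & 0x80808080
   == 0x00800000, flagging the zero byte.  */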
14979 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14980 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14981 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14982 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14983 gen_int_mode (0x80808080, SImode)));
14984 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14985 align_4_label);
14987 if (TARGET_CMOVE)
14989 rtx reg = gen_reg_rtx (SImode);
14990 rtx reg2 = gen_reg_rtx (Pmode);
14991 emit_move_insn (reg, tmpreg);
14992 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14994 /* If zero is not in the first two bytes, move two bytes forward. */
14995 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14996 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14997 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14998 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14999 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15000 reg,
15001 tmpreg)));
15002 /* Emit lea manually to avoid clobbering of flags. */
15003 emit_insn (gen_rtx_SET (SImode, reg2,
15004 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15006 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15007 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15008 emit_insn (gen_rtx_SET (VOIDmode, out,
15009 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15010 reg2,
15011 out)));
15014 else
15016 rtx end_2_label = gen_label_rtx ();
15017 /* Is zero in the first two bytes? */
15019 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15020 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15021 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15022 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15023 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15024 pc_rtx);
15025 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15026 JUMP_LABEL (tmp) = end_2_label;
15028 /* Not in the first two. Move two bytes forward. */
15029 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15030 if (TARGET_64BIT)
15031 emit_insn (gen_adddi3 (out, out, const2_rtx));
15032 else
15033 emit_insn (gen_addsi3 (out, out, const2_rtx));
15035 emit_label (end_2_label);
15039 /* Avoid branch in fixing the byte. */
15040 tmpreg = gen_lowpart (QImode, tmpreg);
15041 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15042 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15043 if (TARGET_64BIT)
15044 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15045 else
15046 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15048 emit_label (end_0_label);
15051 /* For a given symbol (function), construct code to compute the address of its
15052 PLT entry in the large x86-64 PIC model. */
15054 construct_plt_address (rtx symbol)
15056 rtx tmp = gen_reg_rtx (Pmode);
15057 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15059 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15060 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15062 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15063 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15064 return tmp;
15067 void
15068 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15069 rtx callarg2 ATTRIBUTE_UNUSED,
15070 rtx pop, int sibcall)
15072 rtx use = NULL, call;
15074 if (pop == const0_rtx)
15075 pop = NULL;
15076 gcc_assert (!TARGET_64BIT || !pop);
15078 if (TARGET_MACHO && !TARGET_64BIT)
15080 #if TARGET_MACHO
15081 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15082 fnaddr = machopic_indirect_call_target (fnaddr);
15083 #endif
15085 else
15087 /* Static functions and indirect calls don't need the pic register. */
15088 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15089 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15090 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15091 use_reg (&use, pic_offset_table_rtx);
15094 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15096 rtx al = gen_rtx_REG (QImode, 0);
15097 emit_move_insn (al, callarg2);
15098 use_reg (&use, al);
15101 if (ix86_cmodel == CM_LARGE_PIC
15102 && GET_CODE (fnaddr) == MEM
15103 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15104 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15105 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15106 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15108 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15109 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15111 if (sibcall && TARGET_64BIT
15112 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15114 rtx addr;
15115 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15116 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15117 emit_move_insn (fnaddr, addr);
15118 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15121 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15122 if (retval)
15123 call = gen_rtx_SET (VOIDmode, retval, call);
15124 if (pop)
15126 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15127 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15128 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15131 call = emit_call_insn (call);
15132 if (use)
15133 CALL_INSN_FUNCTION_USAGE (call) = use;
15137 /* Clear stack slot assignments remembered from previous functions.
15138 This is called from INIT_EXPANDERS once before RTL is emitted for each
15139 function. */
15141 static struct machine_function *
15142 ix86_init_machine_status (void)
15144 struct machine_function *f;
15146 f = ggc_alloc_cleared (sizeof (struct machine_function));
15147 f->use_fast_prologue_epilogue_nregs = -1;
15148 f->tls_descriptor_call_expanded_p = 0;
15150 return f;
15153 /* Return a MEM corresponding to a stack slot with mode MODE.
15154 Allocate a new slot if necessary.
15156 The RTL for a function can have several slots available: N is
15157 which slot to use. */
15160 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15162 struct stack_local_entry *s;
15164 gcc_assert (n < MAX_386_STACK_LOCALS);
15166 for (s = ix86_stack_locals; s; s = s->next)
15167 if (s->mode == mode && s->n == n)
15168 return copy_rtx (s->rtl);
15170 s = (struct stack_local_entry *)
15171 ggc_alloc (sizeof (struct stack_local_entry));
15172 s->n = n;
15173 s->mode = mode;
15174 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15176 s->next = ix86_stack_locals;
15177 ix86_stack_locals = s;
15178 return s->rtl;
15181 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15183 static GTY(()) rtx ix86_tls_symbol;
15185 ix86_tls_get_addr (void)
15188 if (!ix86_tls_symbol)
15190 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15191 (TARGET_ANY_GNU_TLS
15192 && !TARGET_64BIT)
15193 ? "___tls_get_addr"
15194 : "__tls_get_addr");
15197 return ix86_tls_symbol;
15200 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15202 static GTY(()) rtx ix86_tls_module_base_symbol;
15204 ix86_tls_module_base (void)
15207 if (!ix86_tls_module_base_symbol)
15209 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15210 "_TLS_MODULE_BASE_");
15211 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15212 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15215 return ix86_tls_module_base_symbol;
15218 /* Calculate the length of the memory address in the instruction
15219 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15222 memory_address_length (rtx addr)
15224 struct ix86_address parts;
15225 rtx base, index, disp;
15226 int len;
15227 int ok;
15229 if (GET_CODE (addr) == PRE_DEC
15230 || GET_CODE (addr) == POST_INC
15231 || GET_CODE (addr) == PRE_MODIFY
15232 || GET_CODE (addr) == POST_MODIFY)
15233 return 0;
15235 ok = ix86_decompose_address (addr, &parts);
15236 gcc_assert (ok);
15238 if (parts.base && GET_CODE (parts.base) == SUBREG)
15239 parts.base = SUBREG_REG (parts.base);
15240 if (parts.index && GET_CODE (parts.index) == SUBREG)
15241 parts.index = SUBREG_REG (parts.index);
15243 base = parts.base;
15244 index = parts.index;
15245 disp = parts.disp;
15246 len = 0;
15248 /* Rule of thumb:
15249 - esp as the base always wants an index,
15250 - ebp as the base always wants a displacement. */
15252 /* Register Indirect. */
15253 if (base && !index && !disp)
15255 /* esp (for its index) and ebp (for its displacement) need
15256 the two-byte modrm form. */
15257 if (addr == stack_pointer_rtx
15258 || addr == arg_pointer_rtx
15259 || addr == frame_pointer_rtx
15260 || addr == hard_frame_pointer_rtx)
15261 len = 1;
15264 /* Direct Addressing. */
15265 else if (disp && !base && !index)
15266 len = 4;
15268 else
15270 /* Find the length of the displacement constant. */
15271 if (disp)
15273 if (base && satisfies_constraint_K (disp))
15274 len = 1;
15275 else
15276 len = 4;
15278 /* ebp always wants a displacement. */
15279 else if (base == hard_frame_pointer_rtx)
15280 len = 1;
15282 /* An index requires the two-byte modrm form.... */
15283 if (index
15284 /* ...like esp, which always wants an index. */
15285 || base == stack_pointer_rtx
15286 || base == arg_pointer_rtx
15287 || base == frame_pointer_rtx)
15288 len += 1;
15291 return len;
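/* Editor's worked examples (not part of the original source):
     (reg:SI bx)                              -> 0 bytes (one-byte modrm suffices)
     (reg:SI sp)                              -> 1 byte  (SIB byte required)
     (plus:SI (reg:SI bp) (const_int 8))      -> 1 byte  (disp8)
     (plus:SI (reg:SI bx) (const_int 4096))   -> 4 bytes (disp32)
     (symbol_ref "x")                         -> 4 bytes (direct addressing)
   The returned length counts only these extra bytes; the modrm byte,
   opcode and prefixes are accounted for elsewhere.  */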
15294 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15295 is set, expect that the insn has an 8-bit immediate alternative. */
15297 ix86_attr_length_immediate_default (rtx insn, int shortform)
15299 int len = 0;
15300 int i;
15301 extract_insn_cached (insn);
15302 for (i = recog_data.n_operands - 1; i >= 0; --i)
15303 if (CONSTANT_P (recog_data.operand[i]))
15305 gcc_assert (!len);
15306 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15307 len = 1;
15308 else
15310 switch (get_attr_mode (insn))
15312 case MODE_QI:
15313 len+=1;
15314 break;
15315 case MODE_HI:
15316 len+=2;
15317 break;
15318 case MODE_SI:
15319 len+=4;
15320 break;
15321 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15322 case MODE_DI:
15323 len+=4;
15324 break;
15325 default:
15326 fatal_insn ("unknown insn mode", insn);
15330 return len;
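/* Editor's example (not in the original source): for "addl $4, %eax"
   with SHORTFORM set, the constant satisfies constraint K (signed 8-bit)
   and the immediate contributes 1 byte; "addl $1000, %eax" needs a full
   4-byte immediate.  DImode immediates also count as 4 bytes, since they
   are encoded as sign-extended 32-bit values.  */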
15332 /* Compute default value for "length_address" attribute. */
15334 ix86_attr_length_address_default (rtx insn)
15336 int i;
15338 if (get_attr_type (insn) == TYPE_LEA)
15340 rtx set = PATTERN (insn);
15342 if (GET_CODE (set) == PARALLEL)
15343 set = XVECEXP (set, 0, 0);
15345 gcc_assert (GET_CODE (set) == SET);
15347 return memory_address_length (SET_SRC (set));
15350 extract_insn_cached (insn);
15351 for (i = recog_data.n_operands - 1; i >= 0; --i)
15352 if (MEM_P (recog_data.operand[i]))
15354 return memory_address_length (XEXP (recog_data.operand[i], 0));
15355 break;
15357 return 0;
15360 /* Return the maximum number of instructions a cpu can issue. */
15362 static int
15363 ix86_issue_rate (void)
15365 switch (ix86_tune)
15367 case PROCESSOR_PENTIUM:
15368 case PROCESSOR_K6:
15369 return 2;
15371 case PROCESSOR_PENTIUMPRO:
15372 case PROCESSOR_PENTIUM4:
15373 case PROCESSOR_ATHLON:
15374 case PROCESSOR_K8:
15375 case PROCESSOR_AMDFAM10:
15376 case PROCESSOR_NOCONA:
15377 case PROCESSOR_GENERIC32:
15378 case PROCESSOR_GENERIC64:
15379 return 3;
15381 case PROCESSOR_CORE2:
15382 return 4;
15384 default:
15385 return 1;
15389 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15390 by DEP_INSN and nothing else set by DEP_INSN. */
15392 static int
15393 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15395 rtx set, set2;
15397 /* Simplify the test for uninteresting insns. */
15398 if (insn_type != TYPE_SETCC
15399 && insn_type != TYPE_ICMOV
15400 && insn_type != TYPE_FCMOV
15401 && insn_type != TYPE_IBR)
15402 return 0;
15404 if ((set = single_set (dep_insn)) != 0)
15406 set = SET_DEST (set);
15407 set2 = NULL_RTX;
15409 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15410 && XVECLEN (PATTERN (dep_insn), 0) == 2
15411 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15412 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15414 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15415 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15417 else
15418 return 0;
15420 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15421 return 0;
15423 /* This test is true if the dependent insn reads the flags but
15424 not any other potentially set register. */
15425 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15426 return 0;
15428 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15429 return 0;
15431 return 1;
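/* Editor's example (not in the original source): if DEP_INSN is a compare
   such as (set (reg:CC flags) (compare:CC (reg:SI ax) (reg:SI bx))) and
   INSN is a conditional jump (TYPE_IBR) testing those flags, the dependent
   insn sets only the flags register and the consumer reads nothing else it
   sets, so this function returns nonzero and the Pentium cost model below
   treats the compare/branch pair as free.  */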
15434 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15435 address with operands set by DEP_INSN. */
15437 static int
15438 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15440 rtx addr;
15442 if (insn_type == TYPE_LEA
15443 && TARGET_PENTIUM)
15445 addr = PATTERN (insn);
15447 if (GET_CODE (addr) == PARALLEL)
15448 addr = XVECEXP (addr, 0, 0);
15450 gcc_assert (GET_CODE (addr) == SET);
15452 addr = SET_SRC (addr);
15454 else
15456 int i;
15457 extract_insn_cached (insn);
15458 for (i = recog_data.n_operands - 1; i >= 0; --i)
15459 if (MEM_P (recog_data.operand[i]))
15461 addr = XEXP (recog_data.operand[i], 0);
15462 goto found;
15464 return 0;
15465 found:;
15468 return modified_in_p (addr, dep_insn);
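/* Editor's example (not in the original source): if DEP_INSN is
   "addl $4, %ebx" and INSN is "movl (%ebx), %eax", the load's address uses
   a register modified by DEP_INSN, so this returns nonzero and
   ix86_adjust_cost below charges the extra Address Generation Interlock
   cycle on Pentium.  */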
15471 static int
15472 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15474 enum attr_type insn_type, dep_insn_type;
15475 enum attr_memory memory;
15476 rtx set, set2;
15477 int dep_insn_code_number;
15479 /* Anti and output dependencies have zero cost on all CPUs. */
15480 if (REG_NOTE_KIND (link) != 0)
15481 return 0;
15483 dep_insn_code_number = recog_memoized (dep_insn);
15485 /* If we can't recognize the insns, we can't really do anything. */
15486 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15487 return cost;
15489 insn_type = get_attr_type (insn);
15490 dep_insn_type = get_attr_type (dep_insn);
15492 switch (ix86_tune)
15494 case PROCESSOR_PENTIUM:
15495 /* Address Generation Interlock adds a cycle of latency. */
15496 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15497 cost += 1;
15499 /* ??? Compares pair with jump/setcc. */
15500 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15501 cost = 0;
15503 /* Floating point stores require the value to be ready one cycle earlier. */
15504 if (insn_type == TYPE_FMOV
15505 && get_attr_memory (insn) == MEMORY_STORE
15506 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15507 cost += 1;
15508 break;
15510 case PROCESSOR_PENTIUMPRO:
15511 memory = get_attr_memory (insn);
15513 /* INT->FP conversion is expensive. */
15514 if (get_attr_fp_int_src (dep_insn))
15515 cost += 5;
15517 /* There is one cycle extra latency between an FP op and a store. */
15518 if (insn_type == TYPE_FMOV
15519 && (set = single_set (dep_insn)) != NULL_RTX
15520 && (set2 = single_set (insn)) != NULL_RTX
15521 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15522 && MEM_P (SET_DEST (set2)))
15523 cost += 1;
15525 /* Show the ability of the reorder buffer to hide the latency of a load
15526 by executing it in parallel with the previous instruction when the
15527 previous instruction is not needed to compute the address. */
15528 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15529 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15531 /* Claim moves take one cycle, as the core can issue one load
15532 at a time and the next load can start a cycle later. */
15533 if (dep_insn_type == TYPE_IMOV
15534 || dep_insn_type == TYPE_FMOV)
15535 cost = 1;
15536 else if (cost > 1)
15537 cost--;
15539 break;
15541 case PROCESSOR_K6:
15542 memory = get_attr_memory (insn);
15544 /* The esp dependency is resolved before the instruction is really
15545 finished. */
15546 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15547 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15548 return 1;
15550 /* INT->FP conversion is expensive. */
15551 if (get_attr_fp_int_src (dep_insn))
15552 cost += 5;
15554 /* Show the ability of the reorder buffer to hide the latency of a load
15555 by executing it in parallel with the previous instruction when the
15556 previous instruction is not needed to compute the address. */
15557 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15558 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15560 /* Claim moves take one cycle, as the core can issue one load
15561 at a time and the next load can start a cycle later. */
15562 if (dep_insn_type == TYPE_IMOV
15563 || dep_insn_type == TYPE_FMOV)
15564 cost = 1;
15565 else if (cost > 2)
15566 cost -= 2;
15567 else
15568 cost = 1;
15570 break;
15572 case PROCESSOR_ATHLON:
15573 case PROCESSOR_K8:
15574 case PROCESSOR_AMDFAM10:
15575 case PROCESSOR_GENERIC32:
15576 case PROCESSOR_GENERIC64:
15577 memory = get_attr_memory (insn);
15579 /* Show the ability of the reorder buffer to hide the latency of a load
15580 by executing it in parallel with the previous instruction when the
15581 previous instruction is not needed to compute the address. */
15582 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15583 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15585 enum attr_unit unit = get_attr_unit (insn);
15586 int loadcost = 3;
15588 /* Because of the difference between the length of integer and
15589 floating unit pipeline preparation stages, the memory operands
15590 for floating point are cheaper.
15592 ??? For Athlon the difference is most probably 2. */
15593 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15594 loadcost = 3;
15595 else
15596 loadcost = TARGET_ATHLON ? 2 : 0;
15598 if (cost >= loadcost)
15599 cost -= loadcost;
15600 else
15601 cost = 0;
15604 default:
15605 break;
15608 return cost;
15611 /* How many alternative schedules to try. This should be as wide as the
15612 scheduling freedom in the DFA, but no wider. Making this value too
15613 large results in extra work for the scheduler. */
15615 static int
15616 ia32_multipass_dfa_lookahead (void)
15618 if (ix86_tune == PROCESSOR_PENTIUM)
15619 return 2;
15621 if (ix86_tune == PROCESSOR_PENTIUMPRO
15622 || ix86_tune == PROCESSOR_K6)
15623 return 1;
15625 else
15626 return 0;
15630 /* Compute the alignment given to a constant that is being placed in memory.
15631 EXP is the constant and ALIGN is the alignment that the object would
15632 ordinarily have.
15633 The value of this function is used instead of that alignment to align
15634 the object. */
15637 ix86_constant_alignment (tree exp, int align)
15639 if (TREE_CODE (exp) == REAL_CST)
15641 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15642 return 64;
15643 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15644 return 128;
15646 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15647 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15648 return BITS_PER_WORD;
15650 return align;
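/* Editor's example (not in the original source): a DFmode constant pool
   entry is raised to 64-bit alignment, an ALIGN_MODE_128 constant (XFmode
   or vector) to 128 bits, and, unless optimizing for size, a string
   constant of 31 or more characters to word alignment.  */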
15653 /* Compute the alignment for a static variable.
15654 TYPE is the data type, and ALIGN is the alignment that
15655 the object would ordinarily have. The value of this function is used
15656 instead of that alignment to align the object. */
15659 ix86_data_alignment (tree type, int align)
15661 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15663 if (AGGREGATE_TYPE_P (type)
15664 && TYPE_SIZE (type)
15665 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15666 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15667 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15668 && align < max_align)
15669 align = max_align;
15671 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15672 to a 16-byte boundary. */
15673 if (TARGET_64BIT)
15675 if (AGGREGATE_TYPE_P (type)
15676 && TYPE_SIZE (type)
15677 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15678 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15679 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15680 return 128;
15683 if (TREE_CODE (type) == ARRAY_TYPE)
15685 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15686 return 64;
15687 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15688 return 128;
15690 else if (TREE_CODE (type) == COMPLEX_TYPE)
15693 if (TYPE_MODE (type) == DCmode && align < 64)
15694 return 64;
15695 if (TYPE_MODE (type) == XCmode && align < 128)
15696 return 128;
15698 else if ((TREE_CODE (type) == RECORD_TYPE
15699 || TREE_CODE (type) == UNION_TYPE
15700 || TREE_CODE (type) == QUAL_UNION_TYPE)
15701 && TYPE_FIELDS (type))
15703 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15704 return 64;
15705 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15706 return 128;
15708 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15709 || TREE_CODE (type) == INTEGER_TYPE)
15711 if (TYPE_MODE (type) == DFmode && align < 64)
15712 return 64;
15713 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15714 return 128;
15717 return align;
15720 /* Compute the alignment for a local variable.
15721 TYPE is the data type, and ALIGN is the alignment that
15722 the object would ordinarily have. The value of this macro is used
15723 instead of that alignment to align the object. */
15726 ix86_local_alignment (tree type, int align)
15728 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15729 to a 16-byte boundary. */
15730 if (TARGET_64BIT)
15732 if (AGGREGATE_TYPE_P (type)
15733 && TYPE_SIZE (type)
15734 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15735 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15736 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15737 return 128;
15739 if (TREE_CODE (type) == ARRAY_TYPE)
15741 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15742 return 64;
15743 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15744 return 128;
15746 else if (TREE_CODE (type) == COMPLEX_TYPE)
15748 if (TYPE_MODE (type) == DCmode && align < 64)
15749 return 64;
15750 if (TYPE_MODE (type) == XCmode && align < 128)
15751 return 128;
15753 else if ((TREE_CODE (type) == RECORD_TYPE
15754 || TREE_CODE (type) == UNION_TYPE
15755 || TREE_CODE (type) == QUAL_UNION_TYPE)
15756 && TYPE_FIELDS (type))
15758 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15759 return 64;
15760 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15761 return 128;
15763 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15764 || TREE_CODE (type) == INTEGER_TYPE)
15767 if (TYPE_MODE (type) == DFmode && align < 64)
15768 return 64;
15769 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15770 return 128;
15772 return align;
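/* Editor's example (not in the original source): on x86-64 a local
   aggregate such as "double buf[4]" is given 128-bit alignment even though
   its natural alignment is only 64 bits, while a scalar "double d" (on any
   target) is raised to 64-bit alignment; data that already meets these
   thresholds keeps its original alignment.  */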
15775 /* Emit RTL insns to initialize the variable parts of a trampoline.
15776 FNADDR is an RTX for the address of the function's pure code.
15777 CXT is an RTX for the static chain value for the function. */
15778 void
15779 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15781 if (!TARGET_64BIT)
15783 /* Compute offset from the end of the jmp to the target function. */
15784 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15785 plus_constant (tramp, 10),
15786 NULL_RTX, 1, OPTAB_DIRECT);
15787 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15788 gen_int_mode (0xb9, QImode));
15789 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15790 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15791 gen_int_mode (0xe9, QImode));
15792 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15794 else
15796 int offset = 0;
15797 /* Try to load address using shorter movl instead of movabs.
15798 We may want to support movq for kernel mode, but kernel does not use
15799 trampolines at the moment. */
15800 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15802 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15803 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15804 gen_int_mode (0xbb41, HImode));
15805 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15806 gen_lowpart (SImode, fnaddr));
15807 offset += 6;
15809 else
15811 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15812 gen_int_mode (0xbb49, HImode));
15813 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15814 fnaddr);
15815 offset += 10;
15817 /* Load static chain using movabs to r10. */
15818 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15819 gen_int_mode (0xba49, HImode));
15820 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15821 cxt);
15822 offset += 10;
15823 /* Jump to r11. */
15824 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15825 gen_int_mode (0xff49, HImode));
15826 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15827 gen_int_mode (0xe3, QImode));
15828 offset += 3;
15829 gcc_assert (offset <= TRAMPOLINE_SIZE);
15832 #ifdef ENABLE_EXECUTE_STACK
15833 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15834 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15835 #endif
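/* Editor's annotation of the bytes emitted above (not original text):

   32-bit trampoline (10 bytes):
       b9 <cxt:4>            movl   $cxt, %ecx
       e9 <disp:4>           jmp    fnaddr        ; disp = fnaddr - (tramp + 10)

   64-bit trampoline, movabs form (23 bytes):
       49 bb <fnaddr:8>      movabs $fnaddr, %r11
       49 ba <cxt:8>         movabs $cxt, %r10
       49 ff e3              jmp    *%r11

   When FNADDR fits in a zero-extended 32-bit immediate, the first
   instruction is shortened to "41 bb <fnaddr:4>" (movl into %r11d).  */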
15838 /* Codes for all the SSE/MMX builtins. */
15839 enum ix86_builtins
15841 IX86_BUILTIN_ADDPS,
15842 IX86_BUILTIN_ADDSS,
15843 IX86_BUILTIN_DIVPS,
15844 IX86_BUILTIN_DIVSS,
15845 IX86_BUILTIN_MULPS,
15846 IX86_BUILTIN_MULSS,
15847 IX86_BUILTIN_SUBPS,
15848 IX86_BUILTIN_SUBSS,
15850 IX86_BUILTIN_CMPEQPS,
15851 IX86_BUILTIN_CMPLTPS,
15852 IX86_BUILTIN_CMPLEPS,
15853 IX86_BUILTIN_CMPGTPS,
15854 IX86_BUILTIN_CMPGEPS,
15855 IX86_BUILTIN_CMPNEQPS,
15856 IX86_BUILTIN_CMPNLTPS,
15857 IX86_BUILTIN_CMPNLEPS,
15858 IX86_BUILTIN_CMPNGTPS,
15859 IX86_BUILTIN_CMPNGEPS,
15860 IX86_BUILTIN_CMPORDPS,
15861 IX86_BUILTIN_CMPUNORDPS,
15862 IX86_BUILTIN_CMPEQSS,
15863 IX86_BUILTIN_CMPLTSS,
15864 IX86_BUILTIN_CMPLESS,
15865 IX86_BUILTIN_CMPNEQSS,
15866 IX86_BUILTIN_CMPNLTSS,
15867 IX86_BUILTIN_CMPNLESS,
15868 IX86_BUILTIN_CMPNGTSS,
15869 IX86_BUILTIN_CMPNGESS,
15870 IX86_BUILTIN_CMPORDSS,
15871 IX86_BUILTIN_CMPUNORDSS,
15873 IX86_BUILTIN_COMIEQSS,
15874 IX86_BUILTIN_COMILTSS,
15875 IX86_BUILTIN_COMILESS,
15876 IX86_BUILTIN_COMIGTSS,
15877 IX86_BUILTIN_COMIGESS,
15878 IX86_BUILTIN_COMINEQSS,
15879 IX86_BUILTIN_UCOMIEQSS,
15880 IX86_BUILTIN_UCOMILTSS,
15881 IX86_BUILTIN_UCOMILESS,
15882 IX86_BUILTIN_UCOMIGTSS,
15883 IX86_BUILTIN_UCOMIGESS,
15884 IX86_BUILTIN_UCOMINEQSS,
15886 IX86_BUILTIN_CVTPI2PS,
15887 IX86_BUILTIN_CVTPS2PI,
15888 IX86_BUILTIN_CVTSI2SS,
15889 IX86_BUILTIN_CVTSI642SS,
15890 IX86_BUILTIN_CVTSS2SI,
15891 IX86_BUILTIN_CVTSS2SI64,
15892 IX86_BUILTIN_CVTTPS2PI,
15893 IX86_BUILTIN_CVTTSS2SI,
15894 IX86_BUILTIN_CVTTSS2SI64,
15896 IX86_BUILTIN_MAXPS,
15897 IX86_BUILTIN_MAXSS,
15898 IX86_BUILTIN_MINPS,
15899 IX86_BUILTIN_MINSS,
15901 IX86_BUILTIN_LOADUPS,
15902 IX86_BUILTIN_STOREUPS,
15903 IX86_BUILTIN_MOVSS,
15905 IX86_BUILTIN_MOVHLPS,
15906 IX86_BUILTIN_MOVLHPS,
15907 IX86_BUILTIN_LOADHPS,
15908 IX86_BUILTIN_LOADLPS,
15909 IX86_BUILTIN_STOREHPS,
15910 IX86_BUILTIN_STORELPS,
15912 IX86_BUILTIN_MASKMOVQ,
15913 IX86_BUILTIN_MOVMSKPS,
15914 IX86_BUILTIN_PMOVMSKB,
15916 IX86_BUILTIN_MOVNTPS,
15917 IX86_BUILTIN_MOVNTQ,
15919 IX86_BUILTIN_LOADDQU,
15920 IX86_BUILTIN_STOREDQU,
15922 IX86_BUILTIN_PACKSSWB,
15923 IX86_BUILTIN_PACKSSDW,
15924 IX86_BUILTIN_PACKUSWB,
15926 IX86_BUILTIN_PADDB,
15927 IX86_BUILTIN_PADDW,
15928 IX86_BUILTIN_PADDD,
15929 IX86_BUILTIN_PADDQ,
15930 IX86_BUILTIN_PADDSB,
15931 IX86_BUILTIN_PADDSW,
15932 IX86_BUILTIN_PADDUSB,
15933 IX86_BUILTIN_PADDUSW,
15934 IX86_BUILTIN_PSUBB,
15935 IX86_BUILTIN_PSUBW,
15936 IX86_BUILTIN_PSUBD,
15937 IX86_BUILTIN_PSUBQ,
15938 IX86_BUILTIN_PSUBSB,
15939 IX86_BUILTIN_PSUBSW,
15940 IX86_BUILTIN_PSUBUSB,
15941 IX86_BUILTIN_PSUBUSW,
15943 IX86_BUILTIN_PAND,
15944 IX86_BUILTIN_PANDN,
15945 IX86_BUILTIN_POR,
15946 IX86_BUILTIN_PXOR,
15948 IX86_BUILTIN_PAVGB,
15949 IX86_BUILTIN_PAVGW,
15951 IX86_BUILTIN_PCMPEQB,
15952 IX86_BUILTIN_PCMPEQW,
15953 IX86_BUILTIN_PCMPEQD,
15954 IX86_BUILTIN_PCMPGTB,
15955 IX86_BUILTIN_PCMPGTW,
15956 IX86_BUILTIN_PCMPGTD,
15958 IX86_BUILTIN_PMADDWD,
15960 IX86_BUILTIN_PMAXSW,
15961 IX86_BUILTIN_PMAXUB,
15962 IX86_BUILTIN_PMINSW,
15963 IX86_BUILTIN_PMINUB,
15965 IX86_BUILTIN_PMULHUW,
15966 IX86_BUILTIN_PMULHW,
15967 IX86_BUILTIN_PMULLW,
15969 IX86_BUILTIN_PSADBW,
15970 IX86_BUILTIN_PSHUFW,
15972 IX86_BUILTIN_PSLLW,
15973 IX86_BUILTIN_PSLLD,
15974 IX86_BUILTIN_PSLLQ,
15975 IX86_BUILTIN_PSRAW,
15976 IX86_BUILTIN_PSRAD,
15977 IX86_BUILTIN_PSRLW,
15978 IX86_BUILTIN_PSRLD,
15979 IX86_BUILTIN_PSRLQ,
15980 IX86_BUILTIN_PSLLWI,
15981 IX86_BUILTIN_PSLLDI,
15982 IX86_BUILTIN_PSLLQI,
15983 IX86_BUILTIN_PSRAWI,
15984 IX86_BUILTIN_PSRADI,
15985 IX86_BUILTIN_PSRLWI,
15986 IX86_BUILTIN_PSRLDI,
15987 IX86_BUILTIN_PSRLQI,
15989 IX86_BUILTIN_PUNPCKHBW,
15990 IX86_BUILTIN_PUNPCKHWD,
15991 IX86_BUILTIN_PUNPCKHDQ,
15992 IX86_BUILTIN_PUNPCKLBW,
15993 IX86_BUILTIN_PUNPCKLWD,
15994 IX86_BUILTIN_PUNPCKLDQ,
15996 IX86_BUILTIN_SHUFPS,
15998 IX86_BUILTIN_RCPPS,
15999 IX86_BUILTIN_RCPSS,
16000 IX86_BUILTIN_RSQRTPS,
16001 IX86_BUILTIN_RSQRTSS,
16002 IX86_BUILTIN_SQRTPS,
16003 IX86_BUILTIN_SQRTSS,
16005 IX86_BUILTIN_UNPCKHPS,
16006 IX86_BUILTIN_UNPCKLPS,
16008 IX86_BUILTIN_ANDPS,
16009 IX86_BUILTIN_ANDNPS,
16010 IX86_BUILTIN_ORPS,
16011 IX86_BUILTIN_XORPS,
16013 IX86_BUILTIN_EMMS,
16014 IX86_BUILTIN_LDMXCSR,
16015 IX86_BUILTIN_STMXCSR,
16016 IX86_BUILTIN_SFENCE,
16018 /* 3DNow! Original */
16019 IX86_BUILTIN_FEMMS,
16020 IX86_BUILTIN_PAVGUSB,
16021 IX86_BUILTIN_PF2ID,
16022 IX86_BUILTIN_PFACC,
16023 IX86_BUILTIN_PFADD,
16024 IX86_BUILTIN_PFCMPEQ,
16025 IX86_BUILTIN_PFCMPGE,
16026 IX86_BUILTIN_PFCMPGT,
16027 IX86_BUILTIN_PFMAX,
16028 IX86_BUILTIN_PFMIN,
16029 IX86_BUILTIN_PFMUL,
16030 IX86_BUILTIN_PFRCP,
16031 IX86_BUILTIN_PFRCPIT1,
16032 IX86_BUILTIN_PFRCPIT2,
16033 IX86_BUILTIN_PFRSQIT1,
16034 IX86_BUILTIN_PFRSQRT,
16035 IX86_BUILTIN_PFSUB,
16036 IX86_BUILTIN_PFSUBR,
16037 IX86_BUILTIN_PI2FD,
16038 IX86_BUILTIN_PMULHRW,
16040 /* 3DNow! Athlon Extensions */
16041 IX86_BUILTIN_PF2IW,
16042 IX86_BUILTIN_PFNACC,
16043 IX86_BUILTIN_PFPNACC,
16044 IX86_BUILTIN_PI2FW,
16045 IX86_BUILTIN_PSWAPDSI,
16046 IX86_BUILTIN_PSWAPDSF,
16048 /* SSE2 */
16049 IX86_BUILTIN_ADDPD,
16050 IX86_BUILTIN_ADDSD,
16051 IX86_BUILTIN_DIVPD,
16052 IX86_BUILTIN_DIVSD,
16053 IX86_BUILTIN_MULPD,
16054 IX86_BUILTIN_MULSD,
16055 IX86_BUILTIN_SUBPD,
16056 IX86_BUILTIN_SUBSD,
16058 IX86_BUILTIN_CMPEQPD,
16059 IX86_BUILTIN_CMPLTPD,
16060 IX86_BUILTIN_CMPLEPD,
16061 IX86_BUILTIN_CMPGTPD,
16062 IX86_BUILTIN_CMPGEPD,
16063 IX86_BUILTIN_CMPNEQPD,
16064 IX86_BUILTIN_CMPNLTPD,
16065 IX86_BUILTIN_CMPNLEPD,
16066 IX86_BUILTIN_CMPNGTPD,
16067 IX86_BUILTIN_CMPNGEPD,
16068 IX86_BUILTIN_CMPORDPD,
16069 IX86_BUILTIN_CMPUNORDPD,
16070 IX86_BUILTIN_CMPNEPD,
16071 IX86_BUILTIN_CMPEQSD,
16072 IX86_BUILTIN_CMPLTSD,
16073 IX86_BUILTIN_CMPLESD,
16074 IX86_BUILTIN_CMPNEQSD,
16075 IX86_BUILTIN_CMPNLTSD,
16076 IX86_BUILTIN_CMPNLESD,
16077 IX86_BUILTIN_CMPORDSD,
16078 IX86_BUILTIN_CMPUNORDSD,
16079 IX86_BUILTIN_CMPNESD,
16081 IX86_BUILTIN_COMIEQSD,
16082 IX86_BUILTIN_COMILTSD,
16083 IX86_BUILTIN_COMILESD,
16084 IX86_BUILTIN_COMIGTSD,
16085 IX86_BUILTIN_COMIGESD,
16086 IX86_BUILTIN_COMINEQSD,
16087 IX86_BUILTIN_UCOMIEQSD,
16088 IX86_BUILTIN_UCOMILTSD,
16089 IX86_BUILTIN_UCOMILESD,
16090 IX86_BUILTIN_UCOMIGTSD,
16091 IX86_BUILTIN_UCOMIGESD,
16092 IX86_BUILTIN_UCOMINEQSD,
16094 IX86_BUILTIN_MAXPD,
16095 IX86_BUILTIN_MAXSD,
16096 IX86_BUILTIN_MINPD,
16097 IX86_BUILTIN_MINSD,
16099 IX86_BUILTIN_ANDPD,
16100 IX86_BUILTIN_ANDNPD,
16101 IX86_BUILTIN_ORPD,
16102 IX86_BUILTIN_XORPD,
16104 IX86_BUILTIN_SQRTPD,
16105 IX86_BUILTIN_SQRTSD,
16107 IX86_BUILTIN_UNPCKHPD,
16108 IX86_BUILTIN_UNPCKLPD,
16110 IX86_BUILTIN_SHUFPD,
16112 IX86_BUILTIN_LOADUPD,
16113 IX86_BUILTIN_STOREUPD,
16114 IX86_BUILTIN_MOVSD,
16116 IX86_BUILTIN_LOADHPD,
16117 IX86_BUILTIN_LOADLPD,
16119 IX86_BUILTIN_CVTDQ2PD,
16120 IX86_BUILTIN_CVTDQ2PS,
16122 IX86_BUILTIN_CVTPD2DQ,
16123 IX86_BUILTIN_CVTPD2PI,
16124 IX86_BUILTIN_CVTPD2PS,
16125 IX86_BUILTIN_CVTTPD2DQ,
16126 IX86_BUILTIN_CVTTPD2PI,
16128 IX86_BUILTIN_CVTPI2PD,
16129 IX86_BUILTIN_CVTSI2SD,
16130 IX86_BUILTIN_CVTSI642SD,
16132 IX86_BUILTIN_CVTSD2SI,
16133 IX86_BUILTIN_CVTSD2SI64,
16134 IX86_BUILTIN_CVTSD2SS,
16135 IX86_BUILTIN_CVTSS2SD,
16136 IX86_BUILTIN_CVTTSD2SI,
16137 IX86_BUILTIN_CVTTSD2SI64,
16139 IX86_BUILTIN_CVTPS2DQ,
16140 IX86_BUILTIN_CVTPS2PD,
16141 IX86_BUILTIN_CVTTPS2DQ,
16143 IX86_BUILTIN_MOVNTI,
16144 IX86_BUILTIN_MOVNTPD,
16145 IX86_BUILTIN_MOVNTDQ,
16147 /* SSE2 MMX */
16148 IX86_BUILTIN_MASKMOVDQU,
16149 IX86_BUILTIN_MOVMSKPD,
16150 IX86_BUILTIN_PMOVMSKB128,
16152 IX86_BUILTIN_PACKSSWB128,
16153 IX86_BUILTIN_PACKSSDW128,
16154 IX86_BUILTIN_PACKUSWB128,
16156 IX86_BUILTIN_PADDB128,
16157 IX86_BUILTIN_PADDW128,
16158 IX86_BUILTIN_PADDD128,
16159 IX86_BUILTIN_PADDQ128,
16160 IX86_BUILTIN_PADDSB128,
16161 IX86_BUILTIN_PADDSW128,
16162 IX86_BUILTIN_PADDUSB128,
16163 IX86_BUILTIN_PADDUSW128,
16164 IX86_BUILTIN_PSUBB128,
16165 IX86_BUILTIN_PSUBW128,
16166 IX86_BUILTIN_PSUBD128,
16167 IX86_BUILTIN_PSUBQ128,
16168 IX86_BUILTIN_PSUBSB128,
16169 IX86_BUILTIN_PSUBSW128,
16170 IX86_BUILTIN_PSUBUSB128,
16171 IX86_BUILTIN_PSUBUSW128,
16173 IX86_BUILTIN_PAND128,
16174 IX86_BUILTIN_PANDN128,
16175 IX86_BUILTIN_POR128,
16176 IX86_BUILTIN_PXOR128,
16178 IX86_BUILTIN_PAVGB128,
16179 IX86_BUILTIN_PAVGW128,
16181 IX86_BUILTIN_PCMPEQB128,
16182 IX86_BUILTIN_PCMPEQW128,
16183 IX86_BUILTIN_PCMPEQD128,
16184 IX86_BUILTIN_PCMPGTB128,
16185 IX86_BUILTIN_PCMPGTW128,
16186 IX86_BUILTIN_PCMPGTD128,
16188 IX86_BUILTIN_PMADDWD128,
16190 IX86_BUILTIN_PMAXSW128,
16191 IX86_BUILTIN_PMAXUB128,
16192 IX86_BUILTIN_PMINSW128,
16193 IX86_BUILTIN_PMINUB128,
16195 IX86_BUILTIN_PMULUDQ,
16196 IX86_BUILTIN_PMULUDQ128,
16197 IX86_BUILTIN_PMULHUW128,
16198 IX86_BUILTIN_PMULHW128,
16199 IX86_BUILTIN_PMULLW128,
16201 IX86_BUILTIN_PSADBW128,
16202 IX86_BUILTIN_PSHUFHW,
16203 IX86_BUILTIN_PSHUFLW,
16204 IX86_BUILTIN_PSHUFD,
16206 IX86_BUILTIN_PSLLW128,
16207 IX86_BUILTIN_PSLLD128,
16208 IX86_BUILTIN_PSLLQ128,
16209 IX86_BUILTIN_PSRAW128,
16210 IX86_BUILTIN_PSRAD128,
16211 IX86_BUILTIN_PSRLW128,
16212 IX86_BUILTIN_PSRLD128,
16213 IX86_BUILTIN_PSRLQ128,
16214 IX86_BUILTIN_PSLLDQI128,
16215 IX86_BUILTIN_PSLLWI128,
16216 IX86_BUILTIN_PSLLDI128,
16217 IX86_BUILTIN_PSLLQI128,
16218 IX86_BUILTIN_PSRAWI128,
16219 IX86_BUILTIN_PSRADI128,
16220 IX86_BUILTIN_PSRLDQI128,
16221 IX86_BUILTIN_PSRLWI128,
16222 IX86_BUILTIN_PSRLDI128,
16223 IX86_BUILTIN_PSRLQI128,
16225 IX86_BUILTIN_PUNPCKHBW128,
16226 IX86_BUILTIN_PUNPCKHWD128,
16227 IX86_BUILTIN_PUNPCKHDQ128,
16228 IX86_BUILTIN_PUNPCKHQDQ128,
16229 IX86_BUILTIN_PUNPCKLBW128,
16230 IX86_BUILTIN_PUNPCKLWD128,
16231 IX86_BUILTIN_PUNPCKLDQ128,
16232 IX86_BUILTIN_PUNPCKLQDQ128,
16234 IX86_BUILTIN_CLFLUSH,
16235 IX86_BUILTIN_MFENCE,
16236 IX86_BUILTIN_LFENCE,
16238 /* Prescott New Instructions. */
16239 IX86_BUILTIN_ADDSUBPS,
16240 IX86_BUILTIN_HADDPS,
16241 IX86_BUILTIN_HSUBPS,
16242 IX86_BUILTIN_MOVSHDUP,
16243 IX86_BUILTIN_MOVSLDUP,
16244 IX86_BUILTIN_ADDSUBPD,
16245 IX86_BUILTIN_HADDPD,
16246 IX86_BUILTIN_HSUBPD,
16247 IX86_BUILTIN_LDDQU,
16249 IX86_BUILTIN_MONITOR,
16250 IX86_BUILTIN_MWAIT,
16252 /* SSSE3. */
16253 IX86_BUILTIN_PHADDW,
16254 IX86_BUILTIN_PHADDD,
16255 IX86_BUILTIN_PHADDSW,
16256 IX86_BUILTIN_PHSUBW,
16257 IX86_BUILTIN_PHSUBD,
16258 IX86_BUILTIN_PHSUBSW,
16259 IX86_BUILTIN_PMADDUBSW,
16260 IX86_BUILTIN_PMULHRSW,
16261 IX86_BUILTIN_PSHUFB,
16262 IX86_BUILTIN_PSIGNB,
16263 IX86_BUILTIN_PSIGNW,
16264 IX86_BUILTIN_PSIGND,
16265 IX86_BUILTIN_PALIGNR,
16266 IX86_BUILTIN_PABSB,
16267 IX86_BUILTIN_PABSW,
16268 IX86_BUILTIN_PABSD,
16270 IX86_BUILTIN_PHADDW128,
16271 IX86_BUILTIN_PHADDD128,
16272 IX86_BUILTIN_PHADDSW128,
16273 IX86_BUILTIN_PHSUBW128,
16274 IX86_BUILTIN_PHSUBD128,
16275 IX86_BUILTIN_PHSUBSW128,
16276 IX86_BUILTIN_PMADDUBSW128,
16277 IX86_BUILTIN_PMULHRSW128,
16278 IX86_BUILTIN_PSHUFB128,
16279 IX86_BUILTIN_PSIGNB128,
16280 IX86_BUILTIN_PSIGNW128,
16281 IX86_BUILTIN_PSIGND128,
16282 IX86_BUILTIN_PALIGNR128,
16283 IX86_BUILTIN_PABSB128,
16284 IX86_BUILTIN_PABSW128,
16285 IX86_BUILTIN_PABSD128,
16287 /* AMDFAM10 - SSE4A New Instructions. */
16288 IX86_BUILTIN_MOVNTSD,
16289 IX86_BUILTIN_MOVNTSS,
16290 IX86_BUILTIN_EXTRQI,
16291 IX86_BUILTIN_EXTRQ,
16292 IX86_BUILTIN_INSERTQI,
16293 IX86_BUILTIN_INSERTQ,
16295 IX86_BUILTIN_VEC_INIT_V2SI,
16296 IX86_BUILTIN_VEC_INIT_V4HI,
16297 IX86_BUILTIN_VEC_INIT_V8QI,
16298 IX86_BUILTIN_VEC_EXT_V2DF,
16299 IX86_BUILTIN_VEC_EXT_V2DI,
16300 IX86_BUILTIN_VEC_EXT_V4SF,
16301 IX86_BUILTIN_VEC_EXT_V4SI,
16302 IX86_BUILTIN_VEC_EXT_V8HI,
16303 IX86_BUILTIN_VEC_EXT_V2SI,
16304 IX86_BUILTIN_VEC_EXT_V4HI,
16305 IX86_BUILTIN_VEC_SET_V8HI,
16306 IX86_BUILTIN_VEC_SET_V4HI,
16308 IX86_BUILTIN_MAX
16311 /* Table for the ix86 builtin decls. */
16312 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16314 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16315 * only if target_flags includes one of the bits in MASK. Stores the
16316 * function decl in the ix86_builtins array.
16317 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16319 static inline tree
16320 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16322 tree decl = NULL_TREE;
16324 if (mask & target_flags
16325 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16327 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16328 NULL, NULL_TREE);
16329 ix86_builtins[(int) code] = decl;
16332 return decl;
16335 /* Like def_builtin, but also marks the function decl "const". */
16337 static inline tree
16338 def_builtin_const (int mask, const char *name, tree type,
16339 enum ix86_builtins code)
16341 tree decl = def_builtin (mask, name, type, code);
16342 if (decl)
16343 TREE_READONLY (decl) = 1;
16344 return decl;
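/* Editor's sketch of how these helpers are used (the type node name
   v4sf_ftype_v4sf is illustrative and not guaranteed to match the ones
   built later in this file):

     def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps",
                        v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);

   The decl is created only when one of the MASK bits is set in
   target_flags (and, when MASK_64BIT is requested, only on 64-bit
   targets); it is recorded in ix86_builtins[] for use at expansion time,
   and def_builtin_const additionally marks it TREE_READONLY.  */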
16347 /* Bits for builtin_description.flag. */
16349 /* Set when we don't support the comparison natively, and should
16350 swap_comparison in order to support it. */
16351 #define BUILTIN_DESC_SWAP_OPERANDS 1
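/* Editor's note: for example, SSE has no native "compare greater"
   instruction, so __builtin_ia32_cmpgtps appears in bdesc_2arg below with
   comparison code LT and BUILTIN_DESC_SWAP_OPERANDS set; the expander
   swaps the two operands and emits the cmpltps pattern instead.  */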
16353 struct builtin_description
16355 const unsigned int mask;
16356 const enum insn_code icode;
16357 const char *const name;
16358 const enum ix86_builtins code;
16359 const enum rtx_code comparison;
16360 const unsigned int flag;
16363 static const struct builtin_description bdesc_comi[] =
16365 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16366 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16367 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16368 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16369 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16370 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16371 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16372 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16373 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16374 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16375 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16376 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16377 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16378 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16379 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16380 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16381 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16382 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16383 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16384 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16385 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16386 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16387 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16388 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16391 static const struct builtin_description bdesc_2arg[] =
16393 /* SSE */
16394 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16395 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16396 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16397 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16398 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16399 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16400 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16401 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16403 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16404 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16405 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16406 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16407 BUILTIN_DESC_SWAP_OPERANDS },
16408 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16409 BUILTIN_DESC_SWAP_OPERANDS },
16410 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16411 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16412 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16413 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16414 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16415 BUILTIN_DESC_SWAP_OPERANDS },
16416 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16417 BUILTIN_DESC_SWAP_OPERANDS },
16418 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16419 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16420 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16421 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16422 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16423 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16424 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16425 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16426 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16427 BUILTIN_DESC_SWAP_OPERANDS },
16428 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16429 BUILTIN_DESC_SWAP_OPERANDS },
16430 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16432 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16433 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16434 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16435 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16437 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16438 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16439 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16440 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16442 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16443 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16444 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16445 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16446 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16448 /* MMX */
16449 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16450 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16451 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16452 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16453 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16454 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16455 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16456 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16458 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16459 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16460 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16461 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16462 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16463 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16464 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16465 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16467 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16468 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16469 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16471 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16472 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16473 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16474 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16476 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16477 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16479 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16480 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16481 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16482 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16483 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16484 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16486 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16487 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16488 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16489 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16491 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16492 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16493 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16494 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16495 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16496 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16498 /* Special. */
16499 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16500 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16501 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16503 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16504 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16505 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16507 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16508 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16509 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16510 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16511 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16512 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16514 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16515 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16516 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16517 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16518 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16519 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16521 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16522 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16523 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16524 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16526 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16527 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16529 /* SSE2 */
16530 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16531 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16532 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16533 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16534 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16535 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16536 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16537 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16539 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16540 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16541 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16542 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16543 BUILTIN_DESC_SWAP_OPERANDS },
16544 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16545 BUILTIN_DESC_SWAP_OPERANDS },
16546 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16547 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16548 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16549 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16550 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16551 BUILTIN_DESC_SWAP_OPERANDS },
16552 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16553 BUILTIN_DESC_SWAP_OPERANDS },
16554 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16555 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16556 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16557 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16558 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16559 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16560 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16561 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16562 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16564 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16565 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16566 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16567 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16569 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16570 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16571 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16572 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16574 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16575 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16576 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16578 /* SSE2 MMX */
16579 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16580 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16581 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16582 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16583 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16584 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16585 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16586 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16588 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16589 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16590 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16591 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16592 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16593 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16594 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16595 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16597 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16598 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16600 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16601 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16602 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16603 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16605 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16606 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16608 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16609 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16610 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16611 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16612 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16613 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16615 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16616 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16617 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16618 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16620 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16621 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16622 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16623 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16624 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16625 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16626 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16627 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16629 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16630 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16631 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16633 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16634 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16636 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16637 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16639 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16640 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16641 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16643 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16644 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16645 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16647 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16648 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16650 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16652 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16653 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16654 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16655 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16657 /* SSE3 MMX */
16658 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16659 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16660 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16661 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16662 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16663 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16665 /* SSSE3 */
16666 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16667 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16668 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16669 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16670 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16671 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16672 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16673 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16674 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16675 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16676 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16677 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16678 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16679 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16680 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16681 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16682 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16683 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16684 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16685 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16686 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16687 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16688 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16689 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16692 static const struct builtin_description bdesc_1arg[] =
16694 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16695 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16697 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16698 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16699 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16701 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16702 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16703 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16704 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16705 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16706 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16708 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16709 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16711 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16713 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16714 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16716 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16717 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16718 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16719 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16720 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16722 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16724 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16725 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16726 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16727 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16729 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16730 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16731 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16733 /* SSE3 */
16734 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16735 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16737 /* SSSE3 */
16738 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16739 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16740 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16741 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16742 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16743 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16746 static void
16747 ix86_init_builtins (void)
16749 if (TARGET_MMX)
16750 ix86_init_mmx_sse_builtins ();
16753 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16754 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16755 builtins. */
16756 static void
16757 ix86_init_mmx_sse_builtins (void)
16759 const struct builtin_description * d;
16760 size_t i;
16762 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16763 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16764 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16765 tree V2DI_type_node
16766 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16767 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16768 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16769 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16770 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16771 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16772 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16774 tree pchar_type_node = build_pointer_type (char_type_node);
16775 tree pcchar_type_node = build_pointer_type (
16776 build_type_variant (char_type_node, 1, 0));
16777 tree pfloat_type_node = build_pointer_type (float_type_node);
16778 tree pcfloat_type_node = build_pointer_type (
16779 build_type_variant (float_type_node, 1, 0));
16780 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16781 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16782 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16784 /* Comparisons. */
16785 tree int_ftype_v4sf_v4sf
16786 = build_function_type_list (integer_type_node,
16787 V4SF_type_node, V4SF_type_node, NULL_TREE);
16788 tree v4si_ftype_v4sf_v4sf
16789 = build_function_type_list (V4SI_type_node,
16790 V4SF_type_node, V4SF_type_node, NULL_TREE);
16791 /* MMX/SSE/integer conversions. */
16792 tree int_ftype_v4sf
16793 = build_function_type_list (integer_type_node,
16794 V4SF_type_node, NULL_TREE);
16795 tree int64_ftype_v4sf
16796 = build_function_type_list (long_long_integer_type_node,
16797 V4SF_type_node, NULL_TREE);
16798 tree int_ftype_v8qi
16799 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16800 tree v4sf_ftype_v4sf_int
16801 = build_function_type_list (V4SF_type_node,
16802 V4SF_type_node, integer_type_node, NULL_TREE);
16803 tree v4sf_ftype_v4sf_int64
16804 = build_function_type_list (V4SF_type_node,
16805 V4SF_type_node, long_long_integer_type_node,
16806 NULL_TREE);
16807 tree v4sf_ftype_v4sf_v2si
16808 = build_function_type_list (V4SF_type_node,
16809 V4SF_type_node, V2SI_type_node, NULL_TREE);
16811 /* Miscellaneous. */
16812 tree v8qi_ftype_v4hi_v4hi
16813 = build_function_type_list (V8QI_type_node,
16814 V4HI_type_node, V4HI_type_node, NULL_TREE);
16815 tree v4hi_ftype_v2si_v2si
16816 = build_function_type_list (V4HI_type_node,
16817 V2SI_type_node, V2SI_type_node, NULL_TREE);
16818 tree v4sf_ftype_v4sf_v4sf_int
16819 = build_function_type_list (V4SF_type_node,
16820 V4SF_type_node, V4SF_type_node,
16821 integer_type_node, NULL_TREE);
16822 tree v2si_ftype_v4hi_v4hi
16823 = build_function_type_list (V2SI_type_node,
16824 V4HI_type_node, V4HI_type_node, NULL_TREE);
16825 tree v4hi_ftype_v4hi_int
16826 = build_function_type_list (V4HI_type_node,
16827 V4HI_type_node, integer_type_node, NULL_TREE);
16828 tree v4hi_ftype_v4hi_di
16829 = build_function_type_list (V4HI_type_node,
16830 V4HI_type_node, long_long_unsigned_type_node,
16831 NULL_TREE);
16832 tree v2si_ftype_v2si_di
16833 = build_function_type_list (V2SI_type_node,
16834 V2SI_type_node, long_long_unsigned_type_node,
16835 NULL_TREE);
16836 tree void_ftype_void
16837 = build_function_type (void_type_node, void_list_node);
16838 tree void_ftype_unsigned
16839 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16840 tree void_ftype_unsigned_unsigned
16841 = build_function_type_list (void_type_node, unsigned_type_node,
16842 unsigned_type_node, NULL_TREE);
16843 tree void_ftype_pcvoid_unsigned_unsigned
16844 = build_function_type_list (void_type_node, const_ptr_type_node,
16845 unsigned_type_node, unsigned_type_node,
16846 NULL_TREE);
16847 tree unsigned_ftype_void
16848 = build_function_type (unsigned_type_node, void_list_node);
16849 tree v2si_ftype_v4sf
16850 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16851 /* Loads/stores. */
16852 tree void_ftype_v8qi_v8qi_pchar
16853 = build_function_type_list (void_type_node,
16854 V8QI_type_node, V8QI_type_node,
16855 pchar_type_node, NULL_TREE);
16856 tree v4sf_ftype_pcfloat
16857 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16858 /* @@@ the type is bogus */
16859 tree v4sf_ftype_v4sf_pv2si
16860 = build_function_type_list (V4SF_type_node,
16861 V4SF_type_node, pv2si_type_node, NULL_TREE);
16862 tree void_ftype_pv2si_v4sf
16863 = build_function_type_list (void_type_node,
16864 pv2si_type_node, V4SF_type_node, NULL_TREE);
16865 tree void_ftype_pfloat_v4sf
16866 = build_function_type_list (void_type_node,
16867 pfloat_type_node, V4SF_type_node, NULL_TREE);
16868 tree void_ftype_pdi_di
16869 = build_function_type_list (void_type_node,
16870 pdi_type_node, long_long_unsigned_type_node,
16871 NULL_TREE);
16872 tree void_ftype_pv2di_v2di
16873 = build_function_type_list (void_type_node,
16874 pv2di_type_node, V2DI_type_node, NULL_TREE);
16875 /* Normal vector unops. */
16876 tree v4sf_ftype_v4sf
16877 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16878 tree v16qi_ftype_v16qi
16879 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16880 tree v8hi_ftype_v8hi
16881 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16882 tree v4si_ftype_v4si
16883 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16884 tree v8qi_ftype_v8qi
16885 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16886 tree v4hi_ftype_v4hi
16887 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16889 /* Normal vector binops. */
16890 tree v4sf_ftype_v4sf_v4sf
16891 = build_function_type_list (V4SF_type_node,
16892 V4SF_type_node, V4SF_type_node, NULL_TREE);
16893 tree v8qi_ftype_v8qi_v8qi
16894 = build_function_type_list (V8QI_type_node,
16895 V8QI_type_node, V8QI_type_node, NULL_TREE);
16896 tree v4hi_ftype_v4hi_v4hi
16897 = build_function_type_list (V4HI_type_node,
16898 V4HI_type_node, V4HI_type_node, NULL_TREE);
16899 tree v2si_ftype_v2si_v2si
16900 = build_function_type_list (V2SI_type_node,
16901 V2SI_type_node, V2SI_type_node, NULL_TREE);
16902 tree di_ftype_di_di
16903 = build_function_type_list (long_long_unsigned_type_node,
16904 long_long_unsigned_type_node,
16905 long_long_unsigned_type_node, NULL_TREE);
16907 tree di_ftype_di_di_int
16908 = build_function_type_list (long_long_unsigned_type_node,
16909 long_long_unsigned_type_node,
16910 long_long_unsigned_type_node,
16911 integer_type_node, NULL_TREE);
16913 tree v2si_ftype_v2sf
16914 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16915 tree v2sf_ftype_v2si
16916 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16917 tree v2si_ftype_v2si
16918 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16919 tree v2sf_ftype_v2sf
16920 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16921 tree v2sf_ftype_v2sf_v2sf
16922 = build_function_type_list (V2SF_type_node,
16923 V2SF_type_node, V2SF_type_node, NULL_TREE);
16924 tree v2si_ftype_v2sf_v2sf
16925 = build_function_type_list (V2SI_type_node,
16926 V2SF_type_node, V2SF_type_node, NULL_TREE);
16927 tree pint_type_node = build_pointer_type (integer_type_node);
16928 tree pdouble_type_node = build_pointer_type (double_type_node);
16929 tree pcdouble_type_node = build_pointer_type (
16930 build_type_variant (double_type_node, 1, 0));
16931 tree int_ftype_v2df_v2df
16932 = build_function_type_list (integer_type_node,
16933 V2DF_type_node, V2DF_type_node, NULL_TREE);
16935 tree void_ftype_pcvoid
16936 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16937 tree v4sf_ftype_v4si
16938 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16939 tree v4si_ftype_v4sf
16940 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16941 tree v2df_ftype_v4si
16942 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16943 tree v4si_ftype_v2df
16944 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16945 tree v2si_ftype_v2df
16946 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16947 tree v4sf_ftype_v2df
16948 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16949 tree v2df_ftype_v2si
16950 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16951 tree v2df_ftype_v4sf
16952 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16953 tree int_ftype_v2df
16954 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16955 tree int64_ftype_v2df
16956 = build_function_type_list (long_long_integer_type_node,
16957 V2DF_type_node, NULL_TREE);
16958 tree v2df_ftype_v2df_int
16959 = build_function_type_list (V2DF_type_node,
16960 V2DF_type_node, integer_type_node, NULL_TREE);
16961 tree v2df_ftype_v2df_int64
16962 = build_function_type_list (V2DF_type_node,
16963 V2DF_type_node, long_long_integer_type_node,
16964 NULL_TREE);
16965 tree v4sf_ftype_v4sf_v2df
16966 = build_function_type_list (V4SF_type_node,
16967 V4SF_type_node, V2DF_type_node, NULL_TREE);
16968 tree v2df_ftype_v2df_v4sf
16969 = build_function_type_list (V2DF_type_node,
16970 V2DF_type_node, V4SF_type_node, NULL_TREE);
16971 tree v2df_ftype_v2df_v2df_int
16972 = build_function_type_list (V2DF_type_node,
16973 V2DF_type_node, V2DF_type_node,
16974 integer_type_node,
16975 NULL_TREE);
16976 tree v2df_ftype_v2df_pcdouble
16977 = build_function_type_list (V2DF_type_node,
16978 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16979 tree void_ftype_pdouble_v2df
16980 = build_function_type_list (void_type_node,
16981 pdouble_type_node, V2DF_type_node, NULL_TREE);
16982 tree void_ftype_pint_int
16983 = build_function_type_list (void_type_node,
16984 pint_type_node, integer_type_node, NULL_TREE);
16985 tree void_ftype_v16qi_v16qi_pchar
16986 = build_function_type_list (void_type_node,
16987 V16QI_type_node, V16QI_type_node,
16988 pchar_type_node, NULL_TREE);
16989 tree v2df_ftype_pcdouble
16990 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16991 tree v2df_ftype_v2df_v2df
16992 = build_function_type_list (V2DF_type_node,
16993 V2DF_type_node, V2DF_type_node, NULL_TREE);
16994 tree v16qi_ftype_v16qi_v16qi
16995 = build_function_type_list (V16QI_type_node,
16996 V16QI_type_node, V16QI_type_node, NULL_TREE);
16997 tree v8hi_ftype_v8hi_v8hi
16998 = build_function_type_list (V8HI_type_node,
16999 V8HI_type_node, V8HI_type_node, NULL_TREE);
17000 tree v4si_ftype_v4si_v4si
17001 = build_function_type_list (V4SI_type_node,
17002 V4SI_type_node, V4SI_type_node, NULL_TREE);
17003 tree v2di_ftype_v2di_v2di
17004 = build_function_type_list (V2DI_type_node,
17005 V2DI_type_node, V2DI_type_node, NULL_TREE);
17006 tree v2di_ftype_v2df_v2df
17007 = build_function_type_list (V2DI_type_node,
17008 V2DF_type_node, V2DF_type_node, NULL_TREE);
17009 tree v2df_ftype_v2df
17010 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17011 tree v2di_ftype_v2di_int
17012 = build_function_type_list (V2DI_type_node,
17013 V2DI_type_node, integer_type_node, NULL_TREE);
17014 tree v2di_ftype_v2di_v2di_int
17015 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17016 V2DI_type_node, integer_type_node, NULL_TREE);
17017 tree v4si_ftype_v4si_int
17018 = build_function_type_list (V4SI_type_node,
17019 V4SI_type_node, integer_type_node, NULL_TREE);
17020 tree v8hi_ftype_v8hi_int
17021 = build_function_type_list (V8HI_type_node,
17022 V8HI_type_node, integer_type_node, NULL_TREE);
17023 tree v8hi_ftype_v8hi_v2di
17024 = build_function_type_list (V8HI_type_node,
17025 V8HI_type_node, V2DI_type_node, NULL_TREE);
17026 tree v4si_ftype_v4si_v2di
17027 = build_function_type_list (V4SI_type_node,
17028 V4SI_type_node, V2DI_type_node, NULL_TREE);
17029 tree v4si_ftype_v8hi_v8hi
17030 = build_function_type_list (V4SI_type_node,
17031 V8HI_type_node, V8HI_type_node, NULL_TREE);
17032 tree di_ftype_v8qi_v8qi
17033 = build_function_type_list (long_long_unsigned_type_node,
17034 V8QI_type_node, V8QI_type_node, NULL_TREE);
17035 tree di_ftype_v2si_v2si
17036 = build_function_type_list (long_long_unsigned_type_node,
17037 V2SI_type_node, V2SI_type_node, NULL_TREE);
17038 tree v2di_ftype_v16qi_v16qi
17039 = build_function_type_list (V2DI_type_node,
17040 V16QI_type_node, V16QI_type_node, NULL_TREE);
17041 tree v2di_ftype_v4si_v4si
17042 = build_function_type_list (V2DI_type_node,
17043 V4SI_type_node, V4SI_type_node, NULL_TREE);
17044 tree int_ftype_v16qi
17045 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17046 tree v16qi_ftype_pcchar
17047 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17048 tree void_ftype_pchar_v16qi
17049 = build_function_type_list (void_type_node,
17050 pchar_type_node, V16QI_type_node, NULL_TREE);
17052 tree v2di_ftype_v2di_unsigned_unsigned
17053 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17054 unsigned_type_node, unsigned_type_node,
17055 NULL_TREE);
17056 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17057 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17058 unsigned_type_node, unsigned_type_node,
17059 NULL_TREE);
17060 tree v2di_ftype_v2di_v16qi
17061 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17062 NULL_TREE);
17064 tree float80_type;
17065 tree float128_type;
17066 tree ftype;
17068 /* The __float80 type. */
17069 if (TYPE_MODE (long_double_type_node) == XFmode)
17070 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17071 "__float80");
17072 else
17074 /* The __float80 type. */
17075 float80_type = make_node (REAL_TYPE);
17076 TYPE_PRECISION (float80_type) = 80;
17077 layout_type (float80_type);
17078 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17081 if (TARGET_64BIT)
17083 float128_type = make_node (REAL_TYPE);
17084 TYPE_PRECISION (float128_type) = 128;
17085 layout_type (float128_type);
17086 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17089 /* Add all builtins that are more or less simple operations on two
17090 operands. */
17091 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17093 /* Use one of the operands; the target can have a different mode for
17094 mask-generating compares. */
17095 enum machine_mode mode;
17096 tree type;
17098 if (d->name == 0)
17099 continue;
17100 mode = insn_data[d->icode].operand[1].mode;
17102 switch (mode)
17104 case V16QImode:
17105 type = v16qi_ftype_v16qi_v16qi;
17106 break;
17107 case V8HImode:
17108 type = v8hi_ftype_v8hi_v8hi;
17109 break;
17110 case V4SImode:
17111 type = v4si_ftype_v4si_v4si;
17112 break;
17113 case V2DImode:
17114 type = v2di_ftype_v2di_v2di;
17115 break;
17116 case V2DFmode:
17117 type = v2df_ftype_v2df_v2df;
17118 break;
17119 case V4SFmode:
17120 type = v4sf_ftype_v4sf_v4sf;
17121 break;
17122 case V8QImode:
17123 type = v8qi_ftype_v8qi_v8qi;
17124 break;
17125 case V4HImode:
17126 type = v4hi_ftype_v4hi_v4hi;
17127 break;
17128 case V2SImode:
17129 type = v2si_ftype_v2si_v2si;
17130 break;
17131 case DImode:
17132 type = di_ftype_di_di;
17133 break;
17135 default:
17136 gcc_unreachable ();
17139 /* Override for comparisons. */
17140 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17141 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17142 type = v4si_ftype_v4sf_v4sf;
17144 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17145 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17146 type = v2di_ftype_v2df_v2df;
17148 def_builtin (d->mask, d->name, type, d->code);
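/* Illustrative sketch of what the loop above produces: for the bdesc_2arg
   entry "__builtin_ia32_addsubps" (CODE_FOR_sse3_addsubv4sf3), operand 1 of
   the insn is in V4SFmode, so the loop picks v4sf_ftype_v4sf_v4sf and the
   builtin can be called from user code roughly like this, assuming -msse3
   and a hypothetical wrapper name:

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       __v4sf
       addsub_example (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_addsubps (a, b);
       }
*/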
17151 /* Add all builtins that are more or less simple operations on 1 operand. */
17152 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17154 enum machine_mode mode;
17155 tree type;
17157 if (d->name == 0)
17158 continue;
17159 mode = insn_data[d->icode].operand[1].mode;
17161 switch (mode)
17163 case V16QImode:
17164 type = v16qi_ftype_v16qi;
17165 break;
17166 case V8HImode:
17167 type = v8hi_ftype_v8hi;
17168 break;
17169 case V4SImode:
17170 type = v4si_ftype_v4si;
17171 break;
17172 case V2DFmode:
17173 type = v2df_ftype_v2df;
17174 break;
17175 case V4SFmode:
17176 type = v4sf_ftype_v4sf;
17177 break;
17178 case V8QImode:
17179 type = v8qi_ftype_v8qi;
17180 break;
17181 case V4HImode:
17182 type = v4hi_ftype_v4hi;
17183 break;
17184 case V2SImode:
17185 type = v2si_ftype_v2si;
17186 break;
17188 default:
17189 	  gcc_unreachable ();
17192 def_builtin (d->mask, d->name, type, d->code);
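/* Similarly for the one-operand table: the bdesc_1arg entry
   "__builtin_ia32_pabsb128" (CODE_FOR_absv16qi2) has operand 1 in
   V16QImode, so it receives the type v16qi_ftype_v16qi.  A minimal usage
   sketch, assuming -mssse3 and a hypothetical wrapper name:

       typedef char __v16qi __attribute__ ((__vector_size__ (16)));

       __v16qi
       abs_example (__v16qi x)
       {
         return __builtin_ia32_pabsb128 (x);
       }
*/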
17195 /* Add the remaining MMX insns with somewhat more complicated types. */
17196 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17197 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17198 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17199 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17201 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17202 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17203 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17205 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17206 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17208 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17209 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17211 /* comi/ucomi insns. */
17212 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17213 if (d->mask == MASK_SSE2)
17214 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17215 else
17216 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17218 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17219 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17220 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17222 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17223 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17224 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17225 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17226 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17227 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17228 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17229 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17230 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17231 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17232 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17234 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17236 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17237 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17239 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17240 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17241 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17242 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17244 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17245 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17246 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17247 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17249 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17251 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17253 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17254 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17255 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17256 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17257 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17258 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17260 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17262 /* Original 3DNow! */
17263 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17264 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17265 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17266 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17267 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17268 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17269 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17270 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17271 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17272 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17273 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17274 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17275 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17276 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17277 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17278 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17281 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17282 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17284 /* 3DNow! extension as used in the Athlon CPU. */
17285 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17286 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17287 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17288 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17289 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17290 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17292 /* SSE2 */
17293 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17295 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17296 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17298 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17299 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17301 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17302 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17303 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17304 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17305 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17307 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17308 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17309 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17310 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17312 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17313 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17315 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17317 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17318 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17320 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17321 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17322 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17323 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17324 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17326 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17328 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17329 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17330 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17331 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17333 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17334 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17335 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17337 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17338 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17339 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17340 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17342 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17343 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17344 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17346 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17347 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17349 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17350 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17352 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17353 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17354 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17356 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17357 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17358 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17360 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17361 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17363 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17364 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17365 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17366 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17368 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17369 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17370 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17371 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17373 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17374 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17376 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17378 /* Prescott New Instructions. */
17379 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17380 void_ftype_pcvoid_unsigned_unsigned,
17381 IX86_BUILTIN_MONITOR);
17382 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17383 void_ftype_unsigned_unsigned,
17384 IX86_BUILTIN_MWAIT);
17385 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17386 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
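/* The monitor/mwait pair registered above takes the argument types
   declared earlier (void_ftype_pcvoid_unsigned_unsigned and
   void_ftype_unsigned_unsigned).  A minimal usage sketch, assuming -msse3
   and a hypothetical wrapper name:

       static void
       mwait_example (volatile int *addr)
       {
         __builtin_ia32_monitor ((const void *) addr, 0, 0);
         __builtin_ia32_mwait (0, 0);
       }
*/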
17388 /* SSSE3. */
17389 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17390 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17391 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17392 IX86_BUILTIN_PALIGNR);
17394   /* AMDFAM10 SSE4A new built-ins.  */
17395 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17396 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17397 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17398 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17399 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17400 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17401 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17402 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17403 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17404 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17405 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17406 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
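/* The SSE4A builtins above map to the AMDFAM10 instructions; for example
   "__builtin_ia32_movntsd" has type void_ftype_pdouble_v2df and performs
   a non-temporal store of the low double.  A minimal usage sketch,
   assuming -msse4a and a hypothetical wrapper name:

       typedef double __v2df __attribute__ ((__vector_size__ (16)));

       static void
       movntsd_example (double *p, __v2df x)
       {
         __builtin_ia32_movntsd (p, x);
       }
*/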
17408 /* Access to the vec_init patterns. */
17409 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17410 integer_type_node, NULL_TREE);
17411 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17412 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17414 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17415 short_integer_type_node,
17416 short_integer_type_node,
17417 short_integer_type_node, NULL_TREE);
17418 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17419 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17421 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17422 char_type_node, char_type_node,
17423 char_type_node, char_type_node,
17424 char_type_node, char_type_node,
17425 char_type_node, NULL_TREE);
17426 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17427 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17429 /* Access to the vec_extract patterns. */
17430 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17431 integer_type_node, NULL_TREE);
17432 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17433 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17435 ftype = build_function_type_list (long_long_integer_type_node,
17436 V2DI_type_node, integer_type_node,
17437 NULL_TREE);
17438 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17439 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17441 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17442 integer_type_node, NULL_TREE);
17443 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17444 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17446 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17447 integer_type_node, NULL_TREE);
17448 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17449 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17451 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17452 integer_type_node, NULL_TREE);
17453 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17454 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17456 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17457 integer_type_node, NULL_TREE);
17458 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17459 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17461 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17462 integer_type_node, NULL_TREE);
17463 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17464 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17466 /* Access to the vec_set patterns. */
17467 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17468 intHI_type_node,
17469 integer_type_node, NULL_TREE);
17470 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17471 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17473 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17474 intHI_type_node,
17475 integer_type_node, NULL_TREE);
17476 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17477 ftype, IX86_BUILTIN_VEC_SET_V4HI);
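/* The vec_init/vec_ext/vec_set builtins defined above are the hooks that
   header wrappers use to construct and pick apart vector values.  A
   minimal usage sketch for "__builtin_ia32_vec_init_v2si", assuming -mmmx
   and a hypothetical wrapper name (argument i initializes element i):

       typedef int __v2si __attribute__ ((__vector_size__ (8)));

       static __v2si
       init_example (int e0, int e1)
       {
         return __builtin_ia32_vec_init_v2si (e0, e1);
       }
*/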
17480 /* Errors in the source file can cause expand_expr to return const0_rtx
17481 where we expect a vector. To avoid crashing, use one of the vector
17482 clear instructions. */
17483 static rtx
17484 safe_vector_operand (rtx x, enum machine_mode mode)
17486 if (x == const0_rtx)
17487 x = CONST0_RTX (mode);
17488 return x;
17491 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17493 static rtx
17494 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17496 rtx pat, xops[3];
17497 tree arg0 = CALL_EXPR_ARG (exp, 0);
17498 tree arg1 = CALL_EXPR_ARG (exp, 1);
17499 rtx op0 = expand_normal (arg0);
17500 rtx op1 = expand_normal (arg1);
17501 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17502 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17503 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17505 if (VECTOR_MODE_P (mode0))
17506 op0 = safe_vector_operand (op0, mode0);
17507 if (VECTOR_MODE_P (mode1))
17508 op1 = safe_vector_operand (op1, mode1);
17510 if (optimize || !target
17511 || GET_MODE (target) != tmode
17512 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17513 target = gen_reg_rtx (tmode);
17515 if (GET_MODE (op1) == SImode && mode1 == TImode)
17517 rtx x = gen_reg_rtx (V4SImode);
17518 emit_insn (gen_sse2_loadd (x, op1));
17519 op1 = gen_lowpart (TImode, x);
17522 /* The insn must want input operands in the same modes as the
17523 result. */
17524 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17525 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17527 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17528 op0 = copy_to_mode_reg (mode0, op0);
17529 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17530 op1 = copy_to_mode_reg (mode1, op1);
17532 /* ??? Using ix86_fixup_binary_operands is problematic when
17533 we've got mismatched modes. Fake it. */
17535 xops[0] = target;
17536 xops[1] = op0;
17537 xops[2] = op1;
17539 if (tmode == mode0 && tmode == mode1)
17541 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17542 op0 = xops[1];
17543 op1 = xops[2];
17545 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17547 op0 = force_reg (mode0, op0);
17548 op1 = force_reg (mode1, op1);
17549 target = gen_reg_rtx (tmode);
17552 pat = GEN_FCN (icode) (target, op0, op1);
17553 if (! pat)
17554 return 0;
17555 emit_insn (pat);
17556 return target;
17559 /* Subroutine of ix86_expand_builtin to take care of stores. */
17561 static rtx
17562 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17564 rtx pat;
17565 tree arg0 = CALL_EXPR_ARG (exp, 0);
17566 tree arg1 = CALL_EXPR_ARG (exp, 1);
17567 rtx op0 = expand_normal (arg0);
17568 rtx op1 = expand_normal (arg1);
17569 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17570 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17572 if (VECTOR_MODE_P (mode1))
17573 op1 = safe_vector_operand (op1, mode1);
17575 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17576 op1 = copy_to_mode_reg (mode1, op1);
17578 pat = GEN_FCN (icode) (op0, op1);
17579 if (pat)
17580 emit_insn (pat);
17581 return 0;
17584 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17586 static rtx
17587 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17588 rtx target, int do_load)
17590 rtx pat;
17591 tree arg0 = CALL_EXPR_ARG (exp, 0);
17592 rtx op0 = expand_normal (arg0);
17593 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17594 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17596 if (optimize || !target
17597 || GET_MODE (target) != tmode
17598 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17599 target = gen_reg_rtx (tmode);
17600 if (do_load)
17601 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17602 else
17604 if (VECTOR_MODE_P (mode0))
17605 op0 = safe_vector_operand (op0, mode0);
17607 if ((optimize && !register_operand (op0, mode0))
17608 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17609 op0 = copy_to_mode_reg (mode0, op0);
17612 pat = GEN_FCN (icode) (target, op0);
17613 if (! pat)
17614 return 0;
17615 emit_insn (pat);
17616 return target;
17619 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17620 sqrtss, rsqrtss, rcpss. */
17622 static rtx
17623 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17625 rtx pat;
17626 tree arg0 = CALL_EXPR_ARG (exp, 0);
17627 rtx op1, op0 = expand_normal (arg0);
17628 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17629 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17631 if (optimize || !target
17632 || GET_MODE (target) != tmode
17633 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17634 target = gen_reg_rtx (tmode);
17636 if (VECTOR_MODE_P (mode0))
17637 op0 = safe_vector_operand (op0, mode0);
17639 if ((optimize && !register_operand (op0, mode0))
17640 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17641 op0 = copy_to_mode_reg (mode0, op0);
17643 op1 = op0;
17644 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17645 op1 = copy_to_mode_reg (mode0, op1);
17647 pat = GEN_FCN (icode) (target, op0, op1);
17648 if (! pat)
17649 return 0;
17650 emit_insn (pat);
17651 return target;
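/* Illustrative sketch of this unop1 path: "__builtin_ia32_sqrtss" was
   registered above with type v4sf_ftype_v4sf and is expanded through this
   routine with CODE_FOR_sse_vmsqrtv4sf2 (see the IX86_BUILTIN_SQRTSS case
   below), taking the square root of element 0 and passing the remaining
   elements through.  A minimal usage sketch, assuming -msse and a
   hypothetical wrapper name:

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       static __v4sf
       sqrtss_example (__v4sf x)
       {
         return __builtin_ia32_sqrtss (x);
       }
*/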
17654 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17656 static rtx
17657 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17658 rtx target)
17660 rtx pat;
17661 tree arg0 = CALL_EXPR_ARG (exp, 0);
17662 tree arg1 = CALL_EXPR_ARG (exp, 1);
17663 rtx op0 = expand_normal (arg0);
17664 rtx op1 = expand_normal (arg1);
17665 rtx op2;
17666 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17667 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17668 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17669 enum rtx_code comparison = d->comparison;
17671 if (VECTOR_MODE_P (mode0))
17672 op0 = safe_vector_operand (op0, mode0);
17673 if (VECTOR_MODE_P (mode1))
17674 op1 = safe_vector_operand (op1, mode1);
17676 /* Swap operands if we have a comparison that isn't available in
17677 hardware. */
17678 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17680 rtx tmp = gen_reg_rtx (mode1);
17681 emit_move_insn (tmp, op1);
17682 op1 = op0;
17683 op0 = tmp;
17686 if (optimize || !target
17687 || GET_MODE (target) != tmode
17688 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17689 target = gen_reg_rtx (tmode);
17691 if ((optimize && !register_operand (op0, mode0))
17692 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17693 op0 = copy_to_mode_reg (mode0, op0);
17694 if ((optimize && !register_operand (op1, mode1))
17695 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17696 op1 = copy_to_mode_reg (mode1, op1);
17698 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17699 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17700 if (! pat)
17701 return 0;
17702 emit_insn (pat);
17703 return target;
17706 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17708 static rtx
17709 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17710 rtx target)
17712 rtx pat;
17713 tree arg0 = CALL_EXPR_ARG (exp, 0);
17714 tree arg1 = CALL_EXPR_ARG (exp, 1);
17715 rtx op0 = expand_normal (arg0);
17716 rtx op1 = expand_normal (arg1);
17717 rtx op2;
17718 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17719 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17720 enum rtx_code comparison = d->comparison;
17722 if (VECTOR_MODE_P (mode0))
17723 op0 = safe_vector_operand (op0, mode0);
17724 if (VECTOR_MODE_P (mode1))
17725 op1 = safe_vector_operand (op1, mode1);
17727 /* Swap operands if we have a comparison that isn't available in
17728 hardware. */
17729 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17731 rtx tmp = op1;
17732 op1 = op0;
17733 op0 = tmp;
17736 target = gen_reg_rtx (SImode);
17737 emit_move_insn (target, const0_rtx);
17738 target = gen_rtx_SUBREG (QImode, target, 0);
17740 if ((optimize && !register_operand (op0, mode0))
17741 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17742 op0 = copy_to_mode_reg (mode0, op0);
17743 if ((optimize && !register_operand (op1, mode1))
17744 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17745 op1 = copy_to_mode_reg (mode1, op1);
17747 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17748 pat = GEN_FCN (d->icode) (op0, op1);
17749 if (! pat)
17750 return 0;
17751 emit_insn (pat);
17752 emit_insn (gen_rtx_SET (VOIDmode,
17753 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17754 gen_rtx_fmt_ee (comparison, QImode,
17755 SET_DEST (pat),
17756 const0_rtx)));
17758 return SUBREG_REG (target);
17761 /* Return the integer constant in ARG. Constrain it to be in the range
17762 of the subparts of VEC_TYPE; issue an error if not. */
17764 static int
17765 get_element_number (tree vec_type, tree arg)
17767 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17769 if (!host_integerp (arg, 1)
17770 || (elt = tree_low_cst (arg, 1), elt > max))
17772 error ("selector must be an integer constant in the range 0..%wi", max);
17773 return 0;
17776 return elt;
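/* Out-of-range selectors are diagnosed here rather than crashing later.
   For instance, with the "__builtin_ia32_vec_ext_v4sf" builtin defined
   above (V4SF has four subparts, so the maximum selector is 3), a call
   such as the following hypothetical sketch, assuming -msse,

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       static float
       bad_extract (__v4sf x)
       {
         return __builtin_ia32_vec_ext_v4sf (x, 7);
       }

   produces "selector must be an integer constant in the range 0..3" and
   element 0 is used so that expansion can continue.  */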
17779 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17780 ix86_expand_vector_init. We DO have language-level syntax for this, in
17781 the form of (type){ init-list }. Except that since we can't place emms
17782 instructions from inside the compiler, we can't allow the use of MMX
17783 registers unless the user explicitly asks for it. So we do *not* define
17784 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17785    we have builtins invoked by mmintrin.h that give us license to emit
17786 these sorts of instructions. */
17788 static rtx
17789 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17791 enum machine_mode tmode = TYPE_MODE (type);
17792 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17793 int i, n_elt = GET_MODE_NUNITS (tmode);
17794 rtvec v = rtvec_alloc (n_elt);
17796 gcc_assert (VECTOR_MODE_P (tmode));
17797 gcc_assert (call_expr_nargs (exp) == n_elt);
17799 for (i = 0; i < n_elt; ++i)
17801 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17802 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17805 if (!target || !register_operand (target, tmode))
17806 target = gen_reg_rtx (tmode);
17808 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17809 return target;
17812 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17813 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17814 had a language-level syntax for referencing vector elements. */
17816 static rtx
17817 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17819 enum machine_mode tmode, mode0;
17820 tree arg0, arg1;
17821 int elt;
17822 rtx op0;
17824 arg0 = CALL_EXPR_ARG (exp, 0);
17825 arg1 = CALL_EXPR_ARG (exp, 1);
17827 op0 = expand_normal (arg0);
17828 elt = get_element_number (TREE_TYPE (arg0), arg1);
17830 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17831 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17832 gcc_assert (VECTOR_MODE_P (mode0));
17834 op0 = force_reg (mode0, op0);
17836 if (optimize || !target || !register_operand (target, tmode))
17837 target = gen_reg_rtx (tmode);
17839 ix86_expand_vector_extract (true, target, op0, elt);
17841 return target;
17844 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17845 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17846 a language-level syntax for referencing vector elements. */
17848 static rtx
17849 ix86_expand_vec_set_builtin (tree exp)
17851 enum machine_mode tmode, mode1;
17852 tree arg0, arg1, arg2;
17853 int elt;
17854 rtx op0, op1;
17856 arg0 = CALL_EXPR_ARG (exp, 0);
17857 arg1 = CALL_EXPR_ARG (exp, 1);
17858 arg2 = CALL_EXPR_ARG (exp, 2);
17860 tmode = TYPE_MODE (TREE_TYPE (arg0));
17861 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17862 gcc_assert (VECTOR_MODE_P (tmode));
17864 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17865 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17866 elt = get_element_number (TREE_TYPE (arg0), arg2);
17868 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17869 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17871 op0 = force_reg (tmode, op0);
17872 op1 = force_reg (mode1, op1);
17874 ix86_expand_vector_set (true, op0, op1, elt);
17876 return op0;
17879 /* Expand an expression EXP that calls a built-in function,
17880 with result going to TARGET if that's convenient
17881 (and in mode MODE if that's convenient).
17882 SUBTARGET may be used as the target for computing one of EXP's operands.
17883 IGNORE is nonzero if the value is to be ignored. */
17885 static rtx
17886 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17887 enum machine_mode mode ATTRIBUTE_UNUSED,
17888 int ignore ATTRIBUTE_UNUSED)
17890 const struct builtin_description *d;
17891 size_t i;
17892 enum insn_code icode;
17893 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17894 tree arg0, arg1, arg2, arg3;
17895 rtx op0, op1, op2, op3, pat;
17896 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17897 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17899 switch (fcode)
17901 case IX86_BUILTIN_EMMS:
17902 emit_insn (gen_mmx_emms ());
17903 return 0;
17905 case IX86_BUILTIN_SFENCE:
17906 emit_insn (gen_sse_sfence ());
17907 return 0;
17909 case IX86_BUILTIN_MASKMOVQ:
17910 case IX86_BUILTIN_MASKMOVDQU:
17911 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17912 ? CODE_FOR_mmx_maskmovq
17913 : CODE_FOR_sse2_maskmovdqu);
17914 /* Note the arg order is different from the operand order. */
17915 arg1 = CALL_EXPR_ARG (exp, 0);
17916 arg2 = CALL_EXPR_ARG (exp, 1);
17917 arg0 = CALL_EXPR_ARG (exp, 2);
17918 op0 = expand_normal (arg0);
17919 op1 = expand_normal (arg1);
17920 op2 = expand_normal (arg2);
17921 mode0 = insn_data[icode].operand[0].mode;
17922 mode1 = insn_data[icode].operand[1].mode;
17923 mode2 = insn_data[icode].operand[2].mode;
17925 op0 = force_reg (Pmode, op0);
17926 op0 = gen_rtx_MEM (mode1, op0);
17928 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17929 op0 = copy_to_mode_reg (mode0, op0);
17930 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17931 op1 = copy_to_mode_reg (mode1, op1);
17932 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17933 op2 = copy_to_mode_reg (mode2, op2);
17934 pat = GEN_FCN (icode) (op0, op1, op2);
17935 if (! pat)
17936 return 0;
17937 emit_insn (pat);
17938 return 0;
17940 case IX86_BUILTIN_SQRTSS:
17941 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17942 case IX86_BUILTIN_RSQRTSS:
17943 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17944 case IX86_BUILTIN_RCPSS:
17945 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17947 case IX86_BUILTIN_LOADUPS:
17948 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17950 case IX86_BUILTIN_STOREUPS:
17951 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17953 case IX86_BUILTIN_LOADHPS:
17954 case IX86_BUILTIN_LOADLPS:
17955 case IX86_BUILTIN_LOADHPD:
17956 case IX86_BUILTIN_LOADLPD:
17957 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17958 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17959 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17960 : CODE_FOR_sse2_loadlpd);
17961 arg0 = CALL_EXPR_ARG (exp, 0);
17962 arg1 = CALL_EXPR_ARG (exp, 1);
17963 op0 = expand_normal (arg0);
17964 op1 = expand_normal (arg1);
17965 tmode = insn_data[icode].operand[0].mode;
17966 mode0 = insn_data[icode].operand[1].mode;
17967 mode1 = insn_data[icode].operand[2].mode;
17969 op0 = force_reg (mode0, op0);
17970 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17971 if (optimize || target == 0
17972 || GET_MODE (target) != tmode
17973 || !register_operand (target, tmode))
17974 target = gen_reg_rtx (tmode);
17975 pat = GEN_FCN (icode) (target, op0, op1);
17976 if (! pat)
17977 return 0;
17978 emit_insn (pat);
17979 return target;
17981 case IX86_BUILTIN_STOREHPS:
17982 case IX86_BUILTIN_STORELPS:
17983 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17984 : CODE_FOR_sse_storelps);
17985 arg0 = CALL_EXPR_ARG (exp, 0);
17986 arg1 = CALL_EXPR_ARG (exp, 1);
17987 op0 = expand_normal (arg0);
17988 op1 = expand_normal (arg1);
17989 mode0 = insn_data[icode].operand[0].mode;
17990 mode1 = insn_data[icode].operand[1].mode;
17992 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17993 op1 = force_reg (mode1, op1);
17995 pat = GEN_FCN (icode) (op0, op1);
17996 if (! pat)
17997 return 0;
17998 emit_insn (pat);
17999 return const0_rtx;
18001 case IX86_BUILTIN_MOVNTPS:
18002 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
18003 case IX86_BUILTIN_MOVNTQ:
18004 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
18006 case IX86_BUILTIN_LDMXCSR:
18007 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
18008 target = assign_386_stack_local (SImode, SLOT_TEMP);
18009 emit_move_insn (target, op0);
18010 emit_insn (gen_sse_ldmxcsr (target));
18011 return 0;
18013 case IX86_BUILTIN_STMXCSR:
18014 target = assign_386_stack_local (SImode, SLOT_TEMP);
18015 emit_insn (gen_sse_stmxcsr (target));
18016 return copy_to_mode_reg (SImode, target);
18018 case IX86_BUILTIN_SHUFPS:
18019 case IX86_BUILTIN_SHUFPD:
18020 icode = (fcode == IX86_BUILTIN_SHUFPS
18021 ? CODE_FOR_sse_shufps
18022 : CODE_FOR_sse2_shufpd);
18023 arg0 = CALL_EXPR_ARG (exp, 0);
18024 arg1 = CALL_EXPR_ARG (exp, 1);
18025 arg2 = CALL_EXPR_ARG (exp, 2);
18026 op0 = expand_normal (arg0);
18027 op1 = expand_normal (arg1);
18028 op2 = expand_normal (arg2);
18029 tmode = insn_data[icode].operand[0].mode;
18030 mode0 = insn_data[icode].operand[1].mode;
18031 mode1 = insn_data[icode].operand[2].mode;
18032 mode2 = insn_data[icode].operand[3].mode;
18034 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18035 op0 = copy_to_mode_reg (mode0, op0);
18036 if ((optimize && !register_operand (op1, mode1))
18037 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18038 op1 = copy_to_mode_reg (mode1, op1);
18039 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18041 /* @@@ better error message */
18042 error ("mask must be an immediate");
18043 return gen_reg_rtx (tmode);
18045 if (optimize || target == 0
18046 || GET_MODE (target) != tmode
18047 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18048 target = gen_reg_rtx (tmode);
18049 pat = GEN_FCN (icode) (target, op0, op1, op2);
18050 if (! pat)
18051 return 0;
18052 emit_insn (pat);
18053 return target;
18055 case IX86_BUILTIN_PSHUFW:
18056 case IX86_BUILTIN_PSHUFD:
18057 case IX86_BUILTIN_PSHUFHW:
18058 case IX86_BUILTIN_PSHUFLW:
18059 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18060 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18061 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18062 : CODE_FOR_mmx_pshufw);
18063 arg0 = CALL_EXPR_ARG (exp, 0);
18064 arg1 = CALL_EXPR_ARG (exp, 1);
18065 op0 = expand_normal (arg0);
18066 op1 = expand_normal (arg1);
18067 tmode = insn_data[icode].operand[0].mode;
18068 mode1 = insn_data[icode].operand[1].mode;
18069 mode2 = insn_data[icode].operand[2].mode;
18071 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18072 op0 = copy_to_mode_reg (mode1, op0);
18073 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18075 /* @@@ better error message */
18076 error ("mask must be an immediate");
18077 return const0_rtx;
18079 if (target == 0
18080 || GET_MODE (target) != tmode
18081 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18082 target = gen_reg_rtx (tmode);
18083 pat = GEN_FCN (icode) (target, op0, op1);
18084 if (! pat)
18085 return 0;
18086 emit_insn (pat);
18087 return target;
18089 case IX86_BUILTIN_PSLLDQI128:
18090 case IX86_BUILTIN_PSRLDQI128:
18091 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18092 : CODE_FOR_sse2_lshrti3);
18093 arg0 = CALL_EXPR_ARG (exp, 0);
18094 arg1 = CALL_EXPR_ARG (exp, 1);
18095 op0 = expand_normal (arg0);
18096 op1 = expand_normal (arg1);
18097 tmode = insn_data[icode].operand[0].mode;
18098 mode1 = insn_data[icode].operand[1].mode;
18099 mode2 = insn_data[icode].operand[2].mode;
18101 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18103 op0 = copy_to_reg (op0);
18104 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18106 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18108 error ("shift must be an immediate");
18109 return const0_rtx;
18111 target = gen_reg_rtx (V2DImode);
18112 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
18113 if (! pat)
18114 return 0;
18115 emit_insn (pat);
18116 return target;
18118 case IX86_BUILTIN_FEMMS:
18119 emit_insn (gen_mmx_femms ());
18120 return NULL_RTX;
18122 case IX86_BUILTIN_PAVGUSB:
18123 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18125 case IX86_BUILTIN_PF2ID:
18126 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18128 case IX86_BUILTIN_PFACC:
18129 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18131 case IX86_BUILTIN_PFADD:
18132 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18134 case IX86_BUILTIN_PFCMPEQ:
18135 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18137 case IX86_BUILTIN_PFCMPGE:
18138 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18140 case IX86_BUILTIN_PFCMPGT:
18141 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18143 case IX86_BUILTIN_PFMAX:
18144 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18146 case IX86_BUILTIN_PFMIN:
18147 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18149 case IX86_BUILTIN_PFMUL:
18150 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18152 case IX86_BUILTIN_PFRCP:
18153 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18155 case IX86_BUILTIN_PFRCPIT1:
18156 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18158 case IX86_BUILTIN_PFRCPIT2:
18159 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18161 case IX86_BUILTIN_PFRSQIT1:
18162 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18164 case IX86_BUILTIN_PFRSQRT:
18165 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18167 case IX86_BUILTIN_PFSUB:
18168 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18170 case IX86_BUILTIN_PFSUBR:
18171 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18173 case IX86_BUILTIN_PI2FD:
18174 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18176 case IX86_BUILTIN_PMULHRW:
18177 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18179 case IX86_BUILTIN_PF2IW:
18180 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18182 case IX86_BUILTIN_PFNACC:
18183 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18185 case IX86_BUILTIN_PFPNACC:
18186 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18188 case IX86_BUILTIN_PI2FW:
18189 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18191 case IX86_BUILTIN_PSWAPDSI:
18192 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18194 case IX86_BUILTIN_PSWAPDSF:
18195 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18197 case IX86_BUILTIN_SQRTSD:
18198 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18199 case IX86_BUILTIN_LOADUPD:
18200 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18201 case IX86_BUILTIN_STOREUPD:
18202 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18204 case IX86_BUILTIN_MFENCE:
18205 emit_insn (gen_sse2_mfence ());
18206 return 0;
18207 case IX86_BUILTIN_LFENCE:
18208 emit_insn (gen_sse2_lfence ());
18209 return 0;
18211 case IX86_BUILTIN_CLFLUSH:
18212 arg0 = CALL_EXPR_ARG (exp, 0);
18213 op0 = expand_normal (arg0);
18214 icode = CODE_FOR_sse2_clflush;
18215 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18216 op0 = copy_to_mode_reg (Pmode, op0);
18218 emit_insn (gen_sse2_clflush (op0));
18219 return 0;
18221 case IX86_BUILTIN_MOVNTPD:
18222 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18223 case IX86_BUILTIN_MOVNTDQ:
18224 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18225 case IX86_BUILTIN_MOVNTI:
18226 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18228 case IX86_BUILTIN_LOADDQU:
18229 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18230 case IX86_BUILTIN_STOREDQU:
18231 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18233 case IX86_BUILTIN_MONITOR:
18234 arg0 = CALL_EXPR_ARG (exp, 0);
18235 arg1 = CALL_EXPR_ARG (exp, 1);
18236 arg2 = CALL_EXPR_ARG (exp, 2);
18237 op0 = expand_normal (arg0);
18238 op1 = expand_normal (arg1);
18239 op2 = expand_normal (arg2);
18240 if (!REG_P (op0))
18241 op0 = copy_to_mode_reg (Pmode, op0);
18242 if (!REG_P (op1))
18243 op1 = copy_to_mode_reg (SImode, op1);
18244 if (!REG_P (op2))
18245 op2 = copy_to_mode_reg (SImode, op2);
18246 if (!TARGET_64BIT)
18247 emit_insn (gen_sse3_monitor (op0, op1, op2));
18248 else
18249 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18250 return 0;
18252 case IX86_BUILTIN_MWAIT:
18253 arg0 = CALL_EXPR_ARG (exp, 0);
18254 arg1 = CALL_EXPR_ARG (exp, 1);
18255 op0 = expand_normal (arg0);
18256 op1 = expand_normal (arg1);
18257 if (!REG_P (op0))
18258 op0 = copy_to_mode_reg (SImode, op0);
18259 if (!REG_P (op1))
18260 op1 = copy_to_mode_reg (SImode, op1);
18261 emit_insn (gen_sse3_mwait (op0, op1));
18262 return 0;
18264 case IX86_BUILTIN_LDDQU:
18265 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18266 target, 1);
18268 case IX86_BUILTIN_PALIGNR:
18269 case IX86_BUILTIN_PALIGNR128:
18270 if (fcode == IX86_BUILTIN_PALIGNR)
18272 icode = CODE_FOR_ssse3_palignrdi;
18273 mode = DImode;
18275 else
18277 icode = CODE_FOR_ssse3_palignrti;
18278 mode = V2DImode;
18280 arg0 = CALL_EXPR_ARG (exp, 0);
18281 arg1 = CALL_EXPR_ARG (exp, 1);
18282 arg2 = CALL_EXPR_ARG (exp, 2);
18283 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18284 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18285 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18286 tmode = insn_data[icode].operand[0].mode;
18287 mode1 = insn_data[icode].operand[1].mode;
18288 mode2 = insn_data[icode].operand[2].mode;
18289 mode3 = insn_data[icode].operand[3].mode;
18291 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18293 op0 = copy_to_reg (op0);
18294 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18296 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18298 op1 = copy_to_reg (op1);
18299 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18301 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18303 error ("shift must be an immediate");
18304 return const0_rtx;
18306 target = gen_reg_rtx (mode);
18307 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18308 op0, op1, op2);
18309 if (! pat)
18310 return 0;
18311 emit_insn (pat);
18312 return target;
18314 case IX86_BUILTIN_MOVNTSD:
18315 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18317 case IX86_BUILTIN_MOVNTSS:
18318 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18320 case IX86_BUILTIN_INSERTQ:
18321 case IX86_BUILTIN_EXTRQ:
18322 icode = (fcode == IX86_BUILTIN_EXTRQ
18323 ? CODE_FOR_sse4a_extrq
18324 : CODE_FOR_sse4a_insertq);
18325 arg0 = CALL_EXPR_ARG (exp, 0);
18326 arg1 = CALL_EXPR_ARG (exp, 1);
18327 op0 = expand_normal (arg0);
18328 op1 = expand_normal (arg1);
18329 tmode = insn_data[icode].operand[0].mode;
18330 mode1 = insn_data[icode].operand[1].mode;
18331 mode2 = insn_data[icode].operand[2].mode;
18332 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18333 op0 = copy_to_mode_reg (mode1, op0);
18334 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18335 op1 = copy_to_mode_reg (mode2, op1);
18336 if (optimize || target == 0
18337 || GET_MODE (target) != tmode
18338 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18339 target = gen_reg_rtx (tmode);
18340 pat = GEN_FCN (icode) (target, op0, op1);
18341 if (! pat)
18342 return NULL_RTX;
18343 emit_insn (pat);
18344 return target;
18346 case IX86_BUILTIN_EXTRQI:
18347 icode = CODE_FOR_sse4a_extrqi;
18348 arg0 = CALL_EXPR_ARG (exp, 0);
18349 arg1 = CALL_EXPR_ARG (exp, 1);
18350 arg2 = CALL_EXPR_ARG (exp, 2);
18351 op0 = expand_normal (arg0);
18352 op1 = expand_normal (arg1);
18353 op2 = expand_normal (arg2);
18354 tmode = insn_data[icode].operand[0].mode;
18355 mode1 = insn_data[icode].operand[1].mode;
18356 mode2 = insn_data[icode].operand[2].mode;
18357 mode3 = insn_data[icode].operand[3].mode;
18358 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18359 op0 = copy_to_mode_reg (mode1, op0);
18360 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18362 error ("index mask must be an immediate");
18363 return gen_reg_rtx (tmode);
18365 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18367 error ("length mask must be an immediate");
18368 return gen_reg_rtx (tmode);
18370 if (optimize || target == 0
18371 || GET_MODE (target) != tmode
18372 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18373 target = gen_reg_rtx (tmode);
18374 pat = GEN_FCN (icode) (target, op0, op1, op2);
18375 if (! pat)
18376 return NULL_RTX;
18377 emit_insn (pat);
18378 return target;
18380 case IX86_BUILTIN_INSERTQI:
18381 icode = CODE_FOR_sse4a_insertqi;
18382 arg0 = CALL_EXPR_ARG (exp, 0);
18383 arg1 = CALL_EXPR_ARG (exp, 1);
18384 arg2 = CALL_EXPR_ARG (exp, 2);
18385 arg3 = CALL_EXPR_ARG (exp, 3);
18386 op0 = expand_normal (arg0);
18387 op1 = expand_normal (arg1);
18388 op2 = expand_normal (arg2);
18389 op3 = expand_normal (arg3);
18390 tmode = insn_data[icode].operand[0].mode;
18391 mode1 = insn_data[icode].operand[1].mode;
18392 mode2 = insn_data[icode].operand[2].mode;
18393 mode3 = insn_data[icode].operand[3].mode;
18394 mode4 = insn_data[icode].operand[4].mode;
18396 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18397 op0 = copy_to_mode_reg (mode1, op0);
18399 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18400 op1 = copy_to_mode_reg (mode2, op1);
18402 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18404 error ("index mask must be an immediate");
18405 return gen_reg_rtx (tmode);
18407 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18409 error ("length mask must be an immediate");
18410 return gen_reg_rtx (tmode);
18412 if (optimize || target == 0
18413 || GET_MODE (target) != tmode
18414 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18415 target = gen_reg_rtx (tmode);
18416 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18417 if (! pat)
18418 return NULL_RTX;
18419 emit_insn (pat);
18420 return target;
18422 case IX86_BUILTIN_VEC_INIT_V2SI:
18423 case IX86_BUILTIN_VEC_INIT_V4HI:
18424 case IX86_BUILTIN_VEC_INIT_V8QI:
18425 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18427 case IX86_BUILTIN_VEC_EXT_V2DF:
18428 case IX86_BUILTIN_VEC_EXT_V2DI:
18429 case IX86_BUILTIN_VEC_EXT_V4SF:
18430 case IX86_BUILTIN_VEC_EXT_V4SI:
18431 case IX86_BUILTIN_VEC_EXT_V8HI:
18432 case IX86_BUILTIN_VEC_EXT_V2SI:
18433 case IX86_BUILTIN_VEC_EXT_V4HI:
18434 return ix86_expand_vec_ext_builtin (exp, target);
18436 case IX86_BUILTIN_VEC_SET_V8HI:
18437 case IX86_BUILTIN_VEC_SET_V4HI:
18438 return ix86_expand_vec_set_builtin (exp);
18440 default:
18441 break;
18444 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18445 if (d->code == fcode)
18447 /* Compares are treated specially. */
18448 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18449 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18450 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18451 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18452 return ix86_expand_sse_compare (d, exp, target);
18454 return ix86_expand_binop_builtin (d->icode, exp, target);
18457 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18458 if (d->code == fcode)
18459 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18461 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18462 if (d->code == fcode)
18463 return ix86_expand_sse_comi (d, exp, target);
18465 gcc_unreachable ();
18468 /* Returns a function decl for a vectorized version of the builtin function
18469 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18470 if it is not available. */
18472 static tree
18473 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18474 tree type_in)
18476 enum machine_mode in_mode, out_mode;
18477 int in_n, out_n;
18479 if (TREE_CODE (type_out) != VECTOR_TYPE
18480 || TREE_CODE (type_in) != VECTOR_TYPE)
18481 return NULL_TREE;
18483 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18484 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18485 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18486 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18488 switch (fn)
18490 case BUILT_IN_SQRT:
18491 if (out_mode == DFmode && out_n == 2
18492 && in_mode == DFmode && in_n == 2)
18493 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18494 return NULL_TREE;
18496 case BUILT_IN_SQRTF:
18497 if (out_mode == SFmode && out_n == 4
18498 && in_mode == SFmode && in_n == 4)
18499 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18500 return NULL_TREE;
18502 case BUILT_IN_LRINTF:
18503 if (out_mode == SImode && out_n == 4
18504 && in_mode == SFmode && in_n == 4)
18505 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18506 return NULL_TREE;
18508 default:
18512 return NULL_TREE;
18515 /* Returns a decl of a function that implements conversion of the
18516 input vector of type TYPE, or NULL_TREE if it is not available. */
18518 static tree
18519 ix86_builtin_conversion (enum tree_code code, tree type)
18521 if (TREE_CODE (type) != VECTOR_TYPE)
18522 return NULL_TREE;
18524 switch (code)
18526 case FLOAT_EXPR:
18527 switch (TYPE_MODE (type))
18529 case V4SImode:
18530 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18531 default:
18532 return NULL_TREE;
18535 case FIX_TRUNC_EXPR:
18536 switch (TYPE_MODE (type))
18538 case V4SFmode:
18539 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18540 default:
18541 return NULL_TREE;
18543 default:
18544 return NULL_TREE;
18549 /* Store OPERAND to the memory after reload is completed. This means
18550 that we can't easily use assign_stack_local. */
18551 rtx
18552 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18554 rtx result;
18556 gcc_assert (reload_completed);
18557 if (TARGET_RED_ZONE)
18559 result = gen_rtx_MEM (mode,
18560 gen_rtx_PLUS (Pmode,
18561 stack_pointer_rtx,
18562 GEN_INT (-RED_ZONE_SIZE)));
18563 emit_move_insn (result, operand);
18565 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18567 switch (mode)
18569 case HImode:
18570 case SImode:
18571 operand = gen_lowpart (DImode, operand);
18572 /* FALLTHRU */
18573 case DImode:
18574 emit_insn (
18575 gen_rtx_SET (VOIDmode,
18576 gen_rtx_MEM (DImode,
18577 gen_rtx_PRE_DEC (DImode,
18578 stack_pointer_rtx)),
18579 operand));
18580 break;
18581 default:
18582 gcc_unreachable ();
18584 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18586 else
18588 switch (mode)
18590 case DImode:
18592 rtx operands[2];
18593 split_di (&operand, 1, operands, operands + 1);
18594 emit_insn (
18595 gen_rtx_SET (VOIDmode,
18596 gen_rtx_MEM (SImode,
18597 gen_rtx_PRE_DEC (Pmode,
18598 stack_pointer_rtx)),
18599 operands[1]));
18600 emit_insn (
18601 gen_rtx_SET (VOIDmode,
18602 gen_rtx_MEM (SImode,
18603 gen_rtx_PRE_DEC (Pmode,
18604 stack_pointer_rtx)),
18605 operands[0]));
18607 break;
18608 case HImode:
18609 /* Store HImodes as SImodes. */
18610 operand = gen_lowpart (SImode, operand);
18611 /* FALLTHRU */
18612 case SImode:
18613 emit_insn (
18614 gen_rtx_SET (VOIDmode,
18615 gen_rtx_MEM (GET_MODE (operand),
18616 gen_rtx_PRE_DEC (SImode,
18617 stack_pointer_rtx)),
18618 operand));
18619 break;
18620 default:
18621 gcc_unreachable ();
18623 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18625 return result;
18628 /* Free the operand from memory. */
18629 void
18630 ix86_free_from_memory (enum machine_mode mode)
18632 if (!TARGET_RED_ZONE)
18634 int size;
18636 if (mode == DImode || TARGET_64BIT)
18637 size = 8;
18638 else
18639 size = 4;
18640 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18641 to a pop or add instruction if registers are available. */
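/* For instance, on a 32-bit target releasing a 4-byte slot emits the
   equivalent of "leal 4(%esp), %esp" here, which peephole2 may later
   rewrite as a "popl" into a dead register or as "addl $4, %esp".
   (Illustrative operand sizes; the emitted insn is the SET of the stack
   pointer built below.)  */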
18642 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18643 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18644 GEN_INT (size))));
18648 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18649 QImode must go into class Q_REGS.
18650 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18651 movdf to do mem-to-mem moves through integer regs. */
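/* For example, asking to reload a nonzero floating-point constant into an
   SSE class yields NO_REGS below, which steers reload toward spilling the
   constant to the constant pool and loading it from memory instead.  */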
18652 enum reg_class
18653 ix86_preferred_reload_class (rtx x, enum reg_class class)
18655 enum machine_mode mode = GET_MODE (x);
18657 /* We're only allowed to return a subclass of CLASS. Many of the
18658 following checks fail for NO_REGS, so eliminate that early. */
18659 if (class == NO_REGS)
18660 return NO_REGS;
18662 /* All classes can load zeros. */
18663 if (x == CONST0_RTX (mode))
18664 return class;
18666 /* Force constants into memory if we are loading a (nonzero) constant into
18667 an MMX or SSE register. This is because there are no MMX/SSE instructions
18668 to load from a constant. */
18669 if (CONSTANT_P (x)
18670 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18671 return NO_REGS;
18673 /* Prefer SSE regs only, if we can use them for math. */
18674 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18675 return SSE_CLASS_P (class) ? class : NO_REGS;
18677 /* Floating-point constants need more complex checks. */
18678 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18680 /* General regs can load everything. */
18681 if (reg_class_subset_p (class, GENERAL_REGS))
18682 return class;
18684 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18685 zero above. We only want to wind up preferring 80387 registers if
18686 we plan on doing computation with them. */
18687 if (TARGET_80387
18688 && standard_80387_constant_p (x))
18690 /* Limit class to non-sse. */
18691 if (class == FLOAT_SSE_REGS)
18692 return FLOAT_REGS;
18693 if (class == FP_TOP_SSE_REGS)
18694 return FP_TOP_REG;
18695 if (class == FP_SECOND_SSE_REGS)
18696 return FP_SECOND_REG;
18697 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18698 return class;
18701 return NO_REGS;
18704 /* Generally when we see PLUS here, it's the function invariant
18705 (plus soft-fp const_int), which can only be computed into general
18706 regs. */
18707 if (GET_CODE (x) == PLUS)
18708 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18710 /* QImode constants are easy to load, but non-constant QImode data
18711 must go into Q_REGS. */
18712 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18714 if (reg_class_subset_p (class, Q_REGS))
18715 return class;
18716 if (reg_class_subset_p (Q_REGS, class))
18717 return Q_REGS;
18718 return NO_REGS;
18721 return class;
18724 /* Discourage putting floating-point values in SSE registers unless
18725 SSE math is being used, and likewise for the 387 registers. */
18726 enum reg_class
18727 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18729 enum machine_mode mode = GET_MODE (x);
18731 /* Restrict the output reload class to the register bank that we are doing
18732 math on. If we would like not to return a subset of CLASS, reject this
18733 alternative: if reload cannot do this, it will still use its choice. */
18734 mode = GET_MODE (x);
18735 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18736 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18738 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18740 if (class == FP_TOP_SSE_REGS)
18741 return FP_TOP_REG;
18742 else if (class == FP_SECOND_SSE_REGS)
18743 return FP_SECOND_REG;
18744 else
18745 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18748 return class;
18751 /* If we are copying between general and FP registers, we need a memory
18752 location. The same is true for SSE and MMX registers.
18754 The macro can't work reliably when one of the CLASSES is a class containing
18755 registers from multiple units (SSE, MMX, integer). We avoid this by never
18756 combining those units in a single alternative in the machine description.
18757 Ensure that this constraint holds to avoid unexpected surprises.
18759 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18760 enforce these sanity checks. */
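/* For example, on a 32-bit SSE2 target a DImode move between SSE_REGS and
   GENERAL_REGS is wider than UNITS_PER_WORD, so it is reported as needing
   to go through memory; the same holds for any SSE<->integer move when
   TARGET_INTER_UNIT_MOVES is disabled.  */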
18762 int
18763 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18764 enum machine_mode mode, int strict)
18766 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18767 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18768 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18769 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18770 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18771 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18773 gcc_assert (!strict);
18774 return true;
18777 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18778 return true;
18780 /* ??? This is a lie. We do have moves between mmx/general, and for
18781 mmx/sse2. But by saying we need secondary memory we discourage the
18782 register allocator from using the mmx registers unless needed. */
18783 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18784 return true;
18786 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18788 /* SSE1 doesn't have any direct moves from other classes. */
18789 if (!TARGET_SSE2)
18790 return true;
18792 /* If the target says that inter-unit moves are more expensive
18793 than moving through memory, then don't generate them. */
18794 if (!TARGET_INTER_UNIT_MOVES)
18795 return true;
18797 /* Between SSE and general, we have moves no larger than word size. */
18798 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18799 return true;
18802 return false;
18805 /* Return true if the registers in CLASS cannot represent the change from
18806 modes FROM to TO. */
18808 bool
18809 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18810 enum reg_class class)
18812 if (from == to)
18813 return false;
18815 /* x87 registers can't do subreg at all, as all values are reformatted
18816 to extended precision. */
18817 if (MAYBE_FLOAT_CLASS_P (class))
18818 return true;
18820 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18822 /* Vector registers do not support QI or HImode loads. If we don't
18823 disallow a change to these modes, reload will assume it's ok to
18824 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18825 the vec_dupv4hi pattern. */
18826 if (GET_MODE_SIZE (from) < 4)
18827 return true;
18829 /* Vector registers do not support subreg with nonzero offsets, which
18830 are otherwise valid for integer registers. Since we can't see
18831 whether we have a nonzero offset from here, prohibit all
18832 nonparadoxical subregs changing size. */
18833 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18834 return true;
18837 return false;
18840 /* Return the cost of moving data from a register in class CLASS1 to
18841 one in class CLASS2.
18843 It is not required that the cost always equal 2 when FROM is the same as TO;
18844 on some machines it is expensive to move between registers if they are not
18845 general registers. */
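/* As an illustration with made-up cost-table values: if an SImode move from
   SSE_REGS to GENERAL_REGS needs secondary memory and the symmetric memory
   move costs are 4 for the SSE class and 3 for the integer class, the code
   below returns 1 + 4 + 3 = 8; the extra penalties of 20 (multi-register
   stores, overlapping FP/MMX register files) do not apply in that case.  */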
18847 int
18848 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18849 enum reg_class class2)
18851 /* In case we require secondary memory, compute the cost of the store followed
18852 by the load. In order to avoid bad register allocation choices, we need
18853 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18855 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18857 int cost = 1;
18859 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18860 MEMORY_MOVE_COST (mode, class1, 1));
18861 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18862 MEMORY_MOVE_COST (mode, class2, 1));
18864 /* In the case of copying from a general purpose register we may emit multiple
18865 stores followed by a single load, causing a memory size mismatch stall.
18866 Count this as an arbitrarily high cost of 20. */
18867 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18868 cost += 20;
18870 /* In the case of FP/MMX moves, the registers actually overlap, and we
18871 have to switch modes in order to treat them differently. */
18872 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18873 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18874 cost += 20;
18876 return cost;
18879 /* Moves between SSE/MMX and integer unit are expensive. */
18880 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18881 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18882 return ix86_cost->mmxsse_to_integer;
18883 if (MAYBE_FLOAT_CLASS_P (class1))
18884 return ix86_cost->fp_move;
18885 if (MAYBE_SSE_CLASS_P (class1))
18886 return ix86_cost->sse_move;
18887 if (MAYBE_MMX_CLASS_P (class1))
18888 return ix86_cost->mmx_move;
18889 return 2;
18892 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18894 bool
18895 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18897 /* Flags, and only flags, can hold CCmode values. */
18898 if (CC_REGNO_P (regno))
18899 return GET_MODE_CLASS (mode) == MODE_CC;
18900 if (GET_MODE_CLASS (mode) == MODE_CC
18901 || GET_MODE_CLASS (mode) == MODE_RANDOM
18902 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18903 return 0;
18904 if (FP_REGNO_P (regno))
18905 return VALID_FP_MODE_P (mode);
18906 if (SSE_REGNO_P (regno))
18908 /* We implement the move patterns for all vector modes into and
18909 out of SSE registers, even when no operation instructions
18910 are available. */
18911 return (VALID_SSE_REG_MODE (mode)
18912 || VALID_SSE2_REG_MODE (mode)
18913 || VALID_MMX_REG_MODE (mode)
18914 || VALID_MMX_REG_MODE_3DNOW (mode));
18916 if (MMX_REGNO_P (regno))
18918 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18919 so if the register is available at all, then we can move data of
18920 the given mode into or out of it. */
18921 return (VALID_MMX_REG_MODE (mode)
18922 || VALID_MMX_REG_MODE_3DNOW (mode));
18925 if (mode == QImode)
18927 /* Take care with QImode values - they can be in non-QI regs,
18928 but then they do cause partial register stalls. */
18929 if (regno < 4 || TARGET_64BIT)
18930 return 1;
18931 if (!TARGET_PARTIAL_REG_STALL)
18932 return 1;
18933 return reload_in_progress || reload_completed;
18935 /* We handle both integers and floats in the general purpose registers. */
18936 else if (VALID_INT_MODE_P (mode))
18937 return 1;
18938 else if (VALID_FP_MODE_P (mode))
18939 return 1;
18940 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18941 on to use that value in smaller contexts, this can easily force a
18942 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18943 supporting DImode, allow it. */
18944 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18945 return 1;
18947 return 0;
18950 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18951 tieable integer mode. */
18953 static bool
18954 ix86_tieable_integer_mode_p (enum machine_mode mode)
18956 switch (mode)
18958 case HImode:
18959 case SImode:
18960 return true;
18962 case QImode:
18963 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18965 case DImode:
18966 return TARGET_64BIT;
18968 default:
18969 return false;
18973 /* Return true if MODE1 is accessible in a register that can hold MODE2
18974 without copying. That is, all register classes that can hold MODE2
18975 can also hold MODE1. */
18977 bool
18978 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18980 if (mode1 == mode2)
18981 return true;
18983 if (ix86_tieable_integer_mode_p (mode1)
18984 && ix86_tieable_integer_mode_p (mode2))
18985 return true;
18987 /* MODE2 being XFmode implies fp stack or general regs, which means we
18988 can tie any smaller floating point modes to it. Note that we do not
18989 tie this with TFmode. */
18990 if (mode2 == XFmode)
18991 return mode1 == SFmode || mode1 == DFmode;
18993 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18994 that we can tie it with SFmode. */
18995 if (mode2 == DFmode)
18996 return mode1 == SFmode;
18998 /* If MODE2 is only appropriate for an SSE register, then tie with
18999 any other mode acceptable to SSE registers. */
19000 if (GET_MODE_SIZE (mode2) == 16
19001 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19002 return (GET_MODE_SIZE (mode1) == 16
19003 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19005 /* If MODE2 is appropriate for an MMX register, then tie
19006 with any other mode acceptable to MMX registers. */
19007 if (GET_MODE_SIZE (mode2) == 8
19008 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19009 return (GET_MODE_SIZE (mode1) == 8
19010 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19012 return false;
19015 /* Return the cost of moving data of mode M between a
19016 register and memory. A value of 2 is the default; this cost is
19017 relative to those in `REGISTER_MOVE_COST'.
19019 If moving between registers and memory is more expensive than
19020 between two registers, you should define this macro to express the
19021 relative cost.
19023 Also model the increased cost of moving QImode registers in non
19024 Q_REGS classes. */
19026 int
19027 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19029 if (FLOAT_CLASS_P (class))
19031 int index;
19032 switch (mode)
19034 case SFmode:
19035 index = 0;
19036 break;
19037 case DFmode:
19038 index = 1;
19039 break;
19040 case XFmode:
19041 index = 2;
19042 break;
19043 default:
19044 return 100;
19046 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19048 if (SSE_CLASS_P (class))
19050 int index;
19051 switch (GET_MODE_SIZE (mode))
19053 case 4:
19054 index = 0;
19055 break;
19056 case 8:
19057 index = 1;
19058 break;
19059 case 16:
19060 index = 2;
19061 break;
19062 default:
19063 return 100;
19065 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19067 if (MMX_CLASS_P (class))
19069 int index;
19070 switch (GET_MODE_SIZE (mode))
19072 case 4:
19073 index = 0;
19074 break;
19075 case 8:
19076 index = 1;
19077 break;
19078 default:
19079 return 100;
19081 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19083 switch (GET_MODE_SIZE (mode))
19085 case 1:
19086 if (in)
19087 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19088 : ix86_cost->movzbl_load);
19089 else
19090 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19091 : ix86_cost->int_store[0] + 4);
19092 break;
19093 case 2:
19094 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19095 default:
19096 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
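/* For example, a DImode value (8 bytes) on a 32-bit target with
   UNITS_PER_WORD == 4 needs (8 + 4 - 1) / 4 == 2 word-sized moves, so the
   cost returned below is twice the SImode load or store cost.  */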
19097 if (mode == TFmode)
19098 mode = XFmode;
19099 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19100 * (((int) GET_MODE_SIZE (mode)
19101 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
19105 /* Compute a (partial) cost for rtx X. Return true if the complete
19106 cost has been computed, and false if subexpressions should be
19107 scanned. In either case, *TOTAL contains the cost result. */
19109 static bool
19110 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19112 enum machine_mode mode = GET_MODE (x);
19114 switch (code)
19116 case CONST_INT:
19117 case CONST:
19118 case LABEL_REF:
19119 case SYMBOL_REF:
19120 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19121 *total = 3;
19122 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19123 *total = 2;
19124 else if (flag_pic && SYMBOLIC_CONST (x)
19125 && (!TARGET_64BIT
19126 || (GET_CODE (x) != LABEL_REF
19127 && (GET_CODE (x) != SYMBOL_REF
19128 || !SYMBOL_REF_LOCAL_P (x)))))
19129 *total = 1;
19130 else
19131 *total = 0;
19132 return true;
19134 case CONST_DOUBLE:
19135 if (mode == VOIDmode)
19136 *total = 0;
19137 else
19138 switch (standard_80387_constant_p (x))
19140 case 1: /* 0.0 */
19141 *total = 1;
19142 break;
19143 default: /* Other constants */
19144 *total = 2;
19145 break;
19146 case 0:
19147 case -1:
19148 /* Start with (MEM (SYMBOL_REF)), since that's where
19149 it'll probably end up. Add a penalty for size. */
19150 *total = (COSTS_N_INSNS (1)
19151 + (flag_pic != 0 && !TARGET_64BIT)
19152 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19153 break;
19155 return true;
19157 case ZERO_EXTEND:
19158 /* The zero extension is often completely free on x86_64, so make
19159 it as cheap as possible. */
19160 if (TARGET_64BIT && mode == DImode
19161 && GET_MODE (XEXP (x, 0)) == SImode)
19162 *total = 1;
19163 else if (TARGET_ZERO_EXTEND_WITH_AND)
19164 *total = ix86_cost->add;
19165 else
19166 *total = ix86_cost->movzx;
19167 return false;
19169 case SIGN_EXTEND:
19170 *total = ix86_cost->movsx;
19171 return false;
19173 case ASHIFT:
19174 if (CONST_INT_P (XEXP (x, 1))
19175 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19177 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19178 if (value == 1)
19180 *total = ix86_cost->add;
19181 return false;
19183 if ((value == 2 || value == 3)
19184 && ix86_cost->lea <= ix86_cost->shift_const)
19186 *total = ix86_cost->lea;
19187 return false;
19190 /* FALLTHRU */
19192 case ROTATE:
19193 case ASHIFTRT:
19194 case LSHIFTRT:
19195 case ROTATERT:
19196 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19198 if (CONST_INT_P (XEXP (x, 1)))
19200 if (INTVAL (XEXP (x, 1)) > 32)
19201 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19202 else
19203 *total = ix86_cost->shift_const * 2;
19205 else
19207 if (GET_CODE (XEXP (x, 1)) == AND)
19208 *total = ix86_cost->shift_var * 2;
19209 else
19210 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19213 else
19215 if (CONST_INT_P (XEXP (x, 1)))
19216 *total = ix86_cost->shift_const;
19217 else
19218 *total = ix86_cost->shift_var;
19220 return false;
19222 case MULT:
19223 if (FLOAT_MODE_P (mode))
19225 *total = ix86_cost->fmul;
19226 return false;
19228 else
19230 rtx op0 = XEXP (x, 0);
19231 rtx op1 = XEXP (x, 1);
19232 int nbits;
19233 if (CONST_INT_P (XEXP (x, 1)))
19235 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19236 for (nbits = 0; value != 0; value &= value - 1)
19237 nbits++;
19239 else
19240 /* This is arbitrary. */
19241 nbits = 7;
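/* As a worked example with a hypothetical operand: a constant multiplier
   of 10 (binary 1010) has two set bits, so the popcount loop above leaves
   nbits == 2, and the cost computed below is mult_init[] for the mode plus
   2 * mult_bit plus the costs of the two operands.  */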
19243 /* Compute costs correctly for widening multiplication. */
19244 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19245 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19246 == GET_MODE_SIZE (mode))
19248 int is_mulwiden = 0;
19249 enum machine_mode inner_mode = GET_MODE (op0);
19251 if (GET_CODE (op0) == GET_CODE (op1))
19252 is_mulwiden = 1, op1 = XEXP (op1, 0);
19253 else if (CONST_INT_P (op1))
19255 if (GET_CODE (op0) == SIGN_EXTEND)
19256 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19257 == INTVAL (op1);
19258 else
19259 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19262 if (is_mulwiden)
19263 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19266 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19267 + nbits * ix86_cost->mult_bit
19268 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19270 return true;
19273 case DIV:
19274 case UDIV:
19275 case MOD:
19276 case UMOD:
19277 if (FLOAT_MODE_P (mode))
19278 *total = ix86_cost->fdiv;
19279 else
19280 *total = ix86_cost->divide[MODE_INDEX (mode)];
19281 return false;
19283 case PLUS:
19284 if (FLOAT_MODE_P (mode))
19285 *total = ix86_cost->fadd;
19286 else if (GET_MODE_CLASS (mode) == MODE_INT
19287 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19289 if (GET_CODE (XEXP (x, 0)) == PLUS
19290 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19291 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19292 && CONSTANT_P (XEXP (x, 1)))
19294 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19295 if (val == 2 || val == 4 || val == 8)
19297 *total = ix86_cost->lea;
19298 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19299 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19300 outer_code);
19301 *total += rtx_cost (XEXP (x, 1), outer_code);
19302 return true;
19305 else if (GET_CODE (XEXP (x, 0)) == MULT
19306 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19308 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19309 if (val == 2 || val == 4 || val == 8)
19311 *total = ix86_cost->lea;
19312 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19313 *total += rtx_cost (XEXP (x, 1), outer_code);
19314 return true;
19317 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19319 *total = ix86_cost->lea;
19320 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19321 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19322 *total += rtx_cost (XEXP (x, 1), outer_code);
19323 return true;
19326 /* FALLTHRU */
19328 case MINUS:
19329 if (FLOAT_MODE_P (mode))
19331 *total = ix86_cost->fadd;
19332 return false;
19334 /* FALLTHRU */
19336 case AND:
19337 case IOR:
19338 case XOR:
19339 if (!TARGET_64BIT && mode == DImode)
19341 *total = (ix86_cost->add * 2
19342 + (rtx_cost (XEXP (x, 0), outer_code)
19343 << (GET_MODE (XEXP (x, 0)) != DImode))
19344 + (rtx_cost (XEXP (x, 1), outer_code)
19345 << (GET_MODE (XEXP (x, 1)) != DImode)));
19346 return true;
19348 /* FALLTHRU */
19350 case NEG:
19351 if (FLOAT_MODE_P (mode))
19353 *total = ix86_cost->fchs;
19354 return false;
19356 /* FALLTHRU */
19358 case NOT:
19359 if (!TARGET_64BIT && mode == DImode)
19360 *total = ix86_cost->add * 2;
19361 else
19362 *total = ix86_cost->add;
19363 return false;
19365 case COMPARE:
19366 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19367 && XEXP (XEXP (x, 0), 1) == const1_rtx
19368 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19369 && XEXP (x, 1) == const0_rtx)
19371 /* This kind of construct is implemented using test[bwl].
19372 Treat it as if we had an AND. */
19373 *total = (ix86_cost->add
19374 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19375 + rtx_cost (const1_rtx, outer_code));
19376 return true;
19378 return false;
19380 case FLOAT_EXTEND:
19381 if (!TARGET_SSE_MATH
19382 || mode == XFmode
19383 || (mode == DFmode && !TARGET_SSE2))
19384 *total = 0;
19385 return false;
19387 case ABS:
19388 if (FLOAT_MODE_P (mode))
19389 *total = ix86_cost->fabs;
19390 return false;
19392 case SQRT:
19393 if (FLOAT_MODE_P (mode))
19394 *total = ix86_cost->fsqrt;
19395 return false;
19397 case UNSPEC:
19398 if (XINT (x, 1) == UNSPEC_TP)
19399 *total = 0;
19400 return false;
19402 default:
19403 return false;
19407 #if TARGET_MACHO
19409 static int current_machopic_label_num;
19411 /* Given a symbol name and its associated stub, write out the
19412 definition of the stub. */
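/* Illustrative output for the non-PIC (impure) case with lazy pointer
   label L1$lz: the stub itself is just "jmp *L1$lz", the binder pushes
   "$L1$lz" and jumps to dyld_stub_binding_helper, and L1$lz initially
   holds the address of the binder.  The PIC (pure) variant emitted below
   instead materializes the pointer address via a call/pop sequence.  */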
19414 void
19415 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19417 unsigned int length;
19418 char *binder_name, *symbol_name, lazy_ptr_name[32];
19419 int label = ++current_machopic_label_num;
19421 /* For 64-bit we shouldn't get here. */
19422 gcc_assert (!TARGET_64BIT);
19424 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19425 symb = (*targetm.strip_name_encoding) (symb);
19427 length = strlen (stub);
19428 binder_name = alloca (length + 32);
19429 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19431 length = strlen (symb);
19432 symbol_name = alloca (length + 32);
19433 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19435 sprintf (lazy_ptr_name, "L%d$lz", label);
19437 if (MACHOPIC_PURE)
19438 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19439 else
19440 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19442 fprintf (file, "%s:\n", stub);
19443 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19445 if (MACHOPIC_PURE)
19447 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19448 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19449 fprintf (file, "\tjmp\t*%%edx\n");
19451 else
19452 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19454 fprintf (file, "%s:\n", binder_name);
19456 if (MACHOPIC_PURE)
19458 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19459 fprintf (file, "\tpushl\t%%eax\n");
19461 else
19462 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19464 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19466 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19467 fprintf (file, "%s:\n", lazy_ptr_name);
19468 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19469 fprintf (file, "\t.long %s\n", binder_name);
19472 void
19473 darwin_x86_file_end (void)
19475 darwin_file_end ();
19476 ix86_file_end ();
19478 #endif /* TARGET_MACHO */
19480 /* Order the registers for the register allocator. */
19482 void
19483 x86_order_regs_for_local_alloc (void)
19485 int pos = 0;
19486 int i;
19488 /* First allocate the local general purpose registers. */
19489 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19490 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19491 reg_alloc_order [pos++] = i;
19493 /* Global general purpose registers. */
19494 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19495 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19496 reg_alloc_order [pos++] = i;
19498 /* x87 registers come first in case we are doing FP math
19499 using them. */
19500 if (!TARGET_SSE_MATH)
19501 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19502 reg_alloc_order [pos++] = i;
19504 /* SSE registers. */
19505 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19506 reg_alloc_order [pos++] = i;
19507 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19508 reg_alloc_order [pos++] = i;
19510 /* x87 registers. */
19511 if (TARGET_SSE_MATH)
19512 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19513 reg_alloc_order [pos++] = i;
19515 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19516 reg_alloc_order [pos++] = i;
19518 /* Initialize the rest of the array, as we do not allocate some registers
19519 at all. */
19520 while (pos < FIRST_PSEUDO_REGISTER)
19521 reg_alloc_order [pos++] = 0;
19524 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19525 struct attribute_spec.handler. */
19526 static tree
19527 ix86_handle_struct_attribute (tree *node, tree name,
19528 tree args ATTRIBUTE_UNUSED,
19529 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19531 tree *type = NULL;
19532 if (DECL_P (*node))
19534 if (TREE_CODE (*node) == TYPE_DECL)
19535 type = &TREE_TYPE (*node);
19537 else
19538 type = node;
19540 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19541 || TREE_CODE (*type) == UNION_TYPE)))
19543 warning (OPT_Wattributes, "%qs attribute ignored",
19544 IDENTIFIER_POINTER (name));
19545 *no_add_attrs = true;
19548 else if ((is_attribute_p ("ms_struct", name)
19549 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19550 || ((is_attribute_p ("gcc_struct", name)
19551 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19553 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19554 IDENTIFIER_POINTER (name));
19555 *no_add_attrs = true;
19558 return NULL_TREE;
19561 static bool
19562 ix86_ms_bitfield_layout_p (tree record_type)
19564 return (TARGET_MS_BITFIELD_LAYOUT &&
19565 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19566 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19569 /* Returns an expression indicating where the this parameter is
19570 located on entry to the FUNCTION. */
19572 static rtx
19573 x86_this_parameter (tree function)
19575 tree type = TREE_TYPE (function);
19577 if (TARGET_64BIT)
19579 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19580 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19583 if (ix86_function_regparm (type, function) > 0)
19585 tree parm;
19587 parm = TYPE_ARG_TYPES (type);
19588 /* Figure out whether or not the function has a variable number of
19589 arguments. */
19590 for (; parm; parm = TREE_CHAIN (parm))
19591 if (TREE_VALUE (parm) == void_type_node)
19592 break;
19593 /* If not, the this parameter is in the first argument. */
19594 if (parm)
19596 int regno = 0;
19597 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19598 regno = 2;
19599 return gen_rtx_REG (SImode, regno);
19603 if (aggregate_value_p (TREE_TYPE (type), type))
19604 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19605 else
19606 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19609 /* Determine whether x86_output_mi_thunk can succeed. */
19611 static bool
19612 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19613 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19614 HOST_WIDE_INT vcall_offset, tree function)
19616 /* 64-bit can handle anything. */
19617 if (TARGET_64BIT)
19618 return true;
19620 /* For 32-bit, everything's fine if we have one free register. */
19621 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19622 return true;
19624 /* Need a free register for vcall_offset. */
19625 if (vcall_offset)
19626 return false;
19628 /* Need a free register for GOT references. */
19629 if (flag_pic && !(*targetm.binds_local_p) (function))
19630 return false;
19632 /* Otherwise ok. */
19633 return true;
19636 /* Output the assembler code for a thunk function. THUNK_DECL is the
19637 declaration for the thunk function itself, FUNCTION is the decl for
19638 the target function. DELTA is an immediate constant offset to be
19639 added to THIS. If VCALL_OFFSET is nonzero, the word at
19640 *(*this + vcall_offset) should be added to THIS. */
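/* As a hedged illustration: for a 32-bit thunk whose target uses the
   fastcall convention (so THIS arrives in %ecx), with DELTA == 8 and no
   VCALL_OFFSET, the code below emits roughly "addl $8, %ecx" followed by
   a direct "jmp" to FUNCTION (assuming a non-PIC, locally bound target).  */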
19642 static void
19643 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19644 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19645 HOST_WIDE_INT vcall_offset, tree function)
19647 rtx xops[3];
19648 rtx this = x86_this_parameter (function);
19649 rtx this_reg, tmp;
19651 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19652 pull it in now and let DELTA benefit. */
19653 if (REG_P (this))
19654 this_reg = this;
19655 else if (vcall_offset)
19657 /* Put the this parameter into %eax. */
19658 xops[0] = this;
19659 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19660 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19662 else
19663 this_reg = NULL_RTX;
19665 /* Adjust the this parameter by a fixed constant. */
19666 if (delta)
19668 xops[0] = GEN_INT (delta);
19669 xops[1] = this_reg ? this_reg : this;
19670 if (TARGET_64BIT)
19672 if (!x86_64_general_operand (xops[0], DImode))
19674 tmp = gen_rtx_REG (DImode, R10_REG);
19675 xops[1] = tmp;
19676 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19677 xops[0] = tmp;
19678 xops[1] = this;
19680 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19682 else
19683 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19686 /* Adjust the this parameter by a value stored in the vtable. */
19687 if (vcall_offset)
19689 if (TARGET_64BIT)
19690 tmp = gen_rtx_REG (DImode, R10_REG);
19691 else
19693 int tmp_regno = 2 /* ECX */;
19694 if (lookup_attribute ("fastcall",
19695 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19696 tmp_regno = 0 /* EAX */;
19697 tmp = gen_rtx_REG (SImode, tmp_regno);
19700 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19701 xops[1] = tmp;
19702 if (TARGET_64BIT)
19703 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19704 else
19705 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19707 /* Adjust the this parameter. */
19708 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19709 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19711 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19712 xops[0] = GEN_INT (vcall_offset);
19713 xops[1] = tmp2;
19714 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19715 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19717 xops[1] = this_reg;
19718 if (TARGET_64BIT)
19719 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19720 else
19721 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19724 /* If necessary, drop THIS back to its stack slot. */
19725 if (this_reg && this_reg != this)
19727 xops[0] = this_reg;
19728 xops[1] = this;
19729 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19732 xops[0] = XEXP (DECL_RTL (function), 0);
19733 if (TARGET_64BIT)
19735 if (!flag_pic || (*targetm.binds_local_p) (function))
19736 output_asm_insn ("jmp\t%P0", xops);
19737 else
19739 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19740 tmp = gen_rtx_CONST (Pmode, tmp);
19741 tmp = gen_rtx_MEM (QImode, tmp);
19742 xops[0] = tmp;
19743 output_asm_insn ("jmp\t%A0", xops);
19746 else
19748 if (!flag_pic || (*targetm.binds_local_p) (function))
19749 output_asm_insn ("jmp\t%P0", xops);
19750 else
19751 #if TARGET_MACHO
19752 if (TARGET_MACHO)
19754 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19755 tmp = (gen_rtx_SYMBOL_REF
19756 (Pmode,
19757 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19758 tmp = gen_rtx_MEM (QImode, tmp);
19759 xops[0] = tmp;
19760 output_asm_insn ("jmp\t%0", xops);
19762 else
19763 #endif /* TARGET_MACHO */
19765 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19766 output_set_got (tmp, NULL_RTX);
19768 xops[1] = tmp;
19769 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19770 output_asm_insn ("jmp\t{*}%1", xops);
19775 static void
19776 x86_file_start (void)
19778 default_file_start ();
19779 #if TARGET_MACHO
19780 darwin_file_start ();
19781 #endif
19782 if (X86_FILE_START_VERSION_DIRECTIVE)
19783 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19784 if (X86_FILE_START_FLTUSED)
19785 fputs ("\t.global\t__fltused\n", asm_out_file);
19786 if (ix86_asm_dialect == ASM_INTEL)
19787 fputs ("\t.intel_syntax\n", asm_out_file);
19790 int
19791 x86_field_alignment (tree field, int computed)
19793 enum machine_mode mode;
19794 tree type = TREE_TYPE (field);
19796 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19797 return computed;
19798 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19799 ? get_inner_array_type (type) : type);
19800 if (mode == DFmode || mode == DCmode
19801 || GET_MODE_CLASS (mode) == MODE_INT
19802 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19803 return MIN (32, computed);
19804 return computed;
19807 /* Output assembler code to FILE to increment profiler label # LABELNO
19808 for profiling a function entry. */
19809 void
19810 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19812 if (TARGET_64BIT)
19813 if (flag_pic)
19815 #ifndef NO_PROFILE_COUNTERS
19816 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19817 #endif
19818 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19820 else
19822 #ifndef NO_PROFILE_COUNTERS
19823 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19824 #endif
19825 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19827 else if (flag_pic)
19829 #ifndef NO_PROFILE_COUNTERS
19830 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19831 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19832 #endif
19833 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19835 else
19837 #ifndef NO_PROFILE_COUNTERS
19838 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19839 PROFILE_COUNT_REGISTER);
19840 #endif
19841 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19845 /* We don't have exact information about the insn sizes, but we may assume
19846 quite safely that we are informed about all 1 byte insns and memory
19847 address sizes. This is enough to eliminate unnecessary padding in
19848 99% of cases. */
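/* Under these assumptions, for example, a direct call to a named function
   is counted as 5 bytes, an insn whose length attribute is at most 1
   counts as 1 byte, and any other non-jump insn that mentions a symbolic
   address is counted as at least 5 bytes (1 opcode byte plus a 4-byte
   address).  */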
19850 static int
19851 min_insn_size (rtx insn)
19853 int l = 0;
19855 if (!INSN_P (insn) || !active_insn_p (insn))
19856 return 0;
19858 /* Discard alignments we've emitted, and jump instructions. */
19859 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19860 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19861 return 0;
19862 if (JUMP_P (insn)
19863 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19864 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19865 return 0;
19867 /* Important case - calls are always 5 bytes.
19868 It is common to have many calls in a row. */
19869 if (CALL_P (insn)
19870 && symbolic_reference_mentioned_p (PATTERN (insn))
19871 && !SIBLING_CALL_P (insn))
19872 return 5;
19873 if (get_attr_length (insn) <= 1)
19874 return 1;
19876 /* For normal instructions we may rely on the sizes of addresses
19877 and the presence of a symbol to require 4 bytes of encoding.
19878 This is not the case for jumps, where references are PC relative. */
19879 if (!JUMP_P (insn))
19881 l = get_attr_length_address (insn);
19882 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19883 l = 4;
19885 if (l)
19886 return 1+l;
19887 else
19888 return 2;
19891 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
19892 16-byte window. */
19894 static void
19895 ix86_avoid_jump_misspredicts (void)
19897 rtx insn, start = get_insns ();
19898 int nbytes = 0, njumps = 0;
19899 int isjump = 0;
19901 /* Look for all minimal intervals of instructions containing 4 jumps.
19902 The intervals are bounded by START and INSN. NBYTES is the total
19903 size of the instructions in the interval, including INSN and not including
19904 START. When NBYTES is smaller than 16 bytes, it is possible
19905 that the ends of START and INSN land in the same 16-byte page.
19907 The smallest offset in the page at which INSN can start is the case where
19908 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
19909 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN). */
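/* A worked example with hypothetical sizes: if four jumps end up packed
   into NBYTES == 12 bytes and the last of them (INSN) is 2 bytes long,
   the loop below emits a p2align of 15 - 12 + 2 == 5 bytes in front of
   INSN, so that the four jumps can no longer share a single 16-byte
   window.  */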
19911 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19914 nbytes += min_insn_size (insn);
19915 if (dump_file)
19916 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19917 INSN_UID (insn), min_insn_size (insn));
19918 if ((JUMP_P (insn)
19919 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19920 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19921 || CALL_P (insn))
19922 njumps++;
19923 else
19924 continue;
19926 while (njumps > 3)
19928 start = NEXT_INSN (start);
19929 if ((JUMP_P (start)
19930 && GET_CODE (PATTERN (start)) != ADDR_VEC
19931 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19932 || CALL_P (start))
19933 njumps--, isjump = 1;
19934 else
19935 isjump = 0;
19936 nbytes -= min_insn_size (start);
19938 gcc_assert (njumps >= 0);
19939 if (dump_file)
19940 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19941 INSN_UID (start), INSN_UID (insn), nbytes);
19943 if (njumps == 3 && isjump && nbytes < 16)
19945 int padsize = 15 - nbytes + min_insn_size (insn);
19947 if (dump_file)
19948 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19949 INSN_UID (insn), padsize);
19950 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19955 /* AMD Athlon works faster
19956 when RET is not the destination of a conditional jump or directly preceded
19957 by another jump instruction. We avoid the penalty by replacing the plain
19958 RET with the longer return form (return_internal_long) in such cases. */
19959 static void
19960 ix86_pad_returns (void)
19962 edge e;
19963 edge_iterator ei;
19965 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19967 basic_block bb = e->src;
19968 rtx ret = BB_END (bb);
19969 rtx prev;
19970 bool replace = false;
19972 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19973 || !maybe_hot_bb_p (bb))
19974 continue;
19975 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19976 if (active_insn_p (prev) || LABEL_P (prev))
19977 break;
19978 if (prev && LABEL_P (prev))
19980 edge e;
19981 edge_iterator ei;
19983 FOR_EACH_EDGE (e, ei, bb->preds)
19984 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19985 && !(e->flags & EDGE_FALLTHRU))
19986 replace = true;
19988 if (!replace)
19990 prev = prev_active_insn (ret);
19991 if (prev
19992 && ((JUMP_P (prev) && any_condjump_p (prev))
19993 || CALL_P (prev)))
19994 replace = true;
19995 /* Empty functions get a branch mispredict even when the jump destination
19996 is not visible to us. */
19997 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19998 replace = true;
20000 if (replace)
20002 emit_insn_before (gen_return_internal_long (), ret);
20003 delete_insn (ret);
20008 /* Implement machine specific optimizations. We implement padding of returns
20009 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
20010 static void
20011 ix86_reorg (void)
20013 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
20014 ix86_pad_returns ();
20015 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
20016 ix86_avoid_jump_misspredicts ();
20019 /* Return nonzero when a QImode register that must be represented via a REX
20020 prefix is used. */
20021 bool
20022 x86_extended_QIreg_mentioned_p (rtx insn)
20024 int i;
20025 extract_insn_cached (insn);
20026 for (i = 0; i < recog_data.n_operands; i++)
20027 if (REG_P (recog_data.operand[i])
20028 && REGNO (recog_data.operand[i]) >= 4)
20029 return true;
20030 return false;
20033 /* Return nonzero when P points to a register encoded via a REX prefix.
20034 Called via for_each_rtx. */
20035 static int
20036 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20038 unsigned int regno;
20039 if (!REG_P (*p))
20040 return 0;
20041 regno = REGNO (*p);
20042 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20045 /* Return true when INSN mentions a register that must be encoded using a
20046 REX prefix. */
20047 bool
20048 x86_extended_reg_mentioned_p (rtx insn)
20050 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20053 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20054 optabs would emit if we didn't have TFmode patterns. */
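/* Rough C equivalent of the sequence emitted below; a sketch for
   illustration only, where FLOAT stands for the destination FP type:

     if ((signed) in >= 0)
       out = (FLOAT) in;
     else
       {
         i0 = (in >> 1) | (in & 1);   halve, folding the dropped bit back
         out = (FLOAT) i0;            in so the final rounding is correct,
         out = out + out;             convert, then double
       }
*/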
20056 void
20057 x86_emit_floatuns (rtx operands[2])
20059 rtx neglab, donelab, i0, i1, f0, in, out;
20060 enum machine_mode mode, inmode;
20062 inmode = GET_MODE (operands[1]);
20063 gcc_assert (inmode == SImode || inmode == DImode);
20065 out = operands[0];
20066 in = force_reg (inmode, operands[1]);
20067 mode = GET_MODE (out);
20068 neglab = gen_label_rtx ();
20069 donelab = gen_label_rtx ();
20070 f0 = gen_reg_rtx (mode);
20072 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20074 expand_float (out, in, 0);
20076 emit_jump_insn (gen_jump (donelab));
20077 emit_barrier ();
20079 emit_label (neglab);
20081 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20082 1, OPTAB_DIRECT);
20083 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20084 1, OPTAB_DIRECT);
20085 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20087 expand_float (f0, i0, 0);
20089 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20091 emit_label (donelab);
20094 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20095 with all elements equal to VAR. Return true if successful. */
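/* Worked example (for illustration only): on a plain MMX target without
   SSE or 3DNow!, broadcasting the QImode value 0xab into V8QImode takes
   the "widen" path below twice: 0xab is replicated into the HImode value
   0xabab, then into the SImode value 0xabababab, which is finally
   broadcast as a V2SImode vector and reinterpreted as V8QImode.  */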
20097 static bool
20098 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20099 rtx target, rtx val)
20101 enum machine_mode smode, wsmode, wvmode;
20102 rtx x;
20104 switch (mode)
20106 case V2SImode:
20107 case V2SFmode:
20108 if (!mmx_ok)
20109 return false;
20110 /* FALLTHRU */
20112 case V2DFmode:
20113 case V2DImode:
20114 case V4SFmode:
20115 case V4SImode:
20116 val = force_reg (GET_MODE_INNER (mode), val);
20117 x = gen_rtx_VEC_DUPLICATE (mode, val);
20118 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20119 return true;
20121 case V4HImode:
20122 if (!mmx_ok)
20123 return false;
20124 if (TARGET_SSE || TARGET_3DNOW_A)
20126 val = gen_lowpart (SImode, val);
20127 x = gen_rtx_TRUNCATE (HImode, val);
20128 x = gen_rtx_VEC_DUPLICATE (mode, x);
20129 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20130 return true;
20132 else
20134 smode = HImode;
20135 wsmode = SImode;
20136 wvmode = V2SImode;
20137 goto widen;
20140 case V8QImode:
20141 if (!mmx_ok)
20142 return false;
20143 smode = QImode;
20144 wsmode = HImode;
20145 wvmode = V4HImode;
20146 goto widen;
20147 case V8HImode:
20148 if (TARGET_SSE2)
20150 rtx tmp1, tmp2;
20151 /* Extend HImode to SImode using a paradoxical SUBREG. */
20152 tmp1 = gen_reg_rtx (SImode);
20153 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20154 /* Insert the SImode value as low element of V4SImode vector. */
20155 tmp2 = gen_reg_rtx (V4SImode);
20156 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20157 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20158 CONST0_RTX (V4SImode),
20159 const1_rtx);
20160 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20161 /* Cast the V4SImode vector back to a V8HImode vector. */
20162 tmp1 = gen_reg_rtx (V8HImode);
20163 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20164 /* Duplicate the low short through the whole low SImode word. */
20165 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20166 /* Cast the V8HImode vector back to a V4SImode vector. */
20167 tmp2 = gen_reg_rtx (V4SImode);
20168 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20169 /* Replicate the low element of the V4SImode vector. */
20170 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20171 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20172 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20173 return true;
20175 smode = HImode;
20176 wsmode = SImode;
20177 wvmode = V4SImode;
20178 goto widen;
20179 case V16QImode:
20180 if (TARGET_SSE2)
20182 rtx tmp1, tmp2;
20183 /* Extend QImode to SImode using a paradoxical SUBREG. */
20184 tmp1 = gen_reg_rtx (SImode);
20185 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20186 /* Insert the SImode value as low element of V4SImode vector. */
20187 tmp2 = gen_reg_rtx (V4SImode);
20188 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20189 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20190 CONST0_RTX (V4SImode),
20191 const1_rtx);
20192 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20193 /* Cast the V4SImode vector back to a V16QImode vector. */
20194 tmp1 = gen_reg_rtx (V16QImode);
20195 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20196 /* Duplicate the low byte through the whole low SImode word. */
20197 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20198 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20199 /* Cast the V16QImode vector back to a V4SImode vector. */
20200 tmp2 = gen_reg_rtx (V4SImode);
20201 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20202 /* Replicate the low element of the V4SImode vector. */
20203 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20205 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20205 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20206 return true;
20208 smode = QImode;
20209 wsmode = HImode;
20210 wvmode = V8HImode;
20211 goto widen;
20212 widen:
20213 /* Replicate the value once into the next wider mode and recurse. */
20214 val = convert_modes (wsmode, smode, val, true);
20215 x = expand_simple_binop (wsmode, ASHIFT, val,
20216 GEN_INT (GET_MODE_BITSIZE (smode)),
20217 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20218 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20220 x = gen_reg_rtx (wvmode);
20221 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20222 gcc_unreachable ();
20223 emit_move_insn (target, gen_lowpart (mode, x));
20224 return true;
20226 default:
20227 return false;
20231 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20232 whose ONE_VAR element is VAR, and other elements are zero. Return true
20233 if successful. */
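/* For instance (illustration only), building a V4SImode vector whose
   element 2 is VAR and whose other elements are zero first broadcasts VAR
   and merges it with zero so that only element 0 holds VAR, and then uses
   a pshufd (or shufps on SSE1) to move that element into slot 2.  */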
20235 static bool
20236 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20237 rtx target, rtx var, int one_var)
20239 enum machine_mode vsimode;
20240 rtx new_target;
20241 rtx x, tmp;
20243 switch (mode)
20245 case V2SFmode:
20246 case V2SImode:
20247 if (!mmx_ok)
20248 return false;
20249 /* FALLTHRU */
20251 case V2DFmode:
20252 case V2DImode:
20253 if (one_var != 0)
20254 return false;
20255 var = force_reg (GET_MODE_INNER (mode), var);
20256 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20257 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20258 return true;
20260 case V4SFmode:
20261 case V4SImode:
20262 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20263 new_target = gen_reg_rtx (mode);
20264 else
20265 new_target = target;
20266 var = force_reg (GET_MODE_INNER (mode), var);
20267 x = gen_rtx_VEC_DUPLICATE (mode, var);
20268 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20269 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20270 if (one_var != 0)
20272 /* We need to shuffle the value to the correct position, so
20273 create a new pseudo to store the intermediate result. */
20275 /* With SSE2, we can use the integer shuffle insns. */
20276 if (mode != V4SFmode && TARGET_SSE2)
20278 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20279 GEN_INT (1),
20280 GEN_INT (one_var == 1 ? 0 : 1),
20281 GEN_INT (one_var == 2 ? 0 : 1),
20282 GEN_INT (one_var == 3 ? 0 : 1)));
20283 if (target != new_target)
20284 emit_move_insn (target, new_target);
20285 return true;
20288 /* Otherwise convert the intermediate result to V4SFmode and
20289 use the SSE1 shuffle instructions. */
20290 if (mode != V4SFmode)
20292 tmp = gen_reg_rtx (V4SFmode);
20293 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20295 else
20296 tmp = new_target;
20298 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20299 GEN_INT (1),
20300 GEN_INT (one_var == 1 ? 0 : 1),
20301 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20302 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20304 if (mode != V4SFmode)
20305 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20306 else if (tmp != target)
20307 emit_move_insn (target, tmp);
20309 else if (target != new_target)
20310 emit_move_insn (target, new_target);
20311 return true;
20313 case V8HImode:
20314 case V16QImode:
20315 vsimode = V4SImode;
20316 goto widen;
20317 case V4HImode:
20318 case V8QImode:
20319 if (!mmx_ok)
20320 return false;
20321 vsimode = V2SImode;
20322 goto widen;
20323 widen:
20324 if (one_var != 0)
20325 return false;
20327 /* Zero extend the variable element to SImode and recurse. */
20328 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20330 x = gen_reg_rtx (vsimode);
20331 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20332 var, one_var))
20333 gcc_unreachable ();
20335 emit_move_insn (target, gen_lowpart (mode, x));
20336 return true;
20338 default:
20339 return false;
20343 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20344 consisting of the values in VALS. It is known that all elements
20345 except ONE_VAR are constants. Return true if successful. */
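/* Worked example (illustration only): for a V16QImode vector whose only
   variable element has index 5, the code below pairs it with the constant
   byte at index 4, forms the HImode value (VAR << 8) | const4, and then
   sets HImode element 2 of the corresponding V8HImode vector.  */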
20347 static bool
20348 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20349 rtx target, rtx vals, int one_var)
20351 rtx var = XVECEXP (vals, 0, one_var);
20352 enum machine_mode wmode;
20353 rtx const_vec, x;
20355 const_vec = copy_rtx (vals);
20356 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20357 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20359 switch (mode)
20361 case V2DFmode:
20362 case V2DImode:
20363 case V2SFmode:
20364 case V2SImode:
20365 /* For the two element vectors, it's just as easy to use
20366 the general case. */
20367 return false;
20369 case V4SFmode:
20370 case V4SImode:
20371 case V8HImode:
20372 case V4HImode:
20373 break;
20375 case V16QImode:
20376 wmode = V8HImode;
20377 goto widen;
20378 case V8QImode:
20379 wmode = V4HImode;
20380 goto widen;
20381 widen:
20382 /* There's no way to set one QImode entry easily. Combine
20383 the variable value with its adjacent constant value, and
20384 promote to an HImode set. */
20385 x = XVECEXP (vals, 0, one_var ^ 1);
20386 if (one_var & 1)
20388 var = convert_modes (HImode, QImode, var, true);
20389 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20390 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20391 x = GEN_INT (INTVAL (x) & 0xff);
20393 else
20395 var = convert_modes (HImode, QImode, var, true);
20396 x = gen_int_mode (INTVAL (x) << 8, HImode);
20398 if (x != const0_rtx)
20399 var = expand_simple_binop (HImode, IOR, var, x, var,
20400 1, OPTAB_LIB_WIDEN);
20402 x = gen_reg_rtx (wmode);
20403 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20404 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20406 emit_move_insn (target, gen_lowpart (mode, x));
20407 return true;
20409 default:
20410 return false;
20413 emit_move_insn (target, const_vec);
20414 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20415 return true;
20418 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20419 all values variable, and none identical. */
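/* In the element-wise fallback at the end of this function the scalar
   elements are first packed into word_mode integers.  Sketch (for
   illustration only) for a V8HImode vector {e0, ..., e7} on a 32-bit
   target, where each SImode word holds two HImode elements:

     word[i] = ((unsigned) e[2*i + 1] << 16) | e[2*i];

   The four words are then assembled into a V4SImode vector by a recursive
   call and the result is reinterpreted as V8HImode.  */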
20421 static void
20422 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20423 rtx target, rtx vals)
20425 enum machine_mode half_mode = GET_MODE_INNER (mode);
20426 rtx op0 = NULL, op1 = NULL;
20427 bool use_vec_concat = false;
20429 switch (mode)
20431 case V2SFmode:
20432 case V2SImode:
20433 if (!mmx_ok && !TARGET_SSE)
20434 break;
20435 /* FALLTHRU */
20437 case V2DFmode:
20438 case V2DImode:
20439 /* For the two element vectors, we always implement VEC_CONCAT. */
20440 op0 = XVECEXP (vals, 0, 0);
20441 op1 = XVECEXP (vals, 0, 1);
20442 use_vec_concat = true;
20443 break;
20445 case V4SFmode:
20446 half_mode = V2SFmode;
20447 goto half;
20448 case V4SImode:
20449 half_mode = V2SImode;
20450 goto half;
20451 half:
20453 rtvec v;
20455 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20456 Recurse to load the two halves. */
20458 op0 = gen_reg_rtx (half_mode);
20459 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20460 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20462 op1 = gen_reg_rtx (half_mode);
20463 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20464 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20466 use_vec_concat = true;
20468 break;
20470 case V8HImode:
20471 case V16QImode:
20472 case V4HImode:
20473 case V8QImode:
20474 break;
20476 default:
20477 gcc_unreachable ();
20480 if (use_vec_concat)
20482 if (!register_operand (op0, half_mode))
20483 op0 = force_reg (half_mode, op0);
20484 if (!register_operand (op1, half_mode))
20485 op1 = force_reg (half_mode, op1);
20487 emit_insn (gen_rtx_SET (VOIDmode, target,
20488 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20490 else
20492 int i, j, n_elts, n_words, n_elt_per_word;
20493 enum machine_mode inner_mode;
20494 rtx words[4], shift;
20496 inner_mode = GET_MODE_INNER (mode);
20497 n_elts = GET_MODE_NUNITS (mode);
20498 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20499 n_elt_per_word = n_elts / n_words;
20500 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20502 for (i = 0; i < n_words; ++i)
20504 rtx word = NULL_RTX;
20506 for (j = 0; j < n_elt_per_word; ++j)
20508 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20509 elt = convert_modes (word_mode, inner_mode, elt, true);
20511 if (j == 0)
20512 word = elt;
20513 else
20515 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20516 word, 1, OPTAB_LIB_WIDEN);
20517 word = expand_simple_binop (word_mode, IOR, word, elt,
20518 word, 1, OPTAB_LIB_WIDEN);
20522 words[i] = word;
20525 if (n_words == 1)
20526 emit_move_insn (target, gen_lowpart (mode, words[0]));
20527 else if (n_words == 2)
20529 rtx tmp = gen_reg_rtx (mode);
20530 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20531 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20532 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20533 emit_move_insn (target, tmp);
20535 else if (n_words == 4)
20537 rtx tmp = gen_reg_rtx (V4SImode);
20538 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20539 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20540 emit_move_insn (target, gen_lowpart (mode, tmp));
20542 else
20543 gcc_unreachable ();
20547 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20548 instructions unless MMX_OK is true. */
20550 void
20551 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20553 enum machine_mode mode = GET_MODE (target);
20554 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20555 int n_elts = GET_MODE_NUNITS (mode);
20556 int n_var = 0, one_var = -1;
20557 bool all_same = true, all_const_zero = true;
20558 int i;
20559 rtx x;
20561 for (i = 0; i < n_elts; ++i)
20563 x = XVECEXP (vals, 0, i);
20564 if (!CONSTANT_P (x))
20565 n_var++, one_var = i;
20566 else if (x != CONST0_RTX (inner_mode))
20567 all_const_zero = false;
20568 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20569 all_same = false;
20572 /* Constants are best loaded from the constant pool. */
20573 if (n_var == 0)
20575 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20576 return;
20579 /* If all values are identical, broadcast the value. */
20580 if (all_same
20581 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20582 XVECEXP (vals, 0, 0)))
20583 return;
20585 /* Values where only one field is non-constant are best loaded from
20586 the pool and overwritten via move later. */
20587 if (n_var == 1)
20589 if (all_const_zero
20590 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20591 XVECEXP (vals, 0, one_var),
20592 one_var))
20593 return;
20595 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20596 return;
20599 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20602 void
20603 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20605 enum machine_mode mode = GET_MODE (target);
20606 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20607 bool use_vec_merge = false;
20608 rtx tmp;
20610 switch (mode)
20612 case V2SFmode:
20613 case V2SImode:
20614 if (mmx_ok)
20616 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20617 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20618 if (elt == 0)
20619 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20620 else
20621 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20622 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20623 return;
20625 break;
20627 case V2DFmode:
20628 case V2DImode:
20630 rtx op0, op1;
20632 /* For the two element vectors, we implement a VEC_CONCAT with
20633 the extraction of the other element. */
20635 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20636 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20638 if (elt == 0)
20639 op0 = val, op1 = tmp;
20640 else
20641 op0 = tmp, op1 = val;
20643 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20644 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20646 return;
20648 case V4SFmode:
20649 switch (elt)
20651 case 0:
20652 use_vec_merge = true;
20653 break;
20655 case 1:
20656 /* tmp = target = A B C D */
20657 tmp = copy_to_reg (target);
20658 /* target = A A B B */
20659 emit_insn (gen_sse_unpcklps (target, target, target));
20660 /* target = X A B B */
20661 ix86_expand_vector_set (false, target, val, 0);
20662 /* target = A X C D */
20663 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20664 GEN_INT (1), GEN_INT (0),
20665 GEN_INT (2+4), GEN_INT (3+4)));
20666 return;
20668 case 2:
20669 /* tmp = target = A B C D */
20670 tmp = copy_to_reg (target);
20671 /* tmp = X B C D */
20672 ix86_expand_vector_set (false, tmp, val, 0);
20673 /* target = A B X D */
20674 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20675 GEN_INT (0), GEN_INT (1),
20676 GEN_INT (0+4), GEN_INT (3+4)));
20677 return;
20679 case 3:
20680 /* tmp = target = A B C D */
20681 tmp = copy_to_reg (target);
20682 /* tmp = X B C D */
20683 ix86_expand_vector_set (false, tmp, val, 0);
20684 /* target = A B C X */
20685 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20686 GEN_INT (0), GEN_INT (1),
20687 GEN_INT (2+4), GEN_INT (0+4)));
20688 return;
20690 default:
20691 gcc_unreachable ();
20693 break;
20695 case V4SImode:
20696 /* Element 0 handled by vec_merge below. */
20697 if (elt == 0)
20699 use_vec_merge = true;
20700 break;
20703 if (TARGET_SSE2)
20705 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20706 store into element 0, then shuffle them back. */
20708 rtx order[4];
20710 order[0] = GEN_INT (elt);
20711 order[1] = const1_rtx;
20712 order[2] = const2_rtx;
20713 order[3] = GEN_INT (3);
20714 order[elt] = const0_rtx;
20716 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20717 order[1], order[2], order[3]));
20719 ix86_expand_vector_set (false, target, val, 0);
20721 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20722 order[1], order[2], order[3]));
20724 else
20726 /* For SSE1, we have to reuse the V4SF code. */
20727 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20728 gen_lowpart (SFmode, val), elt);
20730 return;
20732 case V8HImode:
20733 use_vec_merge = TARGET_SSE2;
20734 break;
20735 case V4HImode:
20736 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20737 break;
20739 case V16QImode:
20740 case V8QImode:
20741 default:
20742 break;
20745 if (use_vec_merge)
20747 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20748 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20749 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20751 else
20753 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20755 emit_move_insn (mem, target);
20757 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20758 emit_move_insn (tmp, val);
20760 emit_move_insn (target, mem);
20764 void
20765 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20767 enum machine_mode mode = GET_MODE (vec);
20768 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20769 bool use_vec_extr = false;
20770 rtx tmp;
20772 switch (mode)
20774 case V2SImode:
20775 case V2SFmode:
20776 if (!mmx_ok)
20777 break;
20778 /* FALLTHRU */
20780 case V2DFmode:
20781 case V2DImode:
20782 use_vec_extr = true;
20783 break;
20785 case V4SFmode:
20786 switch (elt)
20788 case 0:
20789 tmp = vec;
20790 break;
20792 case 1:
20793 case 3:
20794 tmp = gen_reg_rtx (mode);
20795 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20796 GEN_INT (elt), GEN_INT (elt),
20797 GEN_INT (elt+4), GEN_INT (elt+4)));
20798 break;
20800 case 2:
20801 tmp = gen_reg_rtx (mode);
20802 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20803 break;
20805 default:
20806 gcc_unreachable ();
20808 vec = tmp;
20809 use_vec_extr = true;
20810 elt = 0;
20811 break;
20813 case V4SImode:
20814 if (TARGET_SSE2)
20816 switch (elt)
20818 case 0:
20819 tmp = vec;
20820 break;
20822 case 1:
20823 case 3:
20824 tmp = gen_reg_rtx (mode);
20825 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20826 GEN_INT (elt), GEN_INT (elt),
20827 GEN_INT (elt), GEN_INT (elt)));
20828 break;
20830 case 2:
20831 tmp = gen_reg_rtx (mode);
20832 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20833 break;
20835 default:
20836 gcc_unreachable ();
20838 vec = tmp;
20839 use_vec_extr = true;
20840 elt = 0;
20842 else
20844 /* For SSE1, we have to reuse the V4SF code. */
20845 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20846 gen_lowpart (V4SFmode, vec), elt);
20847 return;
20849 break;
20851 case V8HImode:
20852 use_vec_extr = TARGET_SSE2;
20853 break;
20854 case V4HImode:
20855 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20856 break;
20858 case V16QImode:
20859 case V8QImode:
20860 /* ??? Could extract the appropriate HImode element and shift. */
20861 default:
20862 break;
20865 if (use_vec_extr)
20867 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20868 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20870 /* Let the rtl optimizers know about the zero extension performed. */
20871 if (inner_mode == HImode)
20873 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20874 target = gen_lowpart (SImode, target);
20877 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20879 else
20881 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20883 emit_move_insn (mem, vec);
20885 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20886 emit_move_insn (target, tmp);
20890 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20891 pattern to reduce; DEST is the destination; IN is the input vector. */
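/* Sketch of the data flow (illustration only), using addition as FN and
   IN = {a, b, c, d}:

     tmp1 = movhlps (in, in)            = { c,   d,   c, d }
     tmp2 = fn (tmp1, in)               = { a+c, b+d, . , . }
     tmp3 = shufps (tmp2, tmp2, elt 1)  = { b+d, b+d, . , . }
     dest = fn (tmp2, tmp3)             = { a+b+c+d, ., ., . }

   Only element 0 of DEST is meaningful.  */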
20893 void
20894 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20896 rtx tmp1, tmp2, tmp3;
20898 tmp1 = gen_reg_rtx (V4SFmode);
20899 tmp2 = gen_reg_rtx (V4SFmode);
20900 tmp3 = gen_reg_rtx (V4SFmode);
20902 emit_insn (gen_sse_movhlps (tmp1, in, in));
20903 emit_insn (fn (tmp2, tmp1, in));
20905 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20906 GEN_INT (1), GEN_INT (1),
20907 GEN_INT (1+4), GEN_INT (1+4)));
20908 emit_insn (fn (dest, tmp2, tmp3));
20911 /* Target hook for scalar_mode_supported_p. */
20912 static bool
20913 ix86_scalar_mode_supported_p (enum machine_mode mode)
20915 if (DECIMAL_FLOAT_MODE_P (mode))
20916 return true;
20917 else
20918 return default_scalar_mode_supported_p (mode);
20921 /* Implements target hook vector_mode_supported_p. */
20922 static bool
20923 ix86_vector_mode_supported_p (enum machine_mode mode)
20925 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20926 return true;
20927 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20928 return true;
20929 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20930 return true;
20931 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20932 return true;
20933 return false;
20936 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20938 We do this in the new i386 backend to maintain source compatibility
20939 with the old cc0-based compiler. */
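/* As a consequence (illustration only), a user asm such as

     asm ("addl %1, %0" : "+r" (x) : "r" (y));

   is treated as if it also clobbered "flags" and "fpsr", so the condition
   codes never need to be listed explicitly on i386.  */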
20941 static tree
20942 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20943 tree inputs ATTRIBUTE_UNUSED,
20944 tree clobbers)
20946 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20947 clobbers);
20948 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20949 clobbers);
20950 return clobbers;
20953 /* Return true if this goes in large data/bss. */
20955 static bool
20956 ix86_in_large_data_p (tree exp)
20958 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20959 return false;
20961 /* Functions are never large data. */
20962 if (TREE_CODE (exp) == FUNCTION_DECL)
20963 return false;
20965 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20967 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20968 if (strcmp (section, ".ldata") == 0
20969 || strcmp (section, ".lbss") == 0)
20970 return true;
20971 return false;
20973 else
20975 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20977 /* If this is an incomplete type with size 0, then we can't put it
20978 in data because it might be too big when completed. */
20979 if (!size || size > ix86_section_threshold)
20980 return true;
20983 return false;
20985 static void
20986 ix86_encode_section_info (tree decl, rtx rtl, int first)
20988 default_encode_section_info (decl, rtl, first);
20990 if (TREE_CODE (decl) == VAR_DECL
20991 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20992 && ix86_in_large_data_p (decl))
20993 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20996 /* Worker function for REVERSE_CONDITION. */
20998 enum rtx_code
20999 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
21001 return (mode != CCFPmode && mode != CCFPUmode
21002 ? reverse_condition (code)
21003 : reverse_condition_maybe_unordered (code));
21006 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21007 to OPERANDS[0]. */
21009 const char *
21010 output_387_reg_move (rtx insn, rtx *operands)
21012 if (REG_P (operands[1])
21013 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21015 if (REGNO (operands[0]) == FIRST_STACK_REG)
21016 return output_387_ffreep (operands, 0);
21017 return "fstp\t%y0";
21019 if (STACK_TOP_P (operands[0]))
21020 return "fld%z1\t%y1";
21021 return "fst\t%y0";
21024 /* Output code to perform a conditional jump to LABEL, if the C2 flag of
21025 the FP status register is set. */
21027 void
21028 ix86_emit_fp_unordered_jump (rtx label)
21030 rtx reg = gen_reg_rtx (HImode);
21031 rtx temp;
21033 emit_insn (gen_x86_fnstsw_1 (reg));
21035 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
21037 emit_insn (gen_x86_sahf_1 (reg));
21039 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21040 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
21042 else
21044 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21046 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21047 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21050 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21051 gen_rtx_LABEL_REF (VOIDmode, label),
21052 pc_rtx);
21053 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21055 emit_jump_insn (temp);
21056 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21059 /* Output code to perform a log1p XFmode calculation. */
21061 void ix86_emit_i387_log1p (rtx op0, rtx op1)
21063 rtx label1 = gen_label_rtx ();
21064 rtx label2 = gen_label_rtx ();
21066 rtx tmp = gen_reg_rtx (XFmode);
21067 rtx tmp2 = gen_reg_rtx (XFmode);
21069 emit_insn (gen_absxf2 (tmp, op1));
21070 emit_insn (gen_cmpxf (tmp,
21071 CONST_DOUBLE_FROM_REAL_VALUE (
21072 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21073 XFmode)));
21074 emit_jump_insn (gen_bge (label1));
21076 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21077 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21078 emit_jump (label2);
21080 emit_label (label1);
21081 emit_move_insn (tmp, CONST1_RTX (XFmode));
21082 emit_insn (gen_addxf3 (tmp, op1, tmp));
21083 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21084 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21086 emit_label (label2);
21089 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21091 static void
21092 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21093 tree decl)
21095 /* With Binutils 2.15, the "@unwind" marker must be specified on
21096 every occurrence of the ".eh_frame" section, not just the first
21097 one. */
21098 if (TARGET_64BIT
21099 && strcmp (name, ".eh_frame") == 0)
21101 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21102 flags & SECTION_WRITE ? "aw" : "a");
21103 return;
21105 default_elf_asm_named_section (name, flags, decl);
21108 /* Return the mangling of TYPE if it is an extended fundamental type. */
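/* For example (illustration only), with this hook "void f (__float128)"
   mangles as "_Z1fg" while "void f (long double)" mangles as "_Z1fe".  */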
21110 static const char *
21111 ix86_mangle_fundamental_type (tree type)
21113 switch (TYPE_MODE (type))
21115 case TFmode:
21116 /* __float128 is "g". */
21117 return "g";
21118 case XFmode:
21119 /* "long double" or __float80 is "e". */
21120 return "e";
21121 default:
21122 return NULL;
21126 /* For 32-bit code we can save PIC register setup by using the
21127 __stack_chk_fail_local hidden function instead of calling
21128 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
21129 register, so it is better to call __stack_chk_fail directly. */
21131 static tree
21132 ix86_stack_protect_fail (void)
21134 return TARGET_64BIT
21135 ? default_external_stack_protect_fail ()
21136 : default_hidden_stack_protect_fail ();
21139 /* Select a format to encode pointers in exception handling data. CODE
21140 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21141 true if the symbol may be affected by dynamic relocations.
21143 ??? All x86 object file formats are capable of representing this.
21144 After all, the relocation needed is the same as for the call insn.
21145 Whether or not a particular assembler allows us to enter such, I
21146 guess we'll have to see. */
21147 int
21148 asm_preferred_eh_data_format (int code, int global)
21150 if (flag_pic)
21152 int type = DW_EH_PE_sdata8;
21153 if (!TARGET_64BIT
21154 || ix86_cmodel == CM_SMALL_PIC
21155 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21156 type = DW_EH_PE_sdata4;
21157 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21159 if (ix86_cmodel == CM_SMALL
21160 || (ix86_cmodel == CM_MEDIUM && code))
21161 return DW_EH_PE_udata4;
21162 return DW_EH_PE_absptr;
21165 /* Expand a copysign operation: copy the sign bit from SIGN onto the
21166 positive value ABS_VALUE, storing the result in RESULT. If MASK is
21167 non-null, it shall be a mask that masks out the sign-bit. */
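/* In effect (sketch only) this computes

     result = abs_value | (sign & SIGN_BIT_MASK)

   i.e. ABS_VALUE with the sign bit copied from SIGN.  */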
21168 static void
21169 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21171 enum machine_mode mode = GET_MODE (sign);
21172 rtx sgn = gen_reg_rtx (mode);
21173 if (mask == NULL_RTX)
21175 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21176 if (!VECTOR_MODE_P (mode))
21178 /* We need to generate a scalar mode mask in this case. */
21179 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21180 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21181 mask = gen_reg_rtx (mode);
21182 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21185 else
21186 mask = gen_rtx_NOT (mode, mask);
21187 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21188 gen_rtx_AND (mode, mask, sign)));
21189 emit_insn (gen_rtx_SET (VOIDmode, result,
21190 gen_rtx_IOR (mode, abs_value, sgn)));
21193 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21194 mask for masking out the sign-bit is stored in *SMASK, if that is
21195 non-null. */
21196 static rtx
21197 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21199 enum machine_mode mode = GET_MODE (op0);
21200 rtx xa, mask;
21202 xa = gen_reg_rtx (mode);
21203 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21204 if (!VECTOR_MODE_P (mode))
21206 /* We need to generate a scalar mode mask in this case. */
21207 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21208 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21209 mask = gen_reg_rtx (mode);
21210 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21212 emit_insn (gen_rtx_SET (VOIDmode, xa,
21213 gen_rtx_AND (mode, op0, mask)));
21215 if (smask)
21216 *smask = mask;
21218 return xa;
21221 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21222 swapping the operands if SWAP_OPERANDS is true. The expanded
21223 code is a forward jump to a newly created label in case the
21224 comparison is true. The generated label rtx is returned. */
21225 static rtx
21226 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21227 bool swap_operands)
21229 rtx label, tmp;
21231 if (swap_operands)
21233 tmp = op0;
21234 op0 = op1;
21235 op1 = tmp;
21238 label = gen_label_rtx ();
21239 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21240 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21241 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21242 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21243 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21244 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21245 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21246 JUMP_LABEL (tmp) = label;
21248 return label;
21251 /* Expand a mask-generating SSE comparison instruction comparing OP0 with OP1
21252 using comparison code CODE. Operands are swapped for the comparison if
21253 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21254 static rtx
21255 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21256 bool swap_operands)
21258 enum machine_mode mode = GET_MODE (op0);
21259 rtx mask = gen_reg_rtx (mode);
21261 if (swap_operands)
21263 rtx tmp = op0;
21264 op0 = op1;
21265 op1 = tmp;
21268 if (mode == DFmode)
21269 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21270 gen_rtx_fmt_ee (code, mode, op0, op1)));
21271 else
21272 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21273 gen_rtx_fmt_ee (code, mode, op0, op1)));
21275 return mask;
21278 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21279 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
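/* This constant drives the rounding expanders below: for 0 <= x < 2**52
   in DFmode, the sum x + 2**52 has no fraction bits left in its mantissa,
   so x + 2**52 - 2**52 yields x rounded to an integer in the current
   rounding mode.  For example (illustration only), 2.75 + 2**52 - 2**52
   evaluates to 3.0 under round-to-nearest.  */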
21280 static rtx
21281 ix86_gen_TWO52 (enum machine_mode mode)
21283 REAL_VALUE_TYPE TWO52r;
21284 rtx TWO52;
21286 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21287 TWO52 = const_double_from_real_value (TWO52r, mode);
21288 TWO52 = force_reg (mode, TWO52);
21290 return TWO52;
21293 /* Expand SSE sequence for computing lround from OP1 storing
21294 into OP0. */
21295 void
21296 ix86_expand_lround (rtx op0, rtx op1)
21298 /* C code for the stuff we're doing below:
21299 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21300 return (long)tmp;
21302 enum machine_mode mode = GET_MODE (op1);
21303 const struct real_format *fmt;
21304 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21305 rtx adj;
21307 /* load nextafter (0.5, 0.0) */
21308 fmt = REAL_MODE_FORMAT (mode);
21309 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21310 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21312 /* adj = copysign (0.5, op1) */
21313 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21314 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21316 /* adj = op1 + adj */
21317 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21319 /* op0 = (imode)adj */
21320 expand_fix (op0, adj, 0);
21323 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
21324 into OPERAND0. */
21325 void
21326 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21328 /* C code for the stuff we're doing below (for do_floor):
21329 xi = (long)op1;
21330 xi -= (double)xi > op1 ? 1 : 0;
21331 return xi;
21333 enum machine_mode fmode = GET_MODE (op1);
21334 enum machine_mode imode = GET_MODE (op0);
21335 rtx ireg, freg, label, tmp;
21337 /* reg = (long)op1 */
21338 ireg = gen_reg_rtx (imode);
21339 expand_fix (ireg, op1, 0);
21341 /* freg = (double)reg */
21342 freg = gen_reg_rtx (fmode);
21343 expand_float (freg, ireg, 0);
21345 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21346 label = ix86_expand_sse_compare_and_jump (UNLE,
21347 freg, op1, !do_floor);
21348 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21349 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21350 emit_move_insn (ireg, tmp);
21352 emit_label (label);
21353 LABEL_NUSES (label) = 1;
21355 emit_move_insn (op0, ireg);
21358 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21359 result in OPERAND0. */
21360 void
21361 ix86_expand_rint (rtx operand0, rtx operand1)
21363 /* C code for the stuff we're doing below:
21364 xa = fabs (operand1);
21365 if (!isless (xa, 2**52))
21366 return operand1;
21367 xa = xa + 2**52 - 2**52;
21368 return copysign (xa, operand1);
21370 enum machine_mode mode = GET_MODE (operand0);
21371 rtx res, xa, label, TWO52, mask;
21373 res = gen_reg_rtx (mode);
21374 emit_move_insn (res, operand1);
21376 /* xa = abs (operand1) */
21377 xa = ix86_expand_sse_fabs (res, &mask);
21379 /* if (!isless (xa, TWO52)) goto label; */
21380 TWO52 = ix86_gen_TWO52 (mode);
21381 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21383 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21384 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21386 ix86_sse_copysign_to_positive (res, xa, res, mask);
21388 emit_label (label);
21389 LABEL_NUSES (label) = 1;
21391 emit_move_insn (operand0, res);
21394 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21395 into OPERAND0. */
21396 void
21397 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21399 /* C code for the stuff we expand below.
21400 double xa = fabs (x), x2;
21401 if (!isless (xa, TWO52))
21402 return x;
21403 xa = xa + TWO52 - TWO52;
21404 x2 = copysign (xa, x);
21405 Compensate. Floor:
21406 if (x2 > x)
21407 x2 -= 1;
21408 Compensate. Ceil:
21409 if (x2 < x)
21410 x2 -= -1;
21411 return x2;
21413 enum machine_mode mode = GET_MODE (operand0);
21414 rtx xa, TWO52, tmp, label, one, res, mask;
21416 TWO52 = ix86_gen_TWO52 (mode);
21418 /* Temporary for holding the result, initialized to the input
21419 operand to ease control flow. */
21420 res = gen_reg_rtx (mode);
21421 emit_move_insn (res, operand1);
21423 /* xa = abs (operand1) */
21424 xa = ix86_expand_sse_fabs (res, &mask);
21426 /* if (!isless (xa, TWO52)) goto label; */
21427 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21429 /* xa = xa + TWO52 - TWO52; */
21430 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21431 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21433 /* xa = copysign (xa, operand1) */
21434 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21436 /* generate 1.0 or -1.0 */
21437 one = force_reg (mode,
21438 const_double_from_real_value (do_floor
21439 ? dconst1 : dconstm1, mode));
21441 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21442 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21443 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21444 gen_rtx_AND (mode, one, tmp)));
21445 /* We always need to subtract here to preserve signed zero. */
21446 tmp = expand_simple_binop (mode, MINUS,
21447 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21448 emit_move_insn (res, tmp);
21450 emit_label (label);
21451 LABEL_NUSES (label) = 1;
21453 emit_move_insn (operand0, res);
21456 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21457 into OPERAND0. */
21458 void
21459 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21461 /* C code for the stuff we expand below.
21462 double xa = fabs (x), x2;
21463 if (!isless (xa, TWO52))
21464 return x;
21465 x2 = (double)(long)x;
21466 Compensate. Floor:
21467 if (x2 > x)
21468 x2 -= 1;
21469 Compensate. Ceil:
21470 if (x2 < x)
21471 x2 += 1;
21472 if (HONOR_SIGNED_ZEROS (mode))
21473 return copysign (x2, x);
21474 return x2;
21476 enum machine_mode mode = GET_MODE (operand0);
21477 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21479 TWO52 = ix86_gen_TWO52 (mode);
21481 /* Temporary for holding the result, initialized to the input
21482 operand to ease control flow. */
21483 res = gen_reg_rtx (mode);
21484 emit_move_insn (res, operand1);
21486 /* xa = abs (operand1) */
21487 xa = ix86_expand_sse_fabs (res, &mask);
21489 /* if (!isless (xa, TWO52)) goto label; */
21490 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21492 /* xa = (double)(long)x */
21493 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21494 expand_fix (xi, res, 0);
21495 expand_float (xa, xi, 0);
21497 /* generate 1.0 */
21498 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21500 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21501 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21502 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21503 gen_rtx_AND (mode, one, tmp)));
21504 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21505 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21506 emit_move_insn (res, tmp);
21508 if (HONOR_SIGNED_ZEROS (mode))
21509 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21511 emit_label (label);
21512 LABEL_NUSES (label) = 1;
21514 emit_move_insn (operand0, res);
21517 /* Expand SSE sequence for computing round from OPERAND1 storing
21518 into OPERAND0. This sequence works without relying on DImode truncation
21519 via cvttsd2siq, which is only available on 64-bit targets. */
21520 void
21521 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21523 /* C code for the stuff we expand below.
21524 double xa = fabs (x), xa2, x2;
21525 if (!isless (xa, TWO52))
21526 return x;
21527 Using the absolute value and copying back sign makes
21528 -0.0 -> -0.0 correct.
21529 xa2 = xa + TWO52 - TWO52;
21530 Compensate.
21531 dxa = xa2 - xa;
21532 if (dxa <= -0.5)
21533 xa2 += 1;
21534 else if (dxa > 0.5)
21535 xa2 -= 1;
21536 x2 = copysign (xa2, x);
21537 return x2;
21539 enum machine_mode mode = GET_MODE (operand0);
21540 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21542 TWO52 = ix86_gen_TWO52 (mode);
21544 /* Temporary for holding the result, initialized to the input
21545 operand to ease control flow. */
21546 res = gen_reg_rtx (mode);
21547 emit_move_insn (res, operand1);
21549 /* xa = abs (operand1) */
21550 xa = ix86_expand_sse_fabs (res, &mask);
21552 /* if (!isless (xa, TWO52)) goto label; */
21553 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21555 /* xa2 = xa + TWO52 - TWO52; */
21556 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21557 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21559 /* dxa = xa2 - xa; */
21560 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21562 /* generate 0.5, 1.0 and -0.5 */
21563 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21564 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21565 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21566 0, OPTAB_DIRECT);
21568 /* Compensate. */
21569 tmp = gen_reg_rtx (mode);
21570 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21571 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21572 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21573 gen_rtx_AND (mode, one, tmp)));
21574 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21575 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21576 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21577 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21578 gen_rtx_AND (mode, one, tmp)));
21579 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21581 /* res = copysign (xa2, operand1) */
21582 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21584 emit_label (label);
21585 LABEL_NUSES (label) = 1;
21587 emit_move_insn (operand0, res);
21590 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21591 into OPERAND0. */
21592 void
21593 ix86_expand_trunc (rtx operand0, rtx operand1)
21595 /* C code for SSE variant we expand below.
21596 double xa = fabs (x), x2;
21597 if (!isless (xa, TWO52))
21598 return x;
21599 x2 = (double)(long)x;
21600 if (HONOR_SIGNED_ZEROS (mode))
21601 return copysign (x2, x);
21602 return x2;
21604 enum machine_mode mode = GET_MODE (operand0);
21605 rtx xa, xi, TWO52, label, res, mask;
21607 TWO52 = ix86_gen_TWO52 (mode);
21609 /* Temporary for holding the result, initialized to the input
21610 operand to ease control flow. */
21611 res = gen_reg_rtx (mode);
21612 emit_move_insn (res, operand1);
21614 /* xa = abs (operand1) */
21615 xa = ix86_expand_sse_fabs (res, &mask);
21617 /* if (!isless (xa, TWO52)) goto label; */
21618 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21620 /* x = (double)(long)x */
21621 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21622 expand_fix (xi, res, 0);
21623 expand_float (res, xi, 0);
21625 if (HONOR_SIGNED_ZEROS (mode))
21626 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21628 emit_label (label);
21629 LABEL_NUSES (label) = 1;
21631 emit_move_insn (operand0, res);
21634 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21635 into OPERAND0. */
21636 void
21637 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21639 enum machine_mode mode = GET_MODE (operand0);
21640 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21642 /* C code for SSE variant we expand below.
21643 double xa = fabs (x), x2;
21644 if (!isless (xa, TWO52))
21645 return x;
21646 xa2 = xa + TWO52 - TWO52;
21647 Compensate:
21648 if (xa2 > xa)
21649 xa2 -= 1.0;
21650 x2 = copysign (xa2, x);
21651 return x2;
21654 TWO52 = ix86_gen_TWO52 (mode);
21656 /* Temporary for holding the result, initialized to the input
21657 operand to ease control flow. */
21658 res = gen_reg_rtx (mode);
21659 emit_move_insn (res, operand1);
21661 /* xa = abs (operand1) */
21662 xa = ix86_expand_sse_fabs (res, &smask);
21664 /* if (!isless (xa, TWO52)) goto label; */
21665 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21667 /* res = xa + TWO52 - TWO52; */
21668 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21669 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21670 emit_move_insn (res, tmp);
21672 /* generate 1.0 */
21673 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21675 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21676 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21677 emit_insn (gen_rtx_SET (VOIDmode, mask,
21678 gen_rtx_AND (mode, mask, one)));
21679 tmp = expand_simple_binop (mode, MINUS,
21680 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21681 emit_move_insn (res, tmp);
21683 /* res = copysign (res, operand1) */
21684 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21686 emit_label (label);
21687 LABEL_NUSES (label) = 1;
21689 emit_move_insn (operand0, res);
21692 /* Expand SSE sequence for computing round from OPERAND1 storing
21693 into OPERAND0. */
21694 void
21695 ix86_expand_round (rtx operand0, rtx operand1)
21697 /* C code for the stuff we're doing below:
21698 double xa = fabs (x);
21699 if (!isless (xa, TWO52))
21700 return x;
21701 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21702 return copysign (xa, x);
21704 enum machine_mode mode = GET_MODE (operand0);
21705 rtx res, TWO52, xa, label, xi, half, mask;
21706 const struct real_format *fmt;
21707 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21709 /* Temporary for holding the result, initialized to the input
21710 operand to ease control flow. */
21711 res = gen_reg_rtx (mode);
21712 emit_move_insn (res, operand1);
21714 TWO52 = ix86_gen_TWO52 (mode);
21715 xa = ix86_expand_sse_fabs (res, &mask);
21716 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21718 /* load nextafter (0.5, 0.0) */
21719 fmt = REAL_MODE_FORMAT (mode);
21720 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21721 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21723 /* xa = xa + 0.5 */
21724 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21725 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21727 /* xa = (double)(int64_t)xa */
21728 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21729 expand_fix (xi, xa, 0);
21730 expand_float (xa, xi, 0);
21732 /* res = copysign (xa, operand1) */
21733 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21735 emit_label (label);
21736 LABEL_NUSES (label) = 1;
21738 emit_move_insn (operand0, res);
21741 #include "gt-i386.h"