1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
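/* Editorial sketch (not in the original sources): MODE_INDEX selects the
   per-mode entry of the five-element cost arrays used throughout the tables
   below (QImode, HImode, SImode, DImode, everything else).  The function and
   parameter names here are illustrative; the real arrays are fields of
   struct processor_costs declared in the target headers.  */
static inline int
example_mode_cost (enum machine_mode mode, const int cost_table[5])
{
  /* QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything else -> 4.  */
  return cost_table[MODE_INDEX (mode)];
}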
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
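/* Editorial note: with COSTS_N_INSNS (N) defined as (N) * 4, a two-byte
   addition costs COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the
   byte-based size costs below stay on the same scale as the cycle-based
   tables that follow.  */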
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
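/* Editorial sketch (not in the original sources): how the stringop
   descriptors below are meant to be read.  The first member gives the
   algorithm to use when the block size is unknown at compile time; it is
   followed by {max, alg} pairs giving the algorithm for known sizes up to
   MAX bytes, with -1 terminating the list.  The struct and function names
   here are illustrative; the real declaration lives in i386.h and the real
   selection logic appears later in this file.  */
struct example_stringop_algs
{
  enum stringop_alg unknown_size;
  struct { int max; enum stringop_alg alg; } size[4];
};

static enum stringop_alg
example_pick_stringop_alg (const struct example_stringop_algs *algs,
			   HOST_WIDE_INT count, int size_known)
{
  int i;
  if (!size_known)
    return algs->unknown_size;
  for (i = 0; i < 4; i++)
    if (algs->size[i].max == -1 || count <= algs->size[i].max)
      return algs->size[i].alg;
  return libcall;
}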
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
533 than K8 does. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea to leave
587 the number of prefetches entirely unlimited, as their execution also takes
588 some time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, a libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it is probably not a good idea to leave
660 the number of prefetches entirely unlimited, as their execution also takes
661 some time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, a libcall can
673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration, lea is 2 cycles or more. With
863 that cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
980 const struct processor_costs *ix86_cost = &pentium_cost;
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003 /* Generic instruction choice should be the common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling it for Generic64 seems like a good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro-based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1031 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
1032 on simulation results. But after the P4 was made, no performance benefit
1033 was observed from branch hints. They also increase code size.
1034 As a result, icc never generates branch hints. */
1035 0,
1037 /* X86_TUNE_DOUBLE_WITH_ADD */
1038 ~m_386,
1040 /* X86_TUNE_USE_SAHF */
1041 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1042 | m_NOCONA | m_CORE2 | m_GENERIC,
1044 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1045 partial dependencies. */
1046 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1047 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1049 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1050 register stalls on Generic32 compilation setting as well. However
1051 in current implementation the partial register stalls are not eliminated
1052 very well - they can be introduced via subregs synthesized by combine
1053 and can happen in caller/callee saving sequences. Because this option
1054 pays back little on PPro based chips and is in conflict with partial reg
1055 dependencies used by Athlon/P4 based chips, it is better to leave it off
1056 for generic32 for now. */
1057 m_PPRO,
1059 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1060 m_CORE2 | m_GENERIC,
1062 /* X86_TUNE_USE_HIMODE_FIOP */
1063 m_386 | m_486 | m_K6_GEODE,
1065 /* X86_TUNE_USE_SIMODE_FIOP */
1066 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1068 /* X86_TUNE_USE_MOV0 */
1069 m_K6,
1071 /* X86_TUNE_USE_CLTD */
1072 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1074 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1075 m_PENT4,
1077 /* X86_TUNE_SPLIT_LONG_MOVES */
1078 m_PPRO,
1080 /* X86_TUNE_READ_MODIFY_WRITE */
1081 ~m_PENT,
1083 /* X86_TUNE_READ_MODIFY */
1084 ~(m_PENT | m_PPRO),
1086 /* X86_TUNE_PROMOTE_QIMODE */
1087 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1088 | m_GENERIC /* | m_PENT4 ? */,
1090 /* X86_TUNE_FAST_PREFIX */
1091 ~(m_PENT | m_486 | m_386),
1093 /* X86_TUNE_SINGLE_STRINGOP */
1094 m_386 | m_PENT4 | m_NOCONA,
1096 /* X86_TUNE_QIMODE_MATH */
1099 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1100 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1101 might be considered for Generic32 if our scheme for avoiding partial
1102 stalls was more effective. */
1103 ~m_PPRO,
1105 /* X86_TUNE_PROMOTE_QI_REGS */
1108 /* X86_TUNE_PROMOTE_HI_REGS */
1109 m_PPRO,
1111 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1112 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1114 /* X86_TUNE_ADD_ESP_8 */
1115 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1116 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1118 /* X86_TUNE_SUB_ESP_4 */
1119 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1121 /* X86_TUNE_SUB_ESP_8 */
1122 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1123 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1125 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1126 for DFmode copies */
1127 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1128 | m_GENERIC | m_GEODE),
1130 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1131 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1133 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1134 conflict here between PPro/Pentium4 based chips that treat 128bit
1135 SSE registers as single units and K8 based chips that divide SSE
1136 registers into two 64bit halves. This knob promotes all store destinations
1137 to be 128bit, to allow register renaming on 128bit SSE units, but usually
1138 results in one extra micro-op on 64bit SSE units. Experimental results
1139 show that disabling this option on P4 brings over a 20% SPECfp regression,
1140 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1141 masked by careful scheduling of moves. */
1142 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1144 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1145 m_AMDFAM10,
1147 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1148 are resolved on SSE register parts instead of whole registers, so we may
1149 maintain just lower part of scalar values in proper format leaving the
1150 upper part undefined. */
1151 m_ATHLON_K8,
1153 /* X86_TUNE_SSE_TYPELESS_STORES */
1154 m_ATHLON_K8_AMDFAM10,
1156 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1157 m_PPRO | m_PENT4 | m_NOCONA,
1159 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1160 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1162 /* X86_TUNE_PROLOGUE_USING_MOVE */
1163 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1165 /* X86_TUNE_EPILOGUE_USING_MOVE */
1166 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1168 /* X86_TUNE_SHIFT1 */
1169 ~m_486,
1171 /* X86_TUNE_USE_FFREEP */
1172 m_ATHLON_K8_AMDFAM10,
1174 /* X86_TUNE_INTER_UNIT_MOVES */
1175 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1177 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1178 than 4 branch instructions in the 16 byte window. */
1179 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1181 /* X86_TUNE_SCHEDULE */
1182 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1184 /* X86_TUNE_USE_BT */
1185 m_ATHLON_K8_AMDFAM10,
1187 /* X86_TUNE_USE_INCDEC */
1188 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1190 /* X86_TUNE_PAD_RETURNS */
1191 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1193 /* X86_TUNE_EXT_80387_CONSTANTS */
1194 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1196 /* X86_TUNE_SHORTEN_X87_SSE */
1197 ~m_K8,
1199 /* X86_TUNE_AVOID_VECTOR_DECODE */
1200 m_K8 | m_GENERIC64,
1202 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1203 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1204 ~(m_386 | m_486),
1206 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1207 vector path on AMD machines. */
1208 m_K8 | m_GENERIC64 | m_AMDFAM10,
1210 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1211 machines. */
1212 m_K8 | m_GENERIC64 | m_AMDFAM10,
1214 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1215 than a MOV. */
1216 m_PENT,
1218 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1219 but one byte longer. */
1220 m_PENT,
1222 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1223 operand that cannot be represented using a modRM byte. The XOR
1224 replacement is long decoded, so this split helps here as well. */
1225 m_K6,
1226 };
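/* Editorial sketch (not in the original sources): each entry of
   ix86_tune_features above is a mask of the processors that want the
   corresponding optimization, so a feature test amounts to checking the
   bit of the processor we are currently tuning for.  The macro name below
   is illustrative; the real feature-test macros live in the i386 headers.  */
#define EXAMPLE_TUNE_USE_LEAVE \
  (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1U << ix86_tune))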
1228 /* Feature tests against the various architecture variations. */
1229 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1230 /* X86_ARCH_CMOVE */
1231 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1234 ~m_386,
1236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1237 ~(m_386 | m_486),
1239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1240 ~m_386,
1242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1243 ~m_386,
1244 };
1246 static const unsigned int x86_accumulate_outgoing_args
1247 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1249 static const unsigned int x86_arch_always_fancy_math_387
1250 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC;
1253 static enum stringop_alg stringop_alg = no_stringop;
1255 /* In case the average insn count for single function invocation is
1256 lower than this constant, emit fast (but longer) prologue and
1257 epilogue code. */
1258 #define FAST_PROLOGUE_INSN_COUNT 20
1260 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1261 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1262 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1263 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1265 /* Array of the smallest class containing reg number REGNO, indexed by
1266 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1268 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1269 {
1270 /* ax, dx, cx, bx */
1271 AREG, DREG, CREG, BREG,
1272 /* si, di, bp, sp */
1273 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1274 /* FP registers */
1275 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1276 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1277 /* arg pointer */
1278 NON_Q_REGS,
1279 /* flags, fpsr, fpcr, frame */
1280 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1281 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1282 SSE_REGS, SSE_REGS,
1283 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1284 MMX_REGS, MMX_REGS,
1285 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1286 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1287 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1288 SSE_REGS, SSE_REGS,
1289 };
1291 /* The "default" register map used in 32bit mode. */
1293 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1294 {
1295 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1296 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1297 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1298 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1299 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1300 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1302 };
1304 static int const x86_64_int_parameter_registers[6] =
1305 {
1306 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1307 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1308 };
1310 static int const x86_64_ms_abi_int_parameter_registers[4] =
1311 {
1312 2 /*RCX*/, 1 /*RDX*/,
1313 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1314 };
1316 static int const x86_64_int_return_registers[4] =
1317 {
1318 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1319 };
1321 /* The "default" register map used in 64bit mode. */
1322 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1323 {
1324 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1325 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1326 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1327 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1328 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1329 8,9,10,11,12,13,14,15, /* extended integer registers */
1330 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1331 };
1333 /* Define the register numbers to be used in Dwarf debugging information.
1334 The SVR4 reference port C compiler uses the following register numbers
1335 in its Dwarf output code:
1336 0 for %eax (gcc regno = 0)
1337 1 for %ecx (gcc regno = 2)
1338 2 for %edx (gcc regno = 1)
1339 3 for %ebx (gcc regno = 3)
1340 4 for %esp (gcc regno = 7)
1341 5 for %ebp (gcc regno = 6)
1342 6 for %esi (gcc regno = 4)
1343 7 for %edi (gcc regno = 5)
1344 The following three DWARF register numbers are never generated by
1345 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1346 believes these numbers have these meanings.
1347 8 for %eip (no gcc equivalent)
1348 9 for %eflags (gcc regno = 17)
1349 10 for %trapno (no gcc equivalent)
1350 It is not at all clear how we should number the FP stack registers
1351 for the x86 architecture. If the version of SDB on x86/svr4 were
1352 a bit less brain dead with respect to floating-point then we would
1353 have a precedent to follow with respect to DWARF register numbers
1354 for x86 FP registers, but the SDB on x86/svr4 is so completely
1355 broken with respect to FP registers that it is hardly worth thinking
1356 of it as something to strive for compatibility with.
1357 The version of x86/svr4 SDB I have at the moment does (partially)
1358 seem to believe that DWARF register number 11 is associated with
1359 the x86 register %st(0), but that's about all. Higher DWARF
1360 register numbers don't seem to be associated with anything in
1361 particular, and even for DWARF regno 11, SDB only seems to under-
1362 stand that it should say that a variable lives in %st(0) (when
1363 asked via an `=' command) if we said it was in DWARF regno 11,
1364 but SDB still prints garbage when asked for the value of the
1365 variable in question (via a `/' command).
1366 (Also note that the labels SDB prints for various FP stack regs
1367 when doing an `x' command are all wrong.)
1368 Note that these problems generally don't affect the native SVR4
1369 C compiler because it doesn't allow the use of -O with -g and
1370 because when it is *not* optimizing, it allocates a memory
1371 location for each floating-point variable, and the memory
1372 location is what gets described in the DWARF AT_location
1373 attribute for the variable in question.
1374 Regardless of the severe mental illness of the x86/svr4 SDB, we
1375 do something sensible here and we use the following DWARF
1376 register numbers. Note that these are all stack-top-relative
1377 numbers.
1378 11 for %st(0) (gcc regno = 8)
1379 12 for %st(1) (gcc regno = 9)
1380 13 for %st(2) (gcc regno = 10)
1381 14 for %st(3) (gcc regno = 11)
1382 15 for %st(4) (gcc regno = 12)
1383 16 for %st(5) (gcc regno = 13)
1384 17 for %st(6) (gcc regno = 14)
1385 18 for %st(7) (gcc regno = 15)
1386 */
1387 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1388 {
1389 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1390 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1391 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1392 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1393 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1394 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1395 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1396 };
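/* Editorial sketch (not in the original sources): the register maps above
   are consumed by the debug-info register-numbering macros in the i386
   headers.  Conceptually, translating a gcc hard register number into a
   debug-format register number looks roughly like this (the function name
   is illustrative; the exact macro and map chosen depend on the target
   and ABI configuration).  */
static inline int
example_debug_regno (int regno)
{
  return TARGET_64BIT ? dbx64_register_map[regno]
		      : svr4_dbx_register_map[regno];
}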
1398 /* Test and compare insns in i386.md store the information needed to
1399 generate branch and scc insns here. */
1401 rtx ix86_compare_op0 = NULL_RTX;
1402 rtx ix86_compare_op1 = NULL_RTX;
1403 rtx ix86_compare_emitted = NULL_RTX;
1405 /* Size of the register save area. */
1406 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
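/* Editorial note: with the usual x86-64 SysV values (REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8, UNITS_PER_WORD == 8) this works out to
   6*8 + 8*16 == 176 bytes, the size of the register save area used for
   varargs functions.  */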
1408 /* Define the structure for the machine field in struct function. */
1410 struct stack_local_entry GTY(())
1411 {
1412 unsigned short mode;
1413 unsigned short n;
1414 rtx rtl;
1415 struct stack_local_entry *next;
1416 };
1418 /* Structure describing stack frame layout.
1419 Stack grows downward:
1421 [arguments]
1422 <- ARG_POINTER
1423 saved pc
1425 saved frame pointer if frame_pointer_needed
1426 <- HARD_FRAME_POINTER
1427 [saved regs]
1429 [padding1]            \
1431 [va_arg registers]     |
1432                        > to_allocate      <- FRAME_POINTER
1433 [frame]                |
1435 [padding2]            /
1437 struct ix86_frame
1439 int nregs;
1440 int padding1;
1441 int va_arg_size;
1442 HOST_WIDE_INT frame;
1443 int padding2;
1444 int outgoing_arguments_size;
1445 int red_zone_size;
1447 HOST_WIDE_INT to_allocate;
1448 /* The offsets relative to ARG_POINTER. */
1449 HOST_WIDE_INT frame_pointer_offset;
1450 HOST_WIDE_INT hard_frame_pointer_offset;
1451 HOST_WIDE_INT stack_pointer_offset;
1453 /* When save_regs_using_mov is set, emit prologue using
1454 move instead of push instructions. */
1455 bool save_regs_using_mov;
1458 /* Code model option. */
1459 enum cmodel ix86_cmodel;
1460 /* Asm dialect. */
1461 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1462 /* TLS dialects. */
1463 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1465 /* Which unit we are generating floating point math for. */
1466 enum fpmath_unit ix86_fpmath;
1468 /* Which cpu are we scheduling for. */
1469 enum processor_type ix86_tune;
1471 /* Which instruction set architecture to use. */
1472 enum processor_type ix86_arch;
1474 /* true if sse prefetch instruction is not NOOP. */
1475 int x86_prefetch_sse;
1477 /* ix86_regparm_string as a number */
1478 static int ix86_regparm;
1480 /* -mstackrealign option */
1481 extern int ix86_force_align_arg_pointer;
1482 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1484 /* Preferred alignment for stack boundary in bits. */
1485 unsigned int ix86_preferred_stack_boundary;
1487 /* Values 1-5: see jump.c */
1488 int ix86_branch_cost;
1490 /* With -mcmodel=medium, variables larger than this size are put in the
1491 large ldata/lbss sections; smaller ones stay in the normal data/bss. */
1493 int ix86_section_threshold = 65536;
1495 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1496 char internal_label_prefix[16];
1497 int internal_label_prefix_len;
1499 /* Register class used for passing given 64bit part of the argument.
1500 These represent classes as documented by the PS ABI, with the exception
1501 of the SSESF and SSEDF classes, which are basically the SSE class, except that
1502 gcc will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
1504 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1505 whenever possible (the upper half is only padding).
1506 enum x86_64_reg_class
1508 X86_64_NO_CLASS,
1509 X86_64_INTEGER_CLASS,
1510 X86_64_INTEGERSI_CLASS,
1511 X86_64_SSE_CLASS,
1512 X86_64_SSESF_CLASS,
1513 X86_64_SSEDF_CLASS,
1514 X86_64_SSEUP_CLASS,
1515 X86_64_X87_CLASS,
1516 X86_64_X87UP_CLASS,
1517 X86_64_COMPLEX_X87_CLASS,
1518 X86_64_MEMORY_CLASS
1520 static const char * const x86_64_reg_class_name[] =
1522 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1523 "sseup", "x87", "x87up", "cplx87", "no"
1526 #define MAX_CLASSES 4
1528 /* Table of constants used by fldpi, fldln2, etc.... */
1529 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1530 static bool ext_80387_constants_init = 0;
1533 static struct machine_function * ix86_init_machine_status (void);
1534 static rtx ix86_function_value (tree, tree, bool);
1535 static int ix86_function_regparm (tree, tree);
1536 static void ix86_compute_frame_layout (struct ix86_frame *);
1537 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1538 rtx, rtx, int);
1541 /* The svr4 ABI for the i386 says that records and unions are returned
1542 in memory. */
1543 #ifndef DEFAULT_PCC_STRUCT_RETURN
1544 #define DEFAULT_PCC_STRUCT_RETURN 1
1545 #endif
1547 /* Implement TARGET_HANDLE_OPTION. */
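/* Note that when an ISA extension is explicitly disabled, the cases below
   also clear the extensions that depend on it (e.g. -mno-sse also clears
   SSE2, SSE3, SSSE3 and SSE4A) and record them in target_flags_explicit,
   so that the -march/-mtune defaults applied later do not re-enable them.  */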
1549 static bool
1550 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1552 switch (code)
1554 case OPT_m3dnow:
1555 if (!value)
1557 target_flags &= ~MASK_3DNOW_A;
1558 target_flags_explicit |= MASK_3DNOW_A;
1560 return true;
1562 case OPT_mmmx:
1563 if (!value)
1565 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1566 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1568 return true;
1570 case OPT_msse:
1571 if (!value)
1573 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1574 | MASK_SSE4A);
1575 target_flags_explicit |= (MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1576 | MASK_SSE4A);
1578 return true;
1580 case OPT_msse2:
1581 if (!value)
1583 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1584 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1586 return true;
1588 case OPT_msse3:
1589 if (!value)
1591 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1592 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1594 return true;
1596 case OPT_mssse3:
1597 if (!value)
1599 target_flags &= ~(MASK_SSE4_1 | MASK_SSE4A);
1600 target_flags_explicit |= MASK_SSE4_1 | MASK_SSE4A;
1602 return true;
1604 case OPT_msse4_1:
1605 if (!value)
1607 target_flags &= ~MASK_SSE4A;
1608 target_flags_explicit |= MASK_SSE4A;
1610 return true;
1612 case OPT_msse4a:
1613 if (!value)
1615 target_flags &= ~MASK_SSE4_1;
1616 target_flags_explicit |= MASK_SSE4_1;
1618 return true;
1620 default:
1621 return true;
1625 /* Sometimes certain combinations of command options do not make
1626 sense on a particular target machine. You can define a macro
1627 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1628 defined, is executed once just after all the command options have
1629 been parsed.
1631 Don't use this macro to turn on various extra optimizations for
1632 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1634 void
1635 override_options (void)
1637 int i;
1638 int ix86_tune_defaulted = 0;
1639 unsigned int ix86_arch_mask, ix86_tune_mask;
1641 /* Comes from final.c -- no real reason to change it. */
1642 #define MAX_CODE_ALIGN 16
1644 static struct ptt
1646 const struct processor_costs *cost; /* Processor costs */
1647 const int target_enable; /* Target flags to enable. */
1648 const int target_disable; /* Target flags to disable. */
1649 const int align_loop; /* Default alignments. */
1650 const int align_loop_max_skip;
1651 const int align_jump;
1652 const int align_jump_max_skip;
1653 const int align_func;
1655 const processor_target_table[PROCESSOR_max] =
1657 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1658 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1659 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1660 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1661 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1662 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1663 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1664 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1665 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1666 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1667 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1668 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1669 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1670 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1673 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1674 static struct pta
1676 const char *const name; /* processor name or nickname. */
1677 const enum processor_type processor;
1678 const enum pta_flags
1680 PTA_SSE = 1 << 0,
1681 PTA_SSE2 = 1 << 1,
1682 PTA_SSE3 = 1 << 2,
1683 PTA_MMX = 1 << 3,
1684 PTA_PREFETCH_SSE = 1 << 4,
1685 PTA_3DNOW = 1 << 5,
1686 PTA_3DNOW_A = 1 << 6,
1687 PTA_64BIT = 1 << 7,
1688 PTA_SSSE3 = 1 << 8,
1689 PTA_CX16 = 1 << 9,
1690 PTA_POPCNT = 1 << 10,
1691 PTA_ABM = 1 << 11,
1692 PTA_SSE4A = 1 << 12,
1693 PTA_NO_SAHF = 1 << 13,
1694 PTA_SSE4_1 = 1 << 14
1695 } flags;
1697 const processor_alias_table[] =
1699 {"i386", PROCESSOR_I386, 0},
1700 {"i486", PROCESSOR_I486, 0},
1701 {"i586", PROCESSOR_PENTIUM, 0},
1702 {"pentium", PROCESSOR_PENTIUM, 0},
1703 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1704 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1705 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1706 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1707 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1708 {"i686", PROCESSOR_PENTIUMPRO, 0},
1709 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1710 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1711 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1712 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1713 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1714 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1715 | PTA_MMX | PTA_PREFETCH_SSE},
1716 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1717 | PTA_MMX | PTA_PREFETCH_SSE},
1718 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1719 | PTA_MMX | PTA_PREFETCH_SSE},
1720 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1721 | PTA_MMX | PTA_PREFETCH_SSE
1722 | PTA_CX16 | PTA_NO_SAHF},
1723 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1724 | PTA_64BIT | PTA_MMX
1725 | PTA_PREFETCH_SSE | PTA_CX16},
1726 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1727 | PTA_3DNOW_A},
1728 {"k6", PROCESSOR_K6, PTA_MMX},
1729 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1730 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1731 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1732 | PTA_3DNOW_A},
1733 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1734 | PTA_3DNOW | PTA_3DNOW_A},
1735 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1736 | PTA_3DNOW_A | PTA_SSE},
1737 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1738 | PTA_3DNOW_A | PTA_SSE},
1739 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1740 | PTA_3DNOW_A | PTA_SSE},
1741 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1742 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1743 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1744 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1745 | PTA_NO_SAHF},
1746 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1747 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1748 | PTA_SSE3 | PTA_NO_SAHF},
1749 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1750 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1751 | PTA_SSE2 | PTA_NO_SAHF},
1752 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1753 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1754 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
1755 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1756 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1757 | PTA_SSE2 | PTA_NO_SAHF},
1758 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1759 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1760 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
1761 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1762 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1763 | PTA_SSE2 | PTA_NO_SAHF},
1764 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1765 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1766 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1767 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1768 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1769 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1770 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1771 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1772 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1773 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1776 int const pta_size = ARRAY_SIZE (processor_alias_table);
1778 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1779 SUBTARGET_OVERRIDE_OPTIONS;
1780 #endif
1782 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1783 SUBSUBTARGET_OVERRIDE_OPTIONS;
1784 #endif
1786 /* -fPIC is the default for 64-bit Mach-O (Darwin x86_64). */
1787 if (TARGET_MACHO && TARGET_64BIT)
1788 flag_pic = 2;
1790 /* Set the default values for switches whose default depends on TARGET_64BIT
1791 in case they weren't overwritten by command line options. */
1792 if (TARGET_64BIT)
1794 /* Mach-O doesn't support omitting the frame pointer for now. */
1795 if (flag_omit_frame_pointer == 2)
1796 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1797 if (flag_asynchronous_unwind_tables == 2)
1798 flag_asynchronous_unwind_tables = 1;
1799 if (flag_pcc_struct_return == 2)
1800 flag_pcc_struct_return = 0;
1802 else
1804 if (flag_omit_frame_pointer == 2)
1805 flag_omit_frame_pointer = 0;
1806 if (flag_asynchronous_unwind_tables == 2)
1807 flag_asynchronous_unwind_tables = 0;
1808 if (flag_pcc_struct_return == 2)
1809 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1812 /* Need to check -mtune=generic first. */
1813 if (ix86_tune_string)
1815 if (!strcmp (ix86_tune_string, "generic")
1816 || !strcmp (ix86_tune_string, "i686")
1817 /* As special support for cross compilers we read -mtune=native
1818 as -mtune=generic. With native compilers we won't see the
1819 -mtune=native, as it was changed by the driver. */
1820 || !strcmp (ix86_tune_string, "native"))
1822 if (TARGET_64BIT)
1823 ix86_tune_string = "generic64";
1824 else
1825 ix86_tune_string = "generic32";
1827 else if (!strncmp (ix86_tune_string, "generic", 7))
1828 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1830 else
1832 if (ix86_arch_string)
1833 ix86_tune_string = ix86_arch_string;
1834 if (!ix86_tune_string)
1836 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1837 ix86_tune_defaulted = 1;
1840 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1841 need to use a sensible tune option. */
1842 if (!strcmp (ix86_tune_string, "generic")
1843 || !strcmp (ix86_tune_string, "x86-64")
1844 || !strcmp (ix86_tune_string, "i686"))
1846 if (TARGET_64BIT)
1847 ix86_tune_string = "generic64";
1848 else
1849 ix86_tune_string = "generic32";
1852 if (ix86_stringop_string)
1854 if (!strcmp (ix86_stringop_string, "rep_byte"))
1855 stringop_alg = rep_prefix_1_byte;
1856 else if (!strcmp (ix86_stringop_string, "libcall"))
1857 stringop_alg = libcall;
1858 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1859 stringop_alg = rep_prefix_4_byte;
1860 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1861 stringop_alg = rep_prefix_8_byte;
1862 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1863 stringop_alg = loop_1_byte;
1864 else if (!strcmp (ix86_stringop_string, "loop"))
1865 stringop_alg = loop;
1866 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1867 stringop_alg = unrolled_loop;
1868 else
1869 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1871 if (!strcmp (ix86_tune_string, "x86-64"))
1872 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1873 "-mtune=generic instead as appropriate.");
1875 if (!ix86_arch_string)
1876 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1877 if (!strcmp (ix86_arch_string, "generic"))
1878 error ("generic CPU can be used only for -mtune= switch");
1879 if (!strncmp (ix86_arch_string, "generic", 7))
1880 error ("bad value (%s) for -march= switch", ix86_arch_string);
1882 if (ix86_cmodel_string != 0)
1884 if (!strcmp (ix86_cmodel_string, "small"))
1885 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1886 else if (!strcmp (ix86_cmodel_string, "medium"))
1887 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1888 else if (!strcmp (ix86_cmodel_string, "large"))
1889 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1890 else if (flag_pic)
1891 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1892 else if (!strcmp (ix86_cmodel_string, "32"))
1893 ix86_cmodel = CM_32;
1894 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1895 ix86_cmodel = CM_KERNEL;
1896 else
1897 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1899 else
1901 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1902 use of rip-relative addressing. This eliminates fixups that
1903 would otherwise be needed if this object is to be placed in a
1904 DLL, and is essentially just as efficient as direct addressing. */
1905 if (TARGET_64BIT_MS_ABI)
1906 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1907 else if (TARGET_64BIT)
1908 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1909 else
1910 ix86_cmodel = CM_32;
1912 if (ix86_asm_string != 0)
1914 if (! TARGET_MACHO
1915 && !strcmp (ix86_asm_string, "intel"))
1916 ix86_asm_dialect = ASM_INTEL;
1917 else if (!strcmp (ix86_asm_string, "att"))
1918 ix86_asm_dialect = ASM_ATT;
1919 else
1920 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1922 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1923 error ("code model %qs not supported in the %s bit mode",
1924 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1925 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1926 sorry ("%i-bit mode not compiled in",
1927 (target_flags & MASK_64BIT) ? 64 : 32);
1929 for (i = 0; i < pta_size; i++)
1930 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1932 ix86_arch = processor_alias_table[i].processor;
1933 /* Default cpu tuning to the architecture. */
1934 ix86_tune = ix86_arch;
1935 if (processor_alias_table[i].flags & PTA_MMX
1936 && !(target_flags_explicit & MASK_MMX))
1937 target_flags |= MASK_MMX;
1938 if (processor_alias_table[i].flags & PTA_3DNOW
1939 && !(target_flags_explicit & MASK_3DNOW))
1940 target_flags |= MASK_3DNOW;
1941 if (processor_alias_table[i].flags & PTA_3DNOW_A
1942 && !(target_flags_explicit & MASK_3DNOW_A))
1943 target_flags |= MASK_3DNOW_A;
1944 if (processor_alias_table[i].flags & PTA_SSE
1945 && !(target_flags_explicit & MASK_SSE))
1946 target_flags |= MASK_SSE;
1947 if (processor_alias_table[i].flags & PTA_SSE2
1948 && !(target_flags_explicit & MASK_SSE2))
1949 target_flags |= MASK_SSE2;
1950 if (processor_alias_table[i].flags & PTA_SSE3
1951 && !(target_flags_explicit & MASK_SSE3))
1952 target_flags |= MASK_SSE3;
1953 if (processor_alias_table[i].flags & PTA_SSSE3
1954 && !(target_flags_explicit & MASK_SSSE3))
1955 target_flags |= MASK_SSSE3;
1956 if (processor_alias_table[i].flags & PTA_SSE4_1
1957 && !(target_flags_explicit & MASK_SSE4_1))
1958 target_flags |= MASK_SSE4_1;
1959 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1960 x86_prefetch_sse = true;
1961 if (processor_alias_table[i].flags & PTA_CX16)
1962 x86_cmpxchg16b = true;
1963 if (processor_alias_table[i].flags & PTA_POPCNT
1964 && !(target_flags_explicit & MASK_POPCNT))
1965 target_flags |= MASK_POPCNT;
1966 if (processor_alias_table[i].flags & PTA_ABM
1967 && !(target_flags_explicit & MASK_ABM))
1968 target_flags |= MASK_ABM;
1969 if (processor_alias_table[i].flags & PTA_SSE4A
1970 && !(target_flags_explicit & MASK_SSE4A))
1971 target_flags |= MASK_SSE4A;
1972 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1973 x86_sahf = true;
1974 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1975 error ("CPU you selected does not support x86-64 "
1976 "instruction set");
1977 break;
1980 if (i == pta_size)
1981 error ("bad value (%s) for -march= switch", ix86_arch_string);
1983 ix86_arch_mask = 1u << ix86_arch;
1984 for (i = 0; i < X86_ARCH_LAST; ++i)
1985 ix86_arch_features[i] &= ix86_arch_mask;
1987 for (i = 0; i < pta_size; i++)
1988 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1990 ix86_tune = processor_alias_table[i].processor;
1991 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1993 if (ix86_tune_defaulted)
1995 ix86_tune_string = "x86-64";
1996 for (i = 0; i < pta_size; i++)
1997 if (! strcmp (ix86_tune_string,
1998 processor_alias_table[i].name))
1999 break;
2000 ix86_tune = processor_alias_table[i].processor;
2002 else
2003 error ("CPU you selected does not support x86-64 "
2004 "instruction set");
2006 /* Intel CPUs have always interpreted SSE prefetch instructions as
2007 NOPs; so, we can enable SSE prefetch instructions even when
2008 -mtune (rather than -march) points us to a processor that has them.
2009 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2010 higher processors. */
2011 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2012 x86_prefetch_sse = true;
2013 break;
2015 if (i == pta_size)
2016 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2018 ix86_tune_mask = 1u << ix86_tune;
2019 for (i = 0; i < X86_TUNE_LAST; ++i)
2020 ix86_tune_features[i] &= ix86_tune_mask;
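/* Each entry of ix86_tune_features (and of ix86_arch_features above) is a
   bitmask of the processors the feature applies to; ANDing with the one-bit
   tune/arch mask leaves the entry nonzero exactly when the feature is
   enabled for the CPU selected by -mtune/-march.  */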
2022 if (optimize_size)
2023 ix86_cost = &size_cost;
2024 else
2025 ix86_cost = processor_target_table[ix86_tune].cost;
2026 target_flags |= processor_target_table[ix86_tune].target_enable;
2027 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2029 /* Arrange to set up i386_stack_locals for all functions. */
2030 init_machine_status = ix86_init_machine_status;
2032 /* Validate -mregparm= value. */
2033 if (ix86_regparm_string)
2035 if (TARGET_64BIT)
2036 warning (0, "-mregparm is ignored in 64-bit mode");
2037 i = atoi (ix86_regparm_string);
2038 if (i < 0 || i > REGPARM_MAX)
2039 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2040 else
2041 ix86_regparm = i;
2043 if (TARGET_64BIT)
2044 ix86_regparm = REGPARM_MAX;
2046 /* If the user has provided any of the -malign-* options,
2047 warn and use that value only if -falign-* is not set.
2048 Remove this code in GCC 3.2 or later. */
2049 if (ix86_align_loops_string)
2051 warning (0, "-malign-loops is obsolete, use -falign-loops");
2052 if (align_loops == 0)
2054 i = atoi (ix86_align_loops_string);
2055 if (i < 0 || i > MAX_CODE_ALIGN)
2056 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2057 else
2058 align_loops = 1 << i;
2062 if (ix86_align_jumps_string)
2064 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2065 if (align_jumps == 0)
2067 i = atoi (ix86_align_jumps_string);
2068 if (i < 0 || i > MAX_CODE_ALIGN)
2069 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2070 else
2071 align_jumps = 1 << i;
2075 if (ix86_align_funcs_string)
2077 warning (0, "-malign-functions is obsolete, use -falign-functions");
2078 if (align_functions == 0)
2080 i = atoi (ix86_align_funcs_string);
2081 if (i < 0 || i > MAX_CODE_ALIGN)
2082 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2083 else
2084 align_functions = 1 << i;
2088 /* Default align_* from the processor table. */
2089 if (align_loops == 0)
2091 align_loops = processor_target_table[ix86_tune].align_loop;
2092 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2094 if (align_jumps == 0)
2096 align_jumps = processor_target_table[ix86_tune].align_jump;
2097 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2099 if (align_functions == 0)
2101 align_functions = processor_target_table[ix86_tune].align_func;
2104 /* Validate -mbranch-cost= value, or provide default. */
2105 ix86_branch_cost = ix86_cost->branch_cost;
2106 if (ix86_branch_cost_string)
2108 i = atoi (ix86_branch_cost_string);
2109 if (i < 0 || i > 5)
2110 error ("-mbranch-cost=%d is not between 0 and 5", i);
2111 else
2112 ix86_branch_cost = i;
2114 if (ix86_section_threshold_string)
2116 i = atoi (ix86_section_threshold_string);
2117 if (i < 0)
2118 error ("-mlarge-data-threshold=%d is negative", i);
2119 else
2120 ix86_section_threshold = i;
2123 if (ix86_tls_dialect_string)
2125 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2126 ix86_tls_dialect = TLS_DIALECT_GNU;
2127 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2128 ix86_tls_dialect = TLS_DIALECT_GNU2;
2129 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2130 ix86_tls_dialect = TLS_DIALECT_SUN;
2131 else
2132 error ("bad value (%s) for -mtls-dialect= switch",
2133 ix86_tls_dialect_string);
2136 if (ix87_precision_string)
2138 i = atoi (ix87_precision_string);
2139 if (i != 32 && i != 64 && i != 80)
2140 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
2143 /* Keep nonleaf frame pointers. */
2144 if (flag_omit_frame_pointer)
2145 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2146 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2147 flag_omit_frame_pointer = 1;
2149 /* If we're doing fast math, we don't care about comparison order
2150 wrt NaNs. This lets us use a shorter comparison sequence. */
2151 if (flag_finite_math_only)
2152 target_flags &= ~MASK_IEEE_FP;
2154 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2155 since the insns won't need emulation. */
2156 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2157 target_flags &= ~MASK_NO_FANCY_MATH_387;
2159 /* Likewise, if the target doesn't have a 387, or we've specified
2160 software floating point, don't use 387 inline intrinsics. */
2161 if (!TARGET_80387)
2162 target_flags |= MASK_NO_FANCY_MATH_387;
2164 /* Turn on SSSE3 builtins for -msse4.1. */
2165 if (TARGET_SSE4_1)
2166 target_flags |= MASK_SSSE3;
2168 /* Turn on SSE3 builtins for -mssse3. */
2169 if (TARGET_SSSE3)
2170 target_flags |= MASK_SSE3;
2172 /* Turn on SSE3 builtins for -msse4a. */
2173 if (TARGET_SSE4A)
2174 target_flags |= MASK_SSE3;
2176 /* Turn on SSE2 builtins for -msse3. */
2177 if (TARGET_SSE3)
2178 target_flags |= MASK_SSE2;
2180 /* Turn on SSE builtins for -msse2. */
2181 if (TARGET_SSE2)
2182 target_flags |= MASK_SSE;
2184 /* Turn on MMX builtins for -msse. */
2185 if (TARGET_SSE)
2187 target_flags |= MASK_MMX & ~target_flags_explicit;
2188 x86_prefetch_sse = true;
2191 /* Turn on MMX builtins for 3Dnow. */
2192 if (TARGET_3DNOW)
2193 target_flags |= MASK_MMX;
2195 /* Turn on POPCNT builtins for -mabm. */
2196 if (TARGET_ABM)
2197 target_flags |= MASK_POPCNT;
2199 if (TARGET_64BIT)
2201 if (TARGET_RTD)
2202 warning (0, "-mrtd is ignored in 64bit mode");
2204 /* Enable by default the SSE and MMX builtins. Do allow the user to
2205 explicitly disable any of these. In particular, disabling SSE and
2206 MMX for kernel code is extremely useful. */
2207 target_flags
2208 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | TARGET_SUBTARGET64_DEFAULT)
2209 & ~target_flags_explicit);
2211 else
2213 /* The i386 ABI does not specify a red zone. It still makes sense to use one
2214 when the programmer takes care to keep the stack from being clobbered. */
2215 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2216 target_flags |= MASK_NO_RED_ZONE;
2219 /* Validate -mpreferred-stack-boundary= value, or provide default.
2220 The default of 128 bits is for the Pentium III's SSE __m128. We can't
2221 lower it for optimize_size, since object files compiled with -Os and
2222 -On could then not be mixed. */
2223 ix86_preferred_stack_boundary = 128;
2224 if (ix86_preferred_stack_boundary_string)
2226 i = atoi (ix86_preferred_stack_boundary_string);
2227 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2228 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2229 TARGET_64BIT ? 4 : 2);
2230 else
2231 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
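/* For example, -mpreferred-stack-boundary=4 yields (1 << 4) * 8 == 128 bits,
   i.e. 16-byte stack alignment.  */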
2234 /* Accept -msseregparm only if at least SSE support is enabled. */
2235 if (TARGET_SSEREGPARM
2236 && ! TARGET_SSE)
2237 error ("-msseregparm used without SSE enabled");
2239 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2240 if (ix86_fpmath_string != 0)
2242 if (! strcmp (ix86_fpmath_string, "387"))
2243 ix86_fpmath = FPMATH_387;
2244 else if (! strcmp (ix86_fpmath_string, "sse"))
2246 if (!TARGET_SSE)
2248 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2249 ix86_fpmath = FPMATH_387;
2251 else
2252 ix86_fpmath = FPMATH_SSE;
2254 else if (! strcmp (ix86_fpmath_string, "387,sse")
2255 || ! strcmp (ix86_fpmath_string, "sse,387"))
2257 if (!TARGET_SSE)
2259 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2260 ix86_fpmath = FPMATH_387;
2262 else if (!TARGET_80387)
2264 warning (0, "387 instruction set disabled, using SSE arithmetics");
2265 ix86_fpmath = FPMATH_SSE;
2267 else
2268 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2270 else
2271 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2274 /* If the i387 is disabled, then do not return values in it. */
2275 if (!TARGET_80387)
2276 target_flags &= ~MASK_FLOAT_RETURNS;
2278 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2279 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2280 && !optimize_size)
2281 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2283 /* ??? Unwind info is not correct around the CFG unless either a frame
2284 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2285 unwind info generation to be aware of the CFG and propagating states
2286 around edges. */
2287 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2288 || flag_exceptions || flag_non_call_exceptions)
2289 && flag_omit_frame_pointer
2290 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2292 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2293 warning (0, "unwind tables currently require either a frame pointer "
2294 "or -maccumulate-outgoing-args for correctness");
2295 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2298 /* For sane SSE instruction set generation we need the fcomi instruction.
2299 It is safe to enable all CMOVE instructions. */
2300 if (TARGET_SSE)
2301 TARGET_CMOVE = 1;
2303 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2305 char *p;
2306 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2307 p = strchr (internal_label_prefix, 'X');
2308 internal_label_prefix_len = p - internal_label_prefix;
2309 *p = '\0';
2312 /* When the scheduling description is not available, disable the scheduler pass
2313 so it won't slow down compilation and make x87 code slower. */
2314 if (!TARGET_SCHEDULE)
2315 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2317 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2318 set_param_value ("simultaneous-prefetches",
2319 ix86_cost->simultaneous_prefetches);
2320 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2321 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2324 /* Return true if this goes in large data/bss. */
2326 static bool
2327 ix86_in_large_data_p (tree exp)
2329 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2330 return false;
2332 /* Functions are never large data. */
2333 if (TREE_CODE (exp) == FUNCTION_DECL)
2334 return false;
2336 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2338 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2339 if (strcmp (section, ".ldata") == 0
2340 || strcmp (section, ".lbss") == 0)
2341 return true;
2342 return false;
2344 else
2346 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2348 /* If this is an incomplete type with size 0, then we can't put it
2349 in data because it might be too big when completed. */
2350 if (!size || size > ix86_section_threshold)
2351 return true;
2354 return false;
2357 /* Switch to the appropriate section for output of DECL.
2358 DECL is either a `VAR_DECL' node or a constant of some sort.
2359 RELOC indicates whether forming the initial value of DECL requires
2360 link-time relocations. */
2362 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2363 ATTRIBUTE_UNUSED;
2365 static section *
2366 x86_64_elf_select_section (tree decl, int reloc,
2367 unsigned HOST_WIDE_INT align)
2369 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2370 && ix86_in_large_data_p (decl))
2372 const char *sname = NULL;
2373 unsigned int flags = SECTION_WRITE;
2374 switch (categorize_decl_for_section (decl, reloc))
2376 case SECCAT_DATA:
2377 sname = ".ldata";
2378 break;
2379 case SECCAT_DATA_REL:
2380 sname = ".ldata.rel";
2381 break;
2382 case SECCAT_DATA_REL_LOCAL:
2383 sname = ".ldata.rel.local";
2384 break;
2385 case SECCAT_DATA_REL_RO:
2386 sname = ".ldata.rel.ro";
2387 break;
2388 case SECCAT_DATA_REL_RO_LOCAL:
2389 sname = ".ldata.rel.ro.local";
2390 break;
2391 case SECCAT_BSS:
2392 sname = ".lbss";
2393 flags |= SECTION_BSS;
2394 break;
2395 case SECCAT_RODATA:
2396 case SECCAT_RODATA_MERGE_STR:
2397 case SECCAT_RODATA_MERGE_STR_INIT:
2398 case SECCAT_RODATA_MERGE_CONST:
2399 sname = ".lrodata";
2400 flags = 0;
2401 break;
2402 case SECCAT_SRODATA:
2403 case SECCAT_SDATA:
2404 case SECCAT_SBSS:
2405 gcc_unreachable ();
2406 case SECCAT_TEXT:
2407 case SECCAT_TDATA:
2408 case SECCAT_TBSS:
2409 /* We don't split these for the medium model. Place them into
2410 default sections and hope for the best. */
2411 break;
2413 if (sname)
2415 /* We might get called with string constants, but get_named_section
2416 doesn't like them as they are not DECLs. Also, we need to set
2417 flags in that case. */
2418 if (!DECL_P (decl))
2419 return get_section (sname, flags, NULL);
2420 return get_named_section (decl, sname, reloc);
2423 return default_elf_select_section (decl, reloc, align);
2426 /* Build up a unique section name, expressed as a
2427 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2428 RELOC indicates whether the initial value of EXP requires
2429 link-time relocations. */
2431 static void ATTRIBUTE_UNUSED
2432 x86_64_elf_unique_section (tree decl, int reloc)
2434 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2435 && ix86_in_large_data_p (decl))
2437 const char *prefix = NULL;
2438 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2439 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2441 switch (categorize_decl_for_section (decl, reloc))
2443 case SECCAT_DATA:
2444 case SECCAT_DATA_REL:
2445 case SECCAT_DATA_REL_LOCAL:
2446 case SECCAT_DATA_REL_RO:
2447 case SECCAT_DATA_REL_RO_LOCAL:
2448 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2449 break;
2450 case SECCAT_BSS:
2451 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2452 break;
2453 case SECCAT_RODATA:
2454 case SECCAT_RODATA_MERGE_STR:
2455 case SECCAT_RODATA_MERGE_STR_INIT:
2456 case SECCAT_RODATA_MERGE_CONST:
2457 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2458 break;
2459 case SECCAT_SRODATA:
2460 case SECCAT_SDATA:
2461 case SECCAT_SBSS:
2462 gcc_unreachable ();
2463 case SECCAT_TEXT:
2464 case SECCAT_TDATA:
2465 case SECCAT_TBSS:
2466 /* We don't split these for the medium model. Place them into
2467 default sections and hope for the best. */
2468 break;
2470 if (prefix)
2472 const char *name;
2473 size_t nlen, plen;
2474 char *string;
2475 plen = strlen (prefix);
2477 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2478 name = targetm.strip_name_encoding (name);
2479 nlen = strlen (name);
2481 string = alloca (nlen + plen + 1);
2482 memcpy (string, prefix, plen);
2483 memcpy (string + plen, name, nlen + 1);
2485 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2486 return;
2489 default_unique_section (decl, reloc);
2492 #ifdef COMMON_ASM_OP
2493 /* This says how to output assembler code to declare an
2494 uninitialized external linkage data object.
2496 For medium model x86-64 we need to use .largecomm opcode for
2497 large objects. */
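/* For example, with -mcmodel=medium and the default 64K threshold, a 128 KB
   common object is announced with ".largecomm name,131072,<align>" instead of
   the usual COMMON_ASM_OP directive, so it can be placed in the large data
   area.  */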
2498 void
2499 x86_elf_aligned_common (FILE *file,
2500 const char *name, unsigned HOST_WIDE_INT size,
2501 int align)
2503 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2504 && size > (unsigned int)ix86_section_threshold)
2505 fprintf (file, ".largecomm\t");
2506 else
2507 fprintf (file, "%s", COMMON_ASM_OP);
2508 assemble_name (file, name);
2509 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2510 size, align / BITS_PER_UNIT);
2512 #endif
2514 /* Utility function for targets to use in implementing
2515 ASM_OUTPUT_ALIGNED_BSS. */
2517 void
2518 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2519 const char *name, unsigned HOST_WIDE_INT size,
2520 int align)
2522 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2523 && size > (unsigned int)ix86_section_threshold)
2524 switch_to_section (get_named_section (decl, ".lbss", 0));
2525 else
2526 switch_to_section (bss_section);
2527 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2528 #ifdef ASM_DECLARE_OBJECT_NAME
2529 last_assemble_variable_decl = decl;
2530 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2531 #else
2532 /* The standard thing is to just output a label for the object. */
2533 ASM_OUTPUT_LABEL (file, name);
2534 #endif /* ASM_DECLARE_OBJECT_NAME */
2535 ASM_OUTPUT_SKIP (file, size ? size : 1);
2538 void
2539 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2541 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2542 make the problem with not enough registers even worse. */
2543 #ifdef INSN_SCHEDULING
2544 if (level > 1)
2545 flag_schedule_insns = 0;
2546 #endif
2548 if (TARGET_MACHO)
2549 /* The Darwin libraries never set errno, so we might as well
2550 avoid calling them when that's the only reason we would. */
2551 flag_errno_math = 0;
2553 /* The default values of these switches depend on TARGET_64BIT,
2554 which is not known at this point. Mark these values with 2 and
2555 let the user override them. If there is no command line option
2556 specifying them, we will set the defaults in override_options. */
2557 if (optimize >= 1)
2558 flag_omit_frame_pointer = 2;
2559 flag_pcc_struct_return = 2;
2560 flag_asynchronous_unwind_tables = 2;
2561 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2562 SUBTARGET_OPTIMIZATION_OPTIONS;
2563 #endif
2566 /* Decide whether we can make a sibling call to a function. DECL is the
2567 declaration of the function being targeted by the call and EXP is the
2568 CALL_EXPR representing the call. */
2570 static bool
2571 ix86_function_ok_for_sibcall (tree decl, tree exp)
2573 tree func;
2574 rtx a, b;
2576 /* If we are generating position-independent code, we cannot sibcall
2577 optimize any indirect call, or a direct call to a global function,
2578 as the PLT requires %ebx be live. */
2579 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2580 return false;
2582 if (decl)
2583 func = decl;
2584 else
2586 func = TREE_TYPE (CALL_EXPR_FN (exp));
2587 if (POINTER_TYPE_P (func))
2588 func = TREE_TYPE (func);
2591 /* Check that the return value locations are the same. Like
2592 if we are returning floats on the 80387 register stack, we cannot
2593 make a sibcall from a function that doesn't return a float to a
2594 function that does or, conversely, from a function that does return
2595 a float to a function that doesn't; the necessary stack adjustment
2596 would not be executed. This is also the place we notice
2597 differences in the return value ABI. Note that it is ok for one
2598 of the functions to have void return type as long as the return
2599 value of the other is passed in a register. */
2600 a = ix86_function_value (TREE_TYPE (exp), func, false);
2601 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2602 cfun->decl, false);
2603 if (STACK_REG_P (a) || STACK_REG_P (b))
2605 if (!rtx_equal_p (a, b))
2606 return false;
2608 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2610 else if (!rtx_equal_p (a, b))
2611 return false;
2613 /* If this call is indirect, we'll need to be able to use a call-clobbered
2614 register for the address of the target function. Make sure that all
2615 such registers are not used for passing parameters. */
2616 if (!decl && !TARGET_64BIT)
2618 tree type;
2620 /* We're looking at the CALL_EXPR, we need the type of the function. */
2621 type = CALL_EXPR_FN (exp); /* pointer expression */
2622 type = TREE_TYPE (type); /* pointer type */
2623 type = TREE_TYPE (type); /* function type */
2625 if (ix86_function_regparm (type, NULL) >= 3)
2627 /* ??? Need to count the actual number of registers to be used,
2628 not the possible number of registers. Fix later. */
2629 return false;
2633 /* Dllimport'd functions are also called indirectly. */
2634 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2635 && decl && DECL_DLLIMPORT_P (decl)
2636 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2637 return false;
2639 /* If we force-aligned the stack, then sibcalling would unalign the
2640 stack, which may break the called function. */
2641 if (cfun->machine->force_align_arg_pointer)
2642 return false;
2644 /* Otherwise okay. That also includes certain types of indirect calls. */
2645 return true;
2648 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2649 calling convention attributes;
2650 arguments as in struct attribute_spec.handler. */
2652 static tree
2653 ix86_handle_cconv_attribute (tree *node, tree name,
2654 tree args,
2655 int flags ATTRIBUTE_UNUSED,
2656 bool *no_add_attrs)
2658 if (TREE_CODE (*node) != FUNCTION_TYPE
2659 && TREE_CODE (*node) != METHOD_TYPE
2660 && TREE_CODE (*node) != FIELD_DECL
2661 && TREE_CODE (*node) != TYPE_DECL)
2663 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2664 IDENTIFIER_POINTER (name));
2665 *no_add_attrs = true;
2666 return NULL_TREE;
2669 /* Can combine regparm with all attributes but fastcall. */
2670 if (is_attribute_p ("regparm", name))
2672 tree cst;
2674 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2676 error ("fastcall and regparm attributes are not compatible");
2679 cst = TREE_VALUE (args);
2680 if (TREE_CODE (cst) != INTEGER_CST)
2682 warning (OPT_Wattributes,
2683 "%qs attribute requires an integer constant argument",
2684 IDENTIFIER_POINTER (name));
2685 *no_add_attrs = true;
2687 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2689 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2690 IDENTIFIER_POINTER (name), REGPARM_MAX);
2691 *no_add_attrs = true;
2694 if (!TARGET_64BIT
2695 && lookup_attribute (ix86_force_align_arg_pointer_string,
2696 TYPE_ATTRIBUTES (*node))
2697 && compare_tree_int (cst, REGPARM_MAX-1))
2699 error ("%s functions limited to %d register parameters",
2700 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2703 return NULL_TREE;
2706 if (TARGET_64BIT)
2708 /* Do not warn when emulating the MS ABI. */
2709 if (!TARGET_64BIT_MS_ABI)
2710 warning (OPT_Wattributes, "%qs attribute ignored",
2711 IDENTIFIER_POINTER (name));
2712 *no_add_attrs = true;
2713 return NULL_TREE;
2716 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2717 if (is_attribute_p ("fastcall", name))
2719 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2721 error ("fastcall and cdecl attributes are not compatible");
2723 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2725 error ("fastcall and stdcall attributes are not compatible");
2727 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2729 error ("fastcall and regparm attributes are not compatible");
2733 /* Can combine stdcall with fastcall (redundant), regparm and
2734 sseregparm. */
2735 else if (is_attribute_p ("stdcall", name))
2737 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2739 error ("stdcall and cdecl attributes are not compatible");
2741 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2743 error ("stdcall and fastcall attributes are not compatible");
2747 /* Can combine cdecl with regparm and sseregparm. */
2748 else if (is_attribute_p ("cdecl", name))
2750 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2752 error ("stdcall and cdecl attributes are not compatible");
2754 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2756 error ("fastcall and cdecl attributes are not compatible");
2760 /* Can combine sseregparm with all attributes. */
2762 return NULL_TREE;
2765 /* Return 0 if the attributes for two types are incompatible, 1 if they
2766 are compatible, and 2 if they are nearly compatible (which causes a
2767 warning to be generated). */
2769 static int
2770 ix86_comp_type_attributes (tree type1, tree type2)
2772 /* Check for mismatch of non-default calling convention. */
2773 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2775 if (TREE_CODE (type1) != FUNCTION_TYPE)
2776 return 1;
2778 /* Check for mismatched fastcall/regparm types. */
2779 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2780 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2781 || (ix86_function_regparm (type1, NULL)
2782 != ix86_function_regparm (type2, NULL)))
2783 return 0;
2785 /* Check for mismatched sseregparm types. */
2786 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2787 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2788 return 0;
2790 /* Check for mismatched return types (cdecl vs stdcall). */
2791 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2792 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2793 return 0;
2795 return 1;
2798 /* Return the regparm value for a function with the indicated TYPE and DECL.
2799 DECL may be NULL when calling function indirectly
2800 or considering a libcall. */
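/* As an illustration (not exercised by this file), on a 32-bit target
     int __attribute__((regparm(3))) f (int a, int b, int c);
   receives a, b and c in %eax, %edx and %ecx, whereas the fastcall case
   handled below implies only two register arguments (%ecx and %edx).  */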
2802 static int
2803 ix86_function_regparm (tree type, tree decl)
2805 tree attr;
2806 int regparm = ix86_regparm;
2808 if (TARGET_64BIT)
2809 return regparm;
2811 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2812 if (attr)
2813 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2815 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2816 return 2;
2818 /* Use register calling convention for local functions when possible. */
2819 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2820 && flag_unit_at_a_time && !profile_flag)
2822 struct cgraph_local_info *i = cgraph_local_info (decl);
2823 if (i && i->local)
2825 int local_regparm, globals = 0, regno;
2826 struct function *f;
2828 /* Make sure no regparm register is taken by a
2829 global register variable. */
2830 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2831 if (global_regs[local_regparm])
2832 break;
2834 /* We can't use regparm(3) for nested functions as these use
2835 static chain pointer in third argument. */
2836 if (local_regparm == 3
2837 && decl_function_context (decl)
2838 && !DECL_NO_STATIC_CHAIN (decl))
2839 local_regparm = 2;
2841 /* If the function realigns its stack pointer, the prologue will
2842 clobber %ecx. If we've already generated code for the callee,
2843 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2844 scanning the attributes for the self-realigning property. */
2845 f = DECL_STRUCT_FUNCTION (decl);
2846 if (local_regparm == 3
2847 && (f ? !!f->machine->force_align_arg_pointer
2848 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2849 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2850 local_regparm = 2;
2852 /* Each global register variable increases register pressure,
2853 so the more global reg vars there are, the less the regparm
2854 optimization can be used, unless explicitly requested by the user. */
2855 for (regno = 0; regno < 6; regno++)
2856 if (global_regs[regno])
2857 globals++;
2858 local_regparm
2859 = globals < local_regparm ? local_regparm - globals : 0;
2861 if (local_regparm > regparm)
2862 regparm = local_regparm;
2866 return regparm;
2869 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2870 DFmode (2) arguments in SSE registers for a function with the
2871 indicated TYPE and DECL. DECL may be NULL when calling function
2872 indirectly or considering a libcall. Otherwise return 0. */
2874 static int
2875 ix86_function_sseregparm (tree type, tree decl)
2877 gcc_assert (!TARGET_64BIT);
2879 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2880 by the sseregparm attribute. */
2881 if (TARGET_SSEREGPARM
2882 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2884 if (!TARGET_SSE)
2886 if (decl)
2887 error ("Calling %qD with attribute sseregparm without "
2888 "SSE/SSE2 enabled", decl);
2889 else
2890 error ("Calling %qT with attribute sseregparm without "
2891 "SSE/SSE2 enabled", type);
2892 return 0;
2895 return 2;
2898 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2899 (and DFmode for SSE2) arguments in SSE registers. */
2900 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2902 struct cgraph_local_info *i = cgraph_local_info (decl);
2903 if (i && i->local)
2904 return TARGET_SSE2 ? 2 : 1;
2907 return 0;
2910 /* Return true if EAX is live at the start of the function. Used by
2911 ix86_expand_prologue to determine if we need special help before
2912 calling allocate_stack_worker. */
2914 static bool
2915 ix86_eax_live_at_start_p (void)
2917 /* Cheat. Don't bother working forward from ix86_function_regparm
2918 to the function type to whether an actual argument is located in
2919 eax. Instead just look at cfg info, which is still close enough
2920 to correct at this point. This gives false positives for broken
2921 functions that might use uninitialized data that happens to be
2922 allocated in eax, but who cares? */
2923 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2926 /* Return true if TYPE has a variable argument list. */
2928 static bool
2929 type_has_variadic_args_p (tree type)
2931 tree n, t = TYPE_ARG_TYPES (type);
2933 if (t == NULL)
2934 return false;
2936 while ((n = TREE_CHAIN (t)) != NULL)
2937 t = n;
2939 return TREE_VALUE (t) != void_type_node;
2942 /* Value is the number of bytes of arguments automatically
2943 popped when returning from a subroutine call.
2944 FUNDECL is the declaration node of the function (as a tree),
2945 FUNTYPE is the data type of the function (as a tree),
2946 or for a library call it is an identifier node for the subroutine name.
2947 SIZE is the number of bytes of arguments passed on the stack.
2949 On the 80386, the RTD insn may be used to pop them if the number
2950 of args is fixed, but if the number is variable then the caller
2951 must pop them all. RTD can't be used for library calls now
2952 because the library is compiled with the Unix compiler.
2953 Use of RTD is a selectable option, since it is incompatible with
2954 standard Unix calling sequences. If the option is not selected,
2955 the caller must always pop the args.
2957 The attribute stdcall is equivalent to RTD on a per module basis. */
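/* For example, in 32-bit code a stdcall function taking two ints returns
   with "ret $8", popping its 8 bytes of stack arguments, whereas a cdecl
   or variadic function returns with a plain "ret" and leaves the popping
   to the caller.  */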
2960 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2962 int rtd;
2964 /* None of the 64-bit ABIs pop arguments. */
2965 if (TARGET_64BIT)
2966 return 0;
2968 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2970 /* Cdecl functions override -mrtd, and never pop the stack. */
2971 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
2973 /* Stdcall and fastcall functions will pop the stack if not
2974 variable args. */
2975 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2976 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2977 rtd = 1;
2979 if (rtd && ! type_has_variadic_args_p (funtype))
2980 return size;
2983 /* Lose any fake structure return argument if it is passed on the stack. */
2984 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2985 && !KEEP_AGGREGATE_RETURN_POINTER)
2987 int nregs = ix86_function_regparm (funtype, fundecl);
2988 if (nregs == 0)
2989 return GET_MODE_SIZE (Pmode);
2992 return 0;
2995 /* Argument support functions. */
2997 /* Return true when register may be used to pass function parameters. */
2998 bool
2999 ix86_function_arg_regno_p (int regno)
3001 int i;
3002 const int *parm_regs;
3004 if (!TARGET_64BIT)
3006 if (TARGET_MACHO)
3007 return (regno < REGPARM_MAX
3008 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3009 else
3010 return (regno < REGPARM_MAX
3011 || (TARGET_MMX && MMX_REGNO_P (regno)
3012 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3013 || (TARGET_SSE && SSE_REGNO_P (regno)
3014 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3017 if (TARGET_MACHO)
3019 if (SSE_REGNO_P (regno) && TARGET_SSE)
3020 return true;
3022 else
3024 if (TARGET_SSE && SSE_REGNO_P (regno)
3025 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3026 return true;
3029 /* RAX is used as hidden argument to va_arg functions. */
3030 if (!TARGET_64BIT_MS_ABI && regno == 0)
3031 return true;
3033 if (TARGET_64BIT_MS_ABI)
3034 parm_regs = x86_64_ms_abi_int_parameter_registers;
3035 else
3036 parm_regs = x86_64_int_parameter_registers;
3037 for (i = 0; i < REGPARM_MAX; i++)
3038 if (regno == parm_regs[i])
3039 return true;
3040 return false;
3043 /* Return true if we do not know how to pass TYPE solely in registers. */
3045 static bool
3046 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3048 if (must_pass_in_stack_var_size_or_pad (mode, type))
3049 return true;
3051 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3052 The layout_type routine is crafty and tries to trick us into passing
3053 currently unsupported vector types on the stack by using TImode. */
3054 return (!TARGET_64BIT && mode == TImode
3055 && type && TREE_CODE (type) != VECTOR_TYPE);
3058 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3059 for a call to a function whose data type is FNTYPE.
3060 For a library call, FNTYPE is 0. */
3062 void
3063 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3064 tree fntype, /* tree ptr for function decl */
3065 rtx libname, /* SYMBOL_REF of library name or 0 */
3066 tree fndecl)
3068 memset (cum, 0, sizeof (*cum));
3070 /* Set up the number of registers to use for passing arguments. */
3071 cum->nregs = ix86_regparm;
3072 if (TARGET_SSE)
3073 cum->sse_nregs = SSE_REGPARM_MAX;
3074 if (TARGET_MMX)
3075 cum->mmx_nregs = MMX_REGPARM_MAX;
3076 cum->warn_sse = true;
3077 cum->warn_mmx = true;
3078 cum->maybe_vaarg = (fntype
3079 ? (!TYPE_ARG_TYPES (fntype)
3080 || type_has_variadic_args_p (fntype))
3081 : !libname);
3083 if (!TARGET_64BIT)
3085 /* If there are variable arguments, then we won't pass anything
3086 in registers in 32-bit mode. */
3087 if (cum->maybe_vaarg)
3089 cum->nregs = 0;
3090 cum->sse_nregs = 0;
3091 cum->mmx_nregs = 0;
3092 cum->warn_sse = 0;
3093 cum->warn_mmx = 0;
3094 return;
3097 /* Use ecx and edx registers if function has fastcall attribute,
3098 else look for regparm information. */
3099 if (fntype)
3101 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3103 cum->nregs = 2;
3104 cum->fastcall = 1;
3106 else
3107 cum->nregs = ix86_function_regparm (fntype, fndecl);
3110 /* Set up the number of SSE registers used for passing SFmode
3111 and DFmode arguments. Warn for mismatching ABI. */
3112 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3116 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3117 But in the case of vector types, it is some vector mode.
3119 When we have only some of our vector isa extensions enabled, then there
3120 are some modes for which vector_mode_supported_p is false. For these
3121 modes, the generic vector support in gcc will choose some non-vector mode
3122 in order to implement the type. By computing the natural mode, we'll
3123 select the proper ABI location for the operand and not depend on whatever
3124 the middle-end decides to do with these vector types. */
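/* For example, an 8-byte vector of ints declared with
   __attribute__((vector_size (8))) naturally has mode V2SI; if MMX is
   disabled, the middle end may fall back to a scalar integer mode for the
   type, but for argument-passing purposes we still want to see V2SI, which
   is what this function recomputes (illustrative case only).  */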
3126 static enum machine_mode
3127 type_natural_mode (tree type)
3129 enum machine_mode mode = TYPE_MODE (type);
3131 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3133 HOST_WIDE_INT size = int_size_in_bytes (type);
3134 if ((size == 8 || size == 16)
3135 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3136 && TYPE_VECTOR_SUBPARTS (type) > 1)
3138 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3140 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3141 mode = MIN_MODE_VECTOR_FLOAT;
3142 else
3143 mode = MIN_MODE_VECTOR_INT;
3145 /* Get the mode which has this inner mode and number of units. */
3146 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3147 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3148 && GET_MODE_INNER (mode) == innermode)
3149 return mode;
3151 gcc_unreachable ();
3155 return mode;
3158 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3159 this may not agree with the mode that the type system has chosen for the
3160 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3161 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3163 static rtx
3164 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3165 unsigned int regno)
3167 rtx tmp;
3169 if (orig_mode != BLKmode)
3170 tmp = gen_rtx_REG (orig_mode, regno);
3171 else
3173 tmp = gen_rtx_REG (mode, regno);
3174 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3175 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3178 return tmp;
3181 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3182 of this code is to classify each 8bytes of incoming argument by the register
3183 class and assign registers accordingly. */
3185 /* Return the union class of CLASS1 and CLASS2.
3186 See the x86-64 PS ABI for details. */
3188 static enum x86_64_reg_class
3189 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3191 /* Rule #1: If both classes are equal, this is the resulting class. */
3192 if (class1 == class2)
3193 return class1;
3195 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3196 the other class. */
3197 if (class1 == X86_64_NO_CLASS)
3198 return class2;
3199 if (class2 == X86_64_NO_CLASS)
3200 return class1;
3202 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3203 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3204 return X86_64_MEMORY_CLASS;
3206 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3207 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3208 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3209 return X86_64_INTEGERSI_CLASS;
3210 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3211 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3212 return X86_64_INTEGER_CLASS;
3214 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3215 MEMORY is used. */
3216 if (class1 == X86_64_X87_CLASS
3217 || class1 == X86_64_X87UP_CLASS
3218 || class1 == X86_64_COMPLEX_X87_CLASS
3219 || class2 == X86_64_X87_CLASS
3220 || class2 == X86_64_X87UP_CLASS
3221 || class2 == X86_64_COMPLEX_X87_CLASS)
3222 return X86_64_MEMORY_CLASS;
3224 /* Rule #6: Otherwise class SSE is used. */
3225 return X86_64_SSE_CLASS;
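/* Editorial illustration: for

       struct s { int i; float f; };

   both fields fall into the same eightbyte; the int classifies as
   X86_64_INTEGERSI_CLASS, the float as X86_64_SSESF_CLASS, and rule #4
   above merges the pair to X86_64_INTEGERSI_CLASS, so the whole struct
   is passed in a single integer register.  */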
3228 /* Classify the argument of type TYPE and mode MODE.
3229 CLASSES will be filled by the register class used to pass each word
3230 of the operand. The number of words is returned. In case the parameter
3231 should be passed in memory, 0 is returned. As a special case for zero
3232 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3234 BIT_OFFSET is used internally for handling records; it specifies the offset
3235 of the argument in bits, taken modulo 256, to avoid overflow cases.
3237 See the x86-64 PS ABI for details.  */
3240 static int
3241 classify_argument (enum machine_mode mode, tree type,
3242 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3244 HOST_WIDE_INT bytes =
3245 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3246 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3248 /* Variable sized entities are always passed/returned in memory. */
3249 if (bytes < 0)
3250 return 0;
3252 if (mode != VOIDmode
3253 && targetm.calls.must_pass_in_stack (mode, type))
3254 return 0;
3256 if (type && AGGREGATE_TYPE_P (type))
3258 int i;
3259 tree field;
3260 enum x86_64_reg_class subclasses[MAX_CLASSES];
3262 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3263 if (bytes > 16)
3264 return 0;
3266 for (i = 0; i < words; i++)
3267 classes[i] = X86_64_NO_CLASS;
3269 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3270 signal the memory class, so handle this as a special case. */
3271 if (!words)
3273 classes[0] = X86_64_NO_CLASS;
3274 return 1;
3277 /* Classify each field of record and merge classes. */
3278 switch (TREE_CODE (type))
3280 case RECORD_TYPE:
3281 /* And now merge the fields of structure. */
3282 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3284 if (TREE_CODE (field) == FIELD_DECL)
3286 int num;
3288 if (TREE_TYPE (field) == error_mark_node)
3289 continue;
3291 /* Bitfields are always classified as integer. Handle them
3292 early, since later code would consider them to be
3293 misaligned integers. */
3294 if (DECL_BIT_FIELD (field))
3296 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3297 i < ((int_bit_position (field) + (bit_offset % 64))
3298 + tree_low_cst (DECL_SIZE (field), 0)
3299 + 63) / 8 / 8; i++)
3300 classes[i] =
3301 merge_classes (X86_64_INTEGER_CLASS,
3302 classes[i]);
3304 else
3306 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3307 TREE_TYPE (field), subclasses,
3308 (int_bit_position (field)
3309 + bit_offset) % 256);
3310 if (!num)
3311 return 0;
3312 for (i = 0; i < num; i++)
3314 int pos =
3315 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3316 classes[i + pos] =
3317 merge_classes (subclasses[i], classes[i + pos]);
3322 break;
3324 case ARRAY_TYPE:
3325 /* Arrays are handled as small records. */
3327 int num;
3328 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3329 TREE_TYPE (type), subclasses, bit_offset);
3330 if (!num)
3331 return 0;
3333 /* The partial classes are now full classes. */
3334 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3335 subclasses[0] = X86_64_SSE_CLASS;
3336 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3337 subclasses[0] = X86_64_INTEGER_CLASS;
3339 for (i = 0; i < words; i++)
3340 classes[i] = subclasses[i % num];
3342 break;
3344 case UNION_TYPE:
3345 case QUAL_UNION_TYPE:
3346 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
3348 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3350 if (TREE_CODE (field) == FIELD_DECL)
3352 int num;
3354 if (TREE_TYPE (field) == error_mark_node)
3355 continue;
3357 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3358 TREE_TYPE (field), subclasses,
3359 bit_offset);
3360 if (!num)
3361 return 0;
3362 for (i = 0; i < num; i++)
3363 classes[i] = merge_classes (subclasses[i], classes[i]);
3366 break;
3368 default:
3369 gcc_unreachable ();
3372 /* Final merger cleanup. */
3373 for (i = 0; i < words; i++)
3375 /* If one class is MEMORY, everything should be passed in
3376 memory. */
3377 if (classes[i] == X86_64_MEMORY_CLASS)
3378 return 0;
3380 /* The X86_64_SSEUP_CLASS should always be preceded by
3381 X86_64_SSE_CLASS. */
3382 if (classes[i] == X86_64_SSEUP_CLASS
3383 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3384 classes[i] = X86_64_SSE_CLASS;
3386 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3387 if (classes[i] == X86_64_X87UP_CLASS
3388 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3389 classes[i] = X86_64_SSE_CLASS;
3391 return words;
3394 /* Compute the alignment needed. We align all types to their natural boundaries,
3395 with the exception of XFmode, which is aligned to 64 bits. */
3396 if (mode != VOIDmode && mode != BLKmode)
3398 int mode_alignment = GET_MODE_BITSIZE (mode);
3400 if (mode == XFmode)
3401 mode_alignment = 128;
3402 else if (mode == XCmode)
3403 mode_alignment = 256;
3404 if (COMPLEX_MODE_P (mode))
3405 mode_alignment /= 2;
3406 /* Misaligned fields are always returned in memory. */
3407 if (bit_offset % mode_alignment)
3408 return 0;
3411 /* for V1xx modes, just use the base mode */
3412 if (VECTOR_MODE_P (mode)
3413 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3414 mode = GET_MODE_INNER (mode);
3416 /* Classification of atomic types. */
3417 switch (mode)
3419 case SDmode:
3420 case DDmode:
3421 classes[0] = X86_64_SSE_CLASS;
3422 return 1;
3423 case TDmode:
3424 classes[0] = X86_64_SSE_CLASS;
3425 classes[1] = X86_64_SSEUP_CLASS;
3426 return 2;
3427 case DImode:
3428 case SImode:
3429 case HImode:
3430 case QImode:
3431 case CSImode:
3432 case CHImode:
3433 case CQImode:
3434 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3435 classes[0] = X86_64_INTEGERSI_CLASS;
3436 else
3437 classes[0] = X86_64_INTEGER_CLASS;
3438 return 1;
3439 case CDImode:
3440 case TImode:
3441 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3442 return 2;
3443 case CTImode:
3444 return 0;
3445 case SFmode:
3446 if (!(bit_offset % 64))
3447 classes[0] = X86_64_SSESF_CLASS;
3448 else
3449 classes[0] = X86_64_SSE_CLASS;
3450 return 1;
3451 case DFmode:
3452 classes[0] = X86_64_SSEDF_CLASS;
3453 return 1;
3454 case XFmode:
3455 classes[0] = X86_64_X87_CLASS;
3456 classes[1] = X86_64_X87UP_CLASS;
3457 return 2;
3458 case TFmode:
3459 classes[0] = X86_64_SSE_CLASS;
3460 classes[1] = X86_64_SSEUP_CLASS;
3461 return 2;
3462 case SCmode:
3463 classes[0] = X86_64_SSE_CLASS;
3464 return 1;
3465 case DCmode:
3466 classes[0] = X86_64_SSEDF_CLASS;
3467 classes[1] = X86_64_SSEDF_CLASS;
3468 return 2;
3469 case XCmode:
3470 classes[0] = X86_64_COMPLEX_X87_CLASS;
3471 return 1;
3472 case TCmode:
3473 /* This mode is larger than 16 bytes. */
3474 return 0;
3475 case V4SFmode:
3476 case V4SImode:
3477 case V16QImode:
3478 case V8HImode:
3479 case V2DFmode:
3480 case V2DImode:
3481 classes[0] = X86_64_SSE_CLASS;
3482 classes[1] = X86_64_SSEUP_CLASS;
3483 return 2;
3484 case V2SFmode:
3485 case V2SImode:
3486 case V4HImode:
3487 case V8QImode:
3488 classes[0] = X86_64_SSE_CLASS;
3489 return 1;
3490 case BLKmode:
3491 case VOIDmode:
3492 return 0;
3493 default:
3494 gcc_assert (VECTOR_MODE_P (mode));
3496 if (bytes > 16)
3497 return 0;
3499 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3501 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3502 classes[0] = X86_64_INTEGERSI_CLASS;
3503 else
3504 classes[0] = X86_64_INTEGER_CLASS;
3505 classes[1] = X86_64_INTEGER_CLASS;
3506 return 1 + (bytes > 8);
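/* Editorial illustration of the classification above:

       struct s { double d; int i; };    (16 bytes, two eightbytes)

   classify_argument returns 2 with classes[0] = X86_64_SSEDF_CLASS for
   the double and classes[1] = X86_64_INTEGER_CLASS for the int, so the
   struct travels in one SSE register and one integer register.  */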
3510 /* Examine the argument and return the number of registers required in each
3511 class. Return 0 iff the parameter should be passed in memory. */
3512 static int
3513 examine_argument (enum machine_mode mode, tree type, int in_return,
3514 int *int_nregs, int *sse_nregs)
3516 enum x86_64_reg_class class[MAX_CLASSES];
3517 int n = classify_argument (mode, type, class, 0);
3519 *int_nregs = 0;
3520 *sse_nregs = 0;
3521 if (!n)
3522 return 0;
3523 for (n--; n >= 0; n--)
3524 switch (class[n])
3526 case X86_64_INTEGER_CLASS:
3527 case X86_64_INTEGERSI_CLASS:
3528 (*int_nregs)++;
3529 break;
3530 case X86_64_SSE_CLASS:
3531 case X86_64_SSESF_CLASS:
3532 case X86_64_SSEDF_CLASS:
3533 (*sse_nregs)++;
3534 break;
3535 case X86_64_NO_CLASS:
3536 case X86_64_SSEUP_CLASS:
3537 break;
3538 case X86_64_X87_CLASS:
3539 case X86_64_X87UP_CLASS:
3540 if (!in_return)
3541 return 0;
3542 break;
3543 case X86_64_COMPLEX_X87_CLASS:
3544 return in_return ? 2 : 0;
3545 case X86_64_MEMORY_CLASS:
3546 gcc_unreachable ();
3548 return 1;
3551 /* Construct container for the argument used by GCC interface. See
3552 FUNCTION_ARG for the detailed description. */
3554 static rtx
3555 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3556 tree type, int in_return, int nintregs, int nsseregs,
3557 const int *intreg, int sse_regno)
3559 /* The following variables hold the static issued_error state.  */
3560 static bool issued_sse_arg_error;
3561 static bool issued_sse_ret_error;
3562 static bool issued_x87_ret_error;
3564 enum machine_mode tmpmode;
3565 int bytes =
3566 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3567 enum x86_64_reg_class class[MAX_CLASSES];
3568 int n;
3569 int i;
3570 int nexps = 0;
3571 int needed_sseregs, needed_intregs;
3572 rtx exp[MAX_CLASSES];
3573 rtx ret;
3575 n = classify_argument (mode, type, class, 0);
3576 if (!n)
3577 return NULL;
3578 if (!examine_argument (mode, type, in_return, &needed_intregs,
3579 &needed_sseregs))
3580 return NULL;
3581 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3582 return NULL;
3584 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3585 some less clueful developer tries to use floating-point anyway. */
3586 if (needed_sseregs && !TARGET_SSE)
3588 if (in_return)
3590 if (!issued_sse_ret_error)
3592 error ("SSE register return with SSE disabled");
3593 issued_sse_ret_error = true;
3596 else if (!issued_sse_arg_error)
3598 error ("SSE register argument with SSE disabled");
3599 issued_sse_arg_error = true;
3601 return NULL;
3604 /* Likewise, error if the ABI requires us to return values in the
3605 x87 registers and the user specified -mno-80387. */
3606 if (!TARGET_80387 && in_return)
3607 for (i = 0; i < n; i++)
3608 if (class[i] == X86_64_X87_CLASS
3609 || class[i] == X86_64_X87UP_CLASS
3610 || class[i] == X86_64_COMPLEX_X87_CLASS)
3612 if (!issued_x87_ret_error)
3614 error ("x87 register return with x87 disabled");
3615 issued_x87_ret_error = true;
3617 return NULL;
3620 /* First construct simple cases. Avoid SCmode, since we want to use
3621 a single register to pass this type. */
3622 if (n == 1 && mode != SCmode)
3623 switch (class[0])
3625 case X86_64_INTEGER_CLASS:
3626 case X86_64_INTEGERSI_CLASS:
3627 return gen_rtx_REG (mode, intreg[0]);
3628 case X86_64_SSE_CLASS:
3629 case X86_64_SSESF_CLASS:
3630 case X86_64_SSEDF_CLASS:
3631 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3632 case X86_64_X87_CLASS:
3633 case X86_64_COMPLEX_X87_CLASS:
3634 return gen_rtx_REG (mode, FIRST_STACK_REG);
3635 case X86_64_NO_CLASS:
3636 /* Zero sized array, struct or class. */
3637 return NULL;
3638 default:
3639 gcc_unreachable ();
3641 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3642 && mode != BLKmode)
3643 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3645 if (n == 2
3646 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3647 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3648 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3649 && class[1] == X86_64_INTEGER_CLASS
3650 && (mode == CDImode || mode == TImode || mode == TFmode)
3651 && intreg[0] + 1 == intreg[1])
3652 return gen_rtx_REG (mode, intreg[0]);
3654 /* Otherwise figure out the entries of the PARALLEL. */
3655 for (i = 0; i < n; i++)
3657 switch (class[i])
3659 case X86_64_NO_CLASS:
3660 break;
3661 case X86_64_INTEGER_CLASS:
3662 case X86_64_INTEGERSI_CLASS:
3663 /* Merge TImodes on aligned occasions here too. */
3664 if (i * 8 + 8 > bytes)
3665 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3666 else if (class[i] == X86_64_INTEGERSI_CLASS)
3667 tmpmode = SImode;
3668 else
3669 tmpmode = DImode;
3670 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3671 if (tmpmode == BLKmode)
3672 tmpmode = DImode;
3673 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3674 gen_rtx_REG (tmpmode, *intreg),
3675 GEN_INT (i*8));
3676 intreg++;
3677 break;
3678 case X86_64_SSESF_CLASS:
3679 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3680 gen_rtx_REG (SFmode,
3681 SSE_REGNO (sse_regno)),
3682 GEN_INT (i*8));
3683 sse_regno++;
3684 break;
3685 case X86_64_SSEDF_CLASS:
3686 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3687 gen_rtx_REG (DFmode,
3688 SSE_REGNO (sse_regno)),
3689 GEN_INT (i*8));
3690 sse_regno++;
3691 break;
3692 case X86_64_SSE_CLASS:
3693 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3694 tmpmode = TImode;
3695 else
3696 tmpmode = DImode;
3697 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3698 gen_rtx_REG (tmpmode,
3699 SSE_REGNO (sse_regno)),
3700 GEN_INT (i*8));
3701 if (tmpmode == TImode)
3702 i++;
3703 sse_regno++;
3704 break;
3705 default:
3706 gcc_unreachable ();
3710 /* Empty aligned struct, union or class. */
3711 if (nexps == 0)
3712 return NULL;
3714 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3715 for (i = 0; i < nexps; i++)
3716 XVECEXP (ret, 0, i) = exp [i];
3717 return ret;
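/* Editorial sketch: for the struct { double d; int i; } example above,
   passed as the first argument, construct_container returns roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di)  (const_int 8))])

   where each expr_list records the byte offset of its piece within the
   argument.  */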
3720 /* Update the data in CUM to advance over an argument of mode MODE
3721 and data type TYPE. (TYPE is null for libcalls where that information
3722 may not be available.) */
3724 static void
3725 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3726 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3728 switch (mode)
3730 default:
3731 break;
3733 case BLKmode:
3734 if (bytes < 0)
3735 break;
3736 /* FALLTHRU */
3738 case DImode:
3739 case SImode:
3740 case HImode:
3741 case QImode:
3742 cum->words += words;
3743 cum->nregs -= words;
3744 cum->regno += words;
3746 if (cum->nregs <= 0)
3748 cum->nregs = 0;
3749 cum->regno = 0;
3751 break;
3753 case DFmode:
3754 if (cum->float_in_sse < 2)
3755 break;
3756 case SFmode:
3757 if (cum->float_in_sse < 1)
3758 break;
3759 /* FALLTHRU */
3761 case TImode:
3762 case V16QImode:
3763 case V8HImode:
3764 case V4SImode:
3765 case V2DImode:
3766 case V4SFmode:
3767 case V2DFmode:
3768 if (!type || !AGGREGATE_TYPE_P (type))
3770 cum->sse_words += words;
3771 cum->sse_nregs -= 1;
3772 cum->sse_regno += 1;
3773 if (cum->sse_nregs <= 0)
3775 cum->sse_nregs = 0;
3776 cum->sse_regno = 0;
3779 break;
3781 case V8QImode:
3782 case V4HImode:
3783 case V2SImode:
3784 case V2SFmode:
3785 if (!type || !AGGREGATE_TYPE_P (type))
3787 cum->mmx_words += words;
3788 cum->mmx_nregs -= 1;
3789 cum->mmx_regno += 1;
3790 if (cum->mmx_nregs <= 0)
3792 cum->mmx_nregs = 0;
3793 cum->mmx_regno = 0;
3796 break;
3800 static void
3801 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3802 tree type, HOST_WIDE_INT words)
3804 int int_nregs, sse_nregs;
3806 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3807 cum->words += words;
3808 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3810 cum->nregs -= int_nregs;
3811 cum->sse_nregs -= sse_nregs;
3812 cum->regno += int_nregs;
3813 cum->sse_regno += sse_nregs;
3815 else
3816 cum->words += words;
3819 static void
3820 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3821 HOST_WIDE_INT words)
3823 /* Otherwise, this should be passed indirect. */
3824 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3826 cum->words += words;
3827 if (cum->nregs > 0)
3829 cum->nregs -= 1;
3830 cum->regno += 1;
3834 void
3835 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3836 tree type, int named ATTRIBUTE_UNUSED)
3838 HOST_WIDE_INT bytes, words;
3840 if (mode == BLKmode)
3841 bytes = int_size_in_bytes (type);
3842 else
3843 bytes = GET_MODE_SIZE (mode);
3844 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3846 if (type)
3847 mode = type_natural_mode (type);
3849 if (TARGET_64BIT_MS_ABI)
3850 function_arg_advance_ms_64 (cum, bytes, words);
3851 else if (TARGET_64BIT)
3852 function_arg_advance_64 (cum, mode, type, words);
3853 else
3854 function_arg_advance_32 (cum, mode, type, bytes, words);
3857 /* Define where to put the arguments to a function.
3858 Value is zero to push the argument on the stack,
3859 or a hard register in which to store the argument.
3861 MODE is the argument's machine mode.
3862 TYPE is the data type of the argument (as a tree).
3863 This is null for libcalls where that information may
3864 not be available.
3865 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3866 the preceding args and about the function being called.
3867 NAMED is nonzero if this argument is a named parameter
3868 (otherwise it is an extra parameter matching an ellipsis). */
3870 static rtx
3871 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3872 enum machine_mode orig_mode, tree type,
3873 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3875 static bool warnedsse, warnedmmx;
3877 /* Avoid the AL settings for the Unix64 ABI. */
3878 if (mode == VOIDmode)
3879 return constm1_rtx;
3881 switch (mode)
3883 default:
3884 break;
3886 case BLKmode:
3887 if (bytes < 0)
3888 break;
3889 /* FALLTHRU */
3890 case DImode:
3891 case SImode:
3892 case HImode:
3893 case QImode:
3894 if (words <= cum->nregs)
3896 int regno = cum->regno;
3898 /* Fastcall allocates the first two DWORD (SImode) or
3899 smaller arguments to ECX and EDX. */
3900 if (cum->fastcall)
3902 if (mode == BLKmode || mode == DImode)
3903 break;
3905 /* ECX not EAX is the first allocated register. */
3906 if (regno == 0)
3907 regno = 2;
3909 return gen_rtx_REG (mode, regno);
3911 break;
3913 case DFmode:
3914 if (cum->float_in_sse < 2)
3915 break;
3916 case SFmode:
3917 if (cum->float_in_sse < 1)
3918 break;
3919 /* FALLTHRU */
3920 case TImode:
3921 case V16QImode:
3922 case V8HImode:
3923 case V4SImode:
3924 case V2DImode:
3925 case V4SFmode:
3926 case V2DFmode:
3927 if (!type || !AGGREGATE_TYPE_P (type))
3929 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3931 warnedsse = true;
3932 warning (0, "SSE vector argument without SSE enabled "
3933 "changes the ABI");
3935 if (cum->sse_nregs)
3936 return gen_reg_or_parallel (mode, orig_mode,
3937 cum->sse_regno + FIRST_SSE_REG);
3939 break;
3941 case V8QImode:
3942 case V4HImode:
3943 case V2SImode:
3944 case V2SFmode:
3945 if (!type || !AGGREGATE_TYPE_P (type))
3947 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3949 warnedmmx = true;
3950 warning (0, "MMX vector argument without MMX enabled "
3951 "changes the ABI");
3953 if (cum->mmx_nregs)
3954 return gen_reg_or_parallel (mode, orig_mode,
3955 cum->mmx_regno + FIRST_MMX_REG);
3957 break;
3960 return NULL_RTX;
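/* Editorial illustration of the fastcall handling above:

       void __attribute__ ((fastcall)) f (int a, int b, int c);

   passes A in %ecx (regno 0 is remapped to ECX) and B in %edx, while C
   goes on the stack once cum->nregs is exhausted.  */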
3963 static rtx
3964 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3965 enum machine_mode orig_mode, tree type)
3967 /* Handle a hidden AL argument containing number of registers
3968 for varargs x86-64 functions. */
3969 if (mode == VOIDmode)
3970 return GEN_INT (cum->maybe_vaarg
3971 ? (cum->sse_nregs < 0
3972 ? SSE_REGPARM_MAX
3973 : cum->sse_regno)
3974 : -1);
3976 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3977 cum->sse_nregs,
3978 &x86_64_int_parameter_registers [cum->regno],
3979 cum->sse_regno);
3982 static rtx
3983 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3984 enum machine_mode orig_mode, int named)
3986 unsigned int regno;
3988 /* Avoid the AL settings for the Unix64 ABI. */
3989 if (mode == VOIDmode)
3990 return constm1_rtx;
3992 /* If we've run out of registers, it goes on the stack. */
3993 if (cum->nregs == 0)
3994 return NULL_RTX;
3996 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3998 /* Only floating point modes are passed in anything but integer regs. */
3999 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4001 if (named)
4002 regno = cum->regno + FIRST_SSE_REG;
4003 else
4005 rtx t1, t2;
4007 /* Unnamed floating parameters are passed in both the
4008 SSE and integer registers. */
4009 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4010 t2 = gen_rtx_REG (mode, regno);
4011 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4012 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4013 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4017 return gen_reg_or_parallel (mode, orig_mode, regno);
4021 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4022 tree type, int named)
4024 enum machine_mode mode = omode;
4025 HOST_WIDE_INT bytes, words;
4027 if (mode == BLKmode)
4028 bytes = int_size_in_bytes (type);
4029 else
4030 bytes = GET_MODE_SIZE (mode);
4031 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4033 /* To simplify the code below, represent vector types with a vector mode
4034 even if MMX/SSE are not active. */
4035 if (type && TREE_CODE (type) == VECTOR_TYPE)
4036 mode = type_natural_mode (type);
4038 if (TARGET_64BIT_MS_ABI)
4039 return function_arg_ms_64 (cum, mode, omode, named);
4040 else if (TARGET_64BIT)
4041 return function_arg_64 (cum, mode, omode, type);
4042 else
4043 return function_arg_32 (cum, mode, omode, type, bytes, words);
4046 /* A C expression that indicates when an argument must be passed by
4047 reference. If nonzero for an argument, a copy of that argument is
4048 made in memory and a pointer to the argument is passed instead of
4049 the argument itself. The pointer is passed in whatever way is
4050 appropriate for passing a pointer to that type. */
4052 static bool
4053 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4054 enum machine_mode mode ATTRIBUTE_UNUSED,
4055 tree type, bool named ATTRIBUTE_UNUSED)
4057 if (TARGET_64BIT_MS_ABI)
4059 if (type)
4061 /* Arrays are passed by reference. */
4062 if (TREE_CODE (type) == ARRAY_TYPE)
4063 return true;
4065 if (AGGREGATE_TYPE_P (type))
4067 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4068 are passed by reference. */
4069 int el2 = exact_log2 (int_size_in_bytes (type));
4070 return !(el2 >= 0 && el2 <= 3);
4074 /* __m128 is passed by reference. */
4075 /* ??? How to handle complex? For now treat them as structs,
4076 and pass them by reference if they're too large. */
4077 if (GET_MODE_SIZE (mode) > 8)
4078 return true;
4080 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4081 return 1;
4083 return 0;
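/* Editorial illustration: under TARGET_64BIT_MS_ABI a 12-byte struct
   such as

       struct s { int a, b, c; };

   is passed by reference, because exact_log2 (12) is negative, whereas
   an 8-byte struct (exact_log2 == 3) is passed by value.  */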
4086 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4087 ABI. Only called if TARGET_SSE. */
4088 static bool
4089 contains_128bit_aligned_vector_p (tree type)
4091 enum machine_mode mode = TYPE_MODE (type);
4092 if (SSE_REG_MODE_P (mode)
4093 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4094 return true;
4095 if (TYPE_ALIGN (type) < 128)
4096 return false;
4098 if (AGGREGATE_TYPE_P (type))
4100 /* Walk the aggregates recursively. */
4101 switch (TREE_CODE (type))
4103 case RECORD_TYPE:
4104 case UNION_TYPE:
4105 case QUAL_UNION_TYPE:
4107 tree field;
4109 /* Walk all the structure fields. */
4110 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4112 if (TREE_CODE (field) == FIELD_DECL
4113 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4114 return true;
4116 break;
4119 case ARRAY_TYPE:
4120 /* Just for use if some language passes arrays by value. */
4121 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4122 return true;
4123 break;
4125 default:
4126 gcc_unreachable ();
4129 return false;
4132 /* Gives the alignment boundary, in bits, of an argument with the
4133 specified mode and type. */
4136 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4138 int align;
4139 if (type)
4140 align = TYPE_ALIGN (type);
4141 else
4142 align = GET_MODE_ALIGNMENT (mode);
4143 if (align < PARM_BOUNDARY)
4144 align = PARM_BOUNDARY;
4145 if (!TARGET_64BIT)
4147 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4148 make an exception for SSE modes since these require 128bit
4149 alignment.
4151 The handling here differs from field_alignment. ICC aligns MMX
4152 arguments to 4 byte boundaries, while structure fields are aligned
4153 to 8 byte boundaries. */
4154 if (!TARGET_SSE)
4155 align = PARM_BOUNDARY;
4156 else if (!type)
4158 if (!SSE_REG_MODE_P (mode))
4159 align = PARM_BOUNDARY;
4161 else
4163 if (!contains_128bit_aligned_vector_p (type))
4164 align = PARM_BOUNDARY;
4167 if (align > 128)
4168 align = 128;
4169 return align;
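/* Editorial illustration: on a 32-bit SSE-enabled target an __m128
   argument, or an aggregate containing a 128-bit aligned vector, gets
   a 128-bit boundary from the code above, while a plain double keeps
   the 32-bit PARM_BOUNDARY of the i386 ABI.  */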
4172 /* Return true if N is a possible register number of function value. */
4174 bool
4175 ix86_function_value_regno_p (int regno)
4177 switch (regno)
4179 case 0:
4180 return true;
4182 case FIRST_FLOAT_REG:
4183 if (TARGET_64BIT_MS_ABI)
4184 return false;
4185 return TARGET_FLOAT_RETURNS_IN_80387;
4187 case FIRST_SSE_REG:
4188 return TARGET_SSE;
4190 case FIRST_MMX_REG:
4191 if (TARGET_MACHO || TARGET_64BIT)
4192 return false;
4193 return TARGET_MMX;
4196 return false;
4199 /* Define how to find the value returned by a function.
4200 VALTYPE is the data type of the value (as a tree).
4201 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4202 otherwise, FUNC is 0. */
4204 static rtx
4205 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4206 tree fntype, tree fn)
4208 unsigned int regno;
4210 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4211 we normally prevent this case when mmx is not available. However
4212 some ABIs may require the result to be returned like DImode. */
4213 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4214 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4216 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4217 we prevent this case when sse is not available. However some ABIs
4218 may require the result to be returned like integer TImode. */
4219 else if (mode == TImode
4220 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4221 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4223 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4224 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4225 regno = FIRST_FLOAT_REG;
4226 else
4227 /* Most things go in %eax. */
4228 regno = 0;
4230 /* Override FP return register with %xmm0 for local functions when
4231 SSE math is enabled or for functions with sseregparm attribute. */
4232 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4234 int sse_level = ix86_function_sseregparm (fntype, fn);
4235 if ((sse_level >= 1 && mode == SFmode)
4236 || (sse_level == 2 && mode == DFmode))
4237 regno = FIRST_SSE_REG;
4240 return gen_rtx_REG (orig_mode, regno);
4243 static rtx
4244 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4245 tree valtype)
4247 rtx ret;
4249 /* Handle libcalls, which don't provide a type node. */
4250 if (valtype == NULL)
4252 switch (mode)
4254 case SFmode:
4255 case SCmode:
4256 case DFmode:
4257 case DCmode:
4258 case TFmode:
4259 case SDmode:
4260 case DDmode:
4261 case TDmode:
4262 return gen_rtx_REG (mode, FIRST_SSE_REG);
4263 case XFmode:
4264 case XCmode:
4265 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4266 case TCmode:
4267 return NULL;
4268 default:
4269 return gen_rtx_REG (mode, 0);
4273 ret = construct_container (mode, orig_mode, valtype, 1,
4274 REGPARM_MAX, SSE_REGPARM_MAX,
4275 x86_64_int_return_registers, 0);
4277 /* For zero sized structures, construct_container returns NULL, but we
4278 need to keep the rest of the compiler happy by returning a meaningful value. */
4279 if (!ret)
4280 ret = gen_rtx_REG (orig_mode, 0);
4282 return ret;
4285 static rtx
4286 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4288 unsigned int regno = 0;
4290 if (TARGET_SSE)
4292 if (mode == SFmode || mode == DFmode)
4293 regno = FIRST_SSE_REG;
4294 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4295 regno = FIRST_SSE_REG;
4298 return gen_rtx_REG (orig_mode, regno);
4301 static rtx
4302 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4303 enum machine_mode orig_mode, enum machine_mode mode)
4305 tree fn, fntype;
4307 fn = NULL_TREE;
4308 if (fntype_or_decl && DECL_P (fntype_or_decl))
4309 fn = fntype_or_decl;
4310 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4312 if (TARGET_64BIT_MS_ABI)
4313 return function_value_ms_64 (orig_mode, mode);
4314 else if (TARGET_64BIT)
4315 return function_value_64 (orig_mode, mode, valtype);
4316 else
4317 return function_value_32 (orig_mode, mode, fntype, fn);
4320 static rtx
4321 ix86_function_value (tree valtype, tree fntype_or_decl,
4322 bool outgoing ATTRIBUTE_UNUSED)
4324 enum machine_mode mode, orig_mode;
4326 orig_mode = TYPE_MODE (valtype);
4327 mode = type_natural_mode (valtype);
4328 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4332 ix86_libcall_value (enum machine_mode mode)
4334 return ix86_function_value_1 (NULL, NULL, mode, mode);
4337 /* Return true iff type is returned in memory. */
4339 static int
4340 return_in_memory_32 (tree type, enum machine_mode mode)
4342 HOST_WIDE_INT size;
4344 if (mode == BLKmode)
4345 return 1;
4347 size = int_size_in_bytes (type);
4349 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4350 return 0;
4352 if (VECTOR_MODE_P (mode) || mode == TImode)
4354 /* User-created vectors small enough to fit in EAX. */
4355 if (size < 8)
4356 return 0;
4358 /* MMX/3dNow values are returned in MM0,
4359 except when it doesn't exist. */
4360 if (size == 8)
4361 return (TARGET_MMX ? 0 : 1);
4363 /* SSE values are returned in XMM0, except when it doesn't exist. */
4364 if (size == 16)
4365 return (TARGET_SSE ? 0 : 1);
4368 if (mode == XFmode)
4369 return 0;
4371 if (mode == TDmode)
4372 return 1;
4374 if (size > 12)
4375 return 1;
4376 return 0;
4379 static int
4380 return_in_memory_64 (tree type, enum machine_mode mode)
4382 int needed_intregs, needed_sseregs;
4383 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4386 static int
4387 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4389 HOST_WIDE_INT size = int_size_in_bytes (type);
4391 /* __m128 and friends are returned in xmm0. */
4392 if (size == 16 && VECTOR_MODE_P (mode))
4393 return 0;
4395 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4396 return (size != 1 && size != 2 && size != 4 && size != 8);
4400 ix86_return_in_memory (tree type)
4402 enum machine_mode mode = type_natural_mode (type);
4404 if (TARGET_64BIT_MS_ABI)
4405 return return_in_memory_ms_64 (type, mode);
4406 else if (TARGET_64BIT)
4407 return return_in_memory_64 (type, mode);
4408 else
4409 return return_in_memory_32 (type, mode);
4412 /* Return nonzero iff TYPE is returned in memory. This version is used
4413 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4414 but differs notably in that when MMX is available, 8-byte vectors
4415 are returned in memory, rather than in MMX registers. */
4417 int
4418 ix86_sol10_return_in_memory (tree type)
4420 int size;
4421 enum machine_mode mode = type_natural_mode (type);
4423 if (TARGET_64BIT)
4424 return return_in_memory_64 (type, mode);
4426 if (mode == BLKmode)
4427 return 1;
4429 size = int_size_in_bytes (type);
4431 if (VECTOR_MODE_P (mode))
4433 /* Return in memory only if MMX registers *are* available. This
4434 seems backwards, but it is consistent with the existing
4435 Solaris x86 ABI. */
4436 if (size == 8)
4437 return TARGET_MMX;
4438 if (size == 16)
4439 return !TARGET_SSE;
4441 else if (mode == TImode)
4442 return !TARGET_SSE;
4443 else if (mode == XFmode)
4444 return 0;
4446 return size > 12;
4449 /* When returning SSE vector types, we have a choice of either
4450 (1) being abi incompatible with a -march switch, or
4451 (2) generating an error.
4452 Given no good solution, I think the safest thing is one warning.
4453 The user won't be able to use -Werror, but....
4455 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4456 called in response to actually generating a caller or callee that
4457 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4458 via aggregate_value_p for general type probing from tree-ssa. */
4460 static rtx
4461 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4463 static bool warnedsse, warnedmmx;
4465 if (!TARGET_64BIT && type)
4467 /* Look at the return type of the function, not the function type. */
4468 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4470 if (!TARGET_SSE && !warnedsse)
4472 if (mode == TImode
4473 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4475 warnedsse = true;
4476 warning (0, "SSE vector return without SSE enabled "
4477 "changes the ABI");
4481 if (!TARGET_MMX && !warnedmmx)
4483 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4485 warnedmmx = true;
4486 warning (0, "MMX vector return without MMX enabled "
4487 "changes the ABI");
4492 return NULL;
4496 /* Create the va_list data type. */
4498 static tree
4499 ix86_build_builtin_va_list (void)
4501 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4503 /* For i386 we use plain pointer to argument area. */
4504 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4505 return build_pointer_type (char_type_node);
4507 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4508 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4510 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4511 unsigned_type_node);
4512 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4513 unsigned_type_node);
4514 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4515 ptr_type_node);
4516 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4517 ptr_type_node);
4519 va_list_gpr_counter_field = f_gpr;
4520 va_list_fpr_counter_field = f_fpr;
4522 DECL_FIELD_CONTEXT (f_gpr) = record;
4523 DECL_FIELD_CONTEXT (f_fpr) = record;
4524 DECL_FIELD_CONTEXT (f_ovf) = record;
4525 DECL_FIELD_CONTEXT (f_sav) = record;
4527 TREE_CHAIN (record) = type_decl;
4528 TYPE_NAME (record) = type_decl;
4529 TYPE_FIELDS (record) = f_gpr;
4530 TREE_CHAIN (f_gpr) = f_fpr;
4531 TREE_CHAIN (f_fpr) = f_ovf;
4532 TREE_CHAIN (f_ovf) = f_sav;
4534 layout_type (record);
4536 /* The correct type is an array type of one element. */
4537 return build_array_type (record, build_index_type (size_zero_node));
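/* Editorial note: the record built above corresponds roughly to the C
   declaration mandated by the x86-64 psABI:

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];

   the array-of-one-element trick makes va_list decay to a pointer when
   passed to functions.  */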
4540 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4542 static void
4543 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4545 rtx save_area, mem;
4546 rtx label;
4547 rtx label_ref;
4548 rtx tmp_reg;
4549 rtx nsse_reg;
4550 int set;
4551 int i;
4553 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4554 return;
4556 /* Indicate to allocate space on the stack for varargs save area. */
4557 ix86_save_varrargs_registers = 1;
4558 cfun->stack_alignment_needed = 128;
4560 save_area = frame_pointer_rtx;
4561 set = get_varargs_alias_set ();
4563 for (i = cum->regno;
4564 i < ix86_regparm
4565 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4566 i++)
4568 mem = gen_rtx_MEM (Pmode,
4569 plus_constant (save_area, i * UNITS_PER_WORD));
4570 MEM_NOTRAP_P (mem) = 1;
4571 set_mem_alias_set (mem, set);
4572 emit_move_insn (mem, gen_rtx_REG (Pmode,
4573 x86_64_int_parameter_registers[i]));
4576 if (cum->sse_nregs && cfun->va_list_fpr_size)
4578 /* Now emit code to save SSE registers. The AX parameter contains the number
4579 of SSE parameter registers used to call this function. We use the
4580 sse_prologue_save insn template, which produces a computed jump across
4581 the SSE saves. We need some preparation work to get this working. */
4583 label = gen_label_rtx ();
4584 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4586 /* Compute the address to jump to:
4587 label - eax*4 + nnamed_sse_arguments*4  */
4588 tmp_reg = gen_reg_rtx (Pmode);
4589 nsse_reg = gen_reg_rtx (Pmode);
4590 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4591 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4592 gen_rtx_MULT (Pmode, nsse_reg,
4593 GEN_INT (4))));
4594 if (cum->sse_regno)
4595 emit_move_insn
4596 (nsse_reg,
4597 gen_rtx_CONST (DImode,
4598 gen_rtx_PLUS (DImode,
4599 label_ref,
4600 GEN_INT (cum->sse_regno * 4))));
4601 else
4602 emit_move_insn (nsse_reg, label_ref);
4603 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4605 /* Compute the address of the memory block we save into. We always use a pointer
4606 pointing 127 bytes after the first byte to store; this is needed to keep
4607 the instruction size limited to 4 bytes. */
4608 tmp_reg = gen_reg_rtx (Pmode);
4609 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4610 plus_constant (save_area,
4611 8 * REGPARM_MAX + 127)));
4612 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4613 MEM_NOTRAP_P (mem) = 1;
4614 set_mem_alias_set (mem, set);
4615 set_mem_align (mem, BITS_PER_WORD);
4617 /* And finally do the dirty job! */
4618 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4619 GEN_INT (cum->sse_regno), label));
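/* Editorial sketch of the register save area laid out above, relative
   to the frame pointer based save_area:

       bytes   0 ..  47   the six integer argument registers
                          (rdi, rsi, rdx, rcx, r8, r9), 8 bytes each
       bytes  48 .. 175   the eight SSE argument registers, 16 bytes each

   which matches the gp_offset / fp_offset accounting in ix86_va_start
   below (REGPARM_MAX * 8 == 48 on 64-bit targets).  */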
4623 static void
4624 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4626 int set = get_varargs_alias_set ();
4627 int i;
4629 for (i = cum->regno; i < REGPARM_MAX; i++)
4631 rtx reg, mem;
4633 mem = gen_rtx_MEM (Pmode,
4634 plus_constant (virtual_incoming_args_rtx,
4635 i * UNITS_PER_WORD));
4636 MEM_NOTRAP_P (mem) = 1;
4637 set_mem_alias_set (mem, set);
4639 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4640 emit_move_insn (mem, reg);
4644 static void
4645 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4646 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4647 int no_rtl)
4649 CUMULATIVE_ARGS next_cum;
4650 tree fntype;
4651 int stdarg_p;
4653 /* This argument doesn't appear to be used anymore. Which is good,
4654 because the old code here didn't suppress rtl generation. */
4655 gcc_assert (!no_rtl);
4657 if (!TARGET_64BIT)
4658 return;
4660 fntype = TREE_TYPE (current_function_decl);
4661 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4662 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4663 != void_type_node));
4665 /* For varargs, we do not want to skip the dummy va_dcl argument.
4666 For stdargs, we do want to skip the last named argument. */
4667 next_cum = *cum;
4668 if (stdarg_p)
4669 function_arg_advance (&next_cum, mode, type, 1);
4671 if (TARGET_64BIT_MS_ABI)
4672 setup_incoming_varargs_ms_64 (&next_cum);
4673 else
4674 setup_incoming_varargs_64 (&next_cum);
4677 /* Implement va_start. */
4679 void
4680 ix86_va_start (tree valist, rtx nextarg)
4682 HOST_WIDE_INT words, n_gpr, n_fpr;
4683 tree f_gpr, f_fpr, f_ovf, f_sav;
4684 tree gpr, fpr, ovf, sav, t;
4685 tree type;
4687 /* Only 64bit target needs something special. */
4688 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4690 std_expand_builtin_va_start (valist, nextarg);
4691 return;
4694 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4695 f_fpr = TREE_CHAIN (f_gpr);
4696 f_ovf = TREE_CHAIN (f_fpr);
4697 f_sav = TREE_CHAIN (f_ovf);
4699 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4700 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4701 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4702 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4703 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4705 /* Count number of gp and fp argument registers used. */
4706 words = current_function_args_info.words;
4707 n_gpr = current_function_args_info.regno;
4708 n_fpr = current_function_args_info.sse_regno;
4710 if (cfun->va_list_gpr_size)
4712 type = TREE_TYPE (gpr);
4713 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4714 build_int_cst (type, n_gpr * 8));
4715 TREE_SIDE_EFFECTS (t) = 1;
4716 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4719 if (cfun->va_list_fpr_size)
4721 type = TREE_TYPE (fpr);
4722 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4723 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4724 TREE_SIDE_EFFECTS (t) = 1;
4725 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4728 /* Find the overflow area. */
4729 type = TREE_TYPE (ovf);
4730 t = make_tree (type, virtual_incoming_args_rtx);
4731 if (words != 0)
4732 t = build2 (PLUS_EXPR, type, t,
4733 build_int_cst (type, words * UNITS_PER_WORD));
4734 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4735 TREE_SIDE_EFFECTS (t) = 1;
4736 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4738 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4740 /* Find the register save area.
4741 Prologue of the function save it right above stack frame. */
4742 type = TREE_TYPE (sav);
4743 t = make_tree (type, frame_pointer_rtx);
4744 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4745 TREE_SIDE_EFFECTS (t) = 1;
4746 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4750 /* Implement va_arg. */
4752 static tree
4753 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4755 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4756 tree f_gpr, f_fpr, f_ovf, f_sav;
4757 tree gpr, fpr, ovf, sav, t;
4758 int size, rsize;
4759 tree lab_false, lab_over = NULL_TREE;
4760 tree addr, t2;
4761 rtx container;
4762 int indirect_p = 0;
4763 tree ptrtype;
4764 enum machine_mode nat_mode;
4766 /* Only 64bit target needs something special. */
4767 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4768 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4770 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4771 f_fpr = TREE_CHAIN (f_gpr);
4772 f_ovf = TREE_CHAIN (f_fpr);
4773 f_sav = TREE_CHAIN (f_ovf);
4775 valist = build_va_arg_indirect_ref (valist);
4776 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4777 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4778 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4779 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4781 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4782 if (indirect_p)
4783 type = build_pointer_type (type);
4784 size = int_size_in_bytes (type);
4785 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4787 nat_mode = type_natural_mode (type);
4788 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4789 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4791 /* Pull the value out of the saved registers. */
4793 addr = create_tmp_var (ptr_type_node, "addr");
4794 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4796 if (container)
4798 int needed_intregs, needed_sseregs;
4799 bool need_temp;
4800 tree int_addr, sse_addr;
4802 lab_false = create_artificial_label ();
4803 lab_over = create_artificial_label ();
4805 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4807 need_temp = (!REG_P (container)
4808 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4809 || TYPE_ALIGN (type) > 128));
4811 /* If we are passing a structure, verify that it occupies a consecutive block
4812 of the register save area. If not, we need to do moves. */
4813 if (!need_temp && !REG_P (container))
4815 /* Verify that all registers are strictly consecutive */
4816 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4818 int i;
4820 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4822 rtx slot = XVECEXP (container, 0, i);
4823 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4824 || INTVAL (XEXP (slot, 1)) != i * 16)
4825 need_temp = 1;
4828 else
4830 int i;
4832 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4834 rtx slot = XVECEXP (container, 0, i);
4835 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4836 || INTVAL (XEXP (slot, 1)) != i * 8)
4837 need_temp = 1;
4841 if (!need_temp)
4843 int_addr = addr;
4844 sse_addr = addr;
4846 else
4848 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4849 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4850 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4851 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4854 /* First ensure that we fit completely in registers. */
4855 if (needed_intregs)
4857 t = build_int_cst (TREE_TYPE (gpr),
4858 (REGPARM_MAX - needed_intregs + 1) * 8);
4859 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4860 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4861 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4862 gimplify_and_add (t, pre_p);
4864 if (needed_sseregs)
4866 t = build_int_cst (TREE_TYPE (fpr),
4867 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4868 + REGPARM_MAX * 8);
4869 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4870 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4871 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4872 gimplify_and_add (t, pre_p);
4875 /* Compute index to start of area used for integer regs. */
4876 if (needed_intregs)
4878 /* int_addr = gpr + sav; */
4879 t = fold_convert (ptr_type_node, gpr);
4880 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4881 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4882 gimplify_and_add (t, pre_p);
4884 if (needed_sseregs)
4886 /* sse_addr = fpr + sav; */
4887 t = fold_convert (ptr_type_node, fpr);
4888 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4889 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4890 gimplify_and_add (t, pre_p);
4892 if (need_temp)
4894 int i;
4895 tree temp = create_tmp_var (type, "va_arg_tmp");
4897 /* addr = &temp; */
4898 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4899 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4900 gimplify_and_add (t, pre_p);
4902 for (i = 0; i < XVECLEN (container, 0); i++)
4904 rtx slot = XVECEXP (container, 0, i);
4905 rtx reg = XEXP (slot, 0);
4906 enum machine_mode mode = GET_MODE (reg);
4907 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4908 tree addr_type = build_pointer_type (piece_type);
4909 tree src_addr, src;
4910 int src_offset;
4911 tree dest_addr, dest;
4913 if (SSE_REGNO_P (REGNO (reg)))
4915 src_addr = sse_addr;
4916 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4918 else
4920 src_addr = int_addr;
4921 src_offset = REGNO (reg) * 8;
4923 src_addr = fold_convert (addr_type, src_addr);
4924 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4925 size_int (src_offset));
4926 src = build_va_arg_indirect_ref (src_addr);
4928 dest_addr = fold_convert (addr_type, addr);
4929 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4930 size_int (INTVAL (XEXP (slot, 1))));
4931 dest = build_va_arg_indirect_ref (dest_addr);
4933 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4934 gimplify_and_add (t, pre_p);
4938 if (needed_intregs)
4940 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4941 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4942 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4943 gimplify_and_add (t, pre_p);
4945 if (needed_sseregs)
4947 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4948 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4949 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4950 gimplify_and_add (t, pre_p);
4953 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4954 gimplify_and_add (t, pre_p);
4956 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4957 append_to_statement_list (t, pre_p);
4960 /* ... otherwise out of the overflow area. */
4962 /* Care for on-stack alignment if needed. */
4963 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4964 || integer_zerop (TYPE_SIZE (type)))
4965 t = ovf;
4966 else
4968 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4969 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4970 build_int_cst (TREE_TYPE (ovf), align - 1));
4971 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4972 build_int_cst (TREE_TYPE (t), -align));
4974 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4976 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4977 gimplify_and_add (t2, pre_p);
4979 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4980 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4981 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4982 gimplify_and_add (t, pre_p);
4984 if (container)
4986 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4987 append_to_statement_list (t, pre_p);
4990 ptrtype = build_pointer_type (type);
4991 addr = fold_convert (ptrtype, addr);
4993 if (indirect_p)
4994 addr = build_va_arg_indirect_ref (addr);
4995 return build_va_arg_indirect_ref (addr);
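/* Editorial sketch, assuming va_arg (ap, double) with no temporary
   needed: the gimple produced by the code above behaves roughly like

       if (ap->fp_offset >= 48 + 8 * 16) goto lab_false;
       addr = ap->reg_save_area + ap->fp_offset;
       ap->fp_offset += 16;
       goto lab_over;
     lab_false:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     lab_over:
       result = *(double *) addr;  */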
4998 /* Return nonzero if OPNUM's MEM should be matched
4999 in movabs* patterns. */
5002 ix86_check_movabs (rtx insn, int opnum)
5004 rtx set, mem;
5006 set = PATTERN (insn);
5007 if (GET_CODE (set) == PARALLEL)
5008 set = XVECEXP (set, 0, 0);
5009 gcc_assert (GET_CODE (set) == SET);
5010 mem = XEXP (set, opnum);
5011 while (GET_CODE (mem) == SUBREG)
5012 mem = SUBREG_REG (mem);
5013 gcc_assert (MEM_P (mem));
5014 return (volatile_ok || !MEM_VOLATILE_P (mem));
5017 /* Initialize the table of extra 80387 mathematical constants. */
5019 static void
5020 init_ext_80387_constants (void)
5022 static const char * cst[5] =
5024 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5025 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5026 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5027 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5028 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5030 int i;
5032 for (i = 0; i < 5; i++)
5034 real_from_string (&ext_80387_constants_table[i], cst[i]);
5035 /* Ensure each constant is rounded to XFmode precision. */
5036 real_convert (&ext_80387_constants_table[i],
5037 XFmode, &ext_80387_constants_table[i]);
5040 ext_80387_constants_init = 1;
5043 /* Return true if the constant is something that can be loaded with
5044 a special instruction. */
5047 standard_80387_constant_p (rtx x)
5049 enum machine_mode mode = GET_MODE (x);
5051 REAL_VALUE_TYPE r;
5053 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5054 return -1;
5056 if (x == CONST0_RTX (mode))
5057 return 1;
5058 if (x == CONST1_RTX (mode))
5059 return 2;
5061 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5063 /* For XFmode constants, try to find a special 80387 instruction when
5064 optimizing for size or on those CPUs that benefit from them. */
5065 if (mode == XFmode
5066 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5068 int i;
5070 if (! ext_80387_constants_init)
5071 init_ext_80387_constants ();
5073 for (i = 0; i < 5; i++)
5074 if (real_identical (&r, &ext_80387_constants_table[i]))
5075 return i + 3;
5078 /* Load of the constant -0.0 or -1.0 will be split as
5079 fldz;fchs or fld1;fchs sequence. */
5080 if (real_isnegzero (&r))
5081 return 8;
5082 if (real_identical (&r, &dconstm1))
5083 return 9;
5085 return 0;
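/* Editorial examples of the numbering above: standard_80387_constant_p
   returns 2 for an XFmode 1.0 (loaded with fld1), 7 for pi (fldpi), and
   8 for -0.0, which standard_80387_constant_opcode below reports as "#"
   so the load is split into an fldz; fchs sequence.  */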
5088 /* Return the opcode of the special instruction to be used to load
5089 the constant X. */
5091 const char *
5092 standard_80387_constant_opcode (rtx x)
5094 switch (standard_80387_constant_p (x))
5096 case 1:
5097 return "fldz";
5098 case 2:
5099 return "fld1";
5100 case 3:
5101 return "fldlg2";
5102 case 4:
5103 return "fldln2";
5104 case 5:
5105 return "fldl2e";
5106 case 6:
5107 return "fldl2t";
5108 case 7:
5109 return "fldpi";
5110 case 8:
5111 case 9:
5112 return "#";
5113 default:
5114 gcc_unreachable ();
5118 /* Return the CONST_DOUBLE representing the 80387 constant that is
5119 loaded by the specified special instruction. The argument IDX
5120 matches the return value from standard_80387_constant_p. */
5123 standard_80387_constant_rtx (int idx)
5125 int i;
5127 if (! ext_80387_constants_init)
5128 init_ext_80387_constants ();
5130 switch (idx)
5132 case 3:
5133 case 4:
5134 case 5:
5135 case 6:
5136 case 7:
5137 i = idx - 3;
5138 break;
5140 default:
5141 gcc_unreachable ();
5144 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5145 XFmode);
5148 /* Return 1 if mode is a valid mode for sse. */
5149 static int
5150 standard_sse_mode_p (enum machine_mode mode)
5152 switch (mode)
5154 case V16QImode:
5155 case V8HImode:
5156 case V4SImode:
5157 case V2DImode:
5158 case V4SFmode:
5159 case V2DFmode:
5160 return 1;
5162 default:
5163 return 0;
5167 /* Return 1 if X is an FP constant we can load into an SSE register without using memory.  */
5170 standard_sse_constant_p (rtx x)
5172 enum machine_mode mode = GET_MODE (x);
5174 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5175 return 1;
5176 if (vector_all_ones_operand (x, mode)
5177 && standard_sse_mode_p (mode))
5178 return TARGET_SSE2 ? 2 : -1;
5180 return 0;
5183 /* Return the opcode of the special instruction to be used to load
5184 the constant X. */
5186 const char *
5187 standard_sse_constant_opcode (rtx insn, rtx x)
5189 switch (standard_sse_constant_p (x))
5191 case 1:
5192 if (get_attr_mode (insn) == MODE_V4SF)
5193 return "xorps\t%0, %0";
5194 else if (get_attr_mode (insn) == MODE_V2DF)
5195 return "xorpd\t%0, %0";
5196 else
5197 return "pxor\t%0, %0";
5198 case 2:
5199 return "pcmpeqd\t%0, %0";
5201 gcc_unreachable ();
5204 /* Returns 1 if OP contains a symbol reference */
5207 symbolic_reference_mentioned_p (rtx op)
5209 const char *fmt;
5210 int i;
5212 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5213 return 1;
5215 fmt = GET_RTX_FORMAT (GET_CODE (op));
5216 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5218 if (fmt[i] == 'E')
5220 int j;
5222 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5223 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5224 return 1;
5227 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5228 return 1;
5231 return 0;
5234 /* Return 1 if it is appropriate to emit `ret' instructions in the
5235 body of a function. Do this only if the epilogue is simple, needing a
5236 couple of insns. Prior to reloading, we can't tell how many registers
5237 must be saved, so return 0 then. Return 0 if there is no frame
5238 marker to de-allocate. */
5241 ix86_can_use_return_insn_p (void)
5243 struct ix86_frame frame;
5245 if (! reload_completed || frame_pointer_needed)
5246 return 0;
5248 /* Don't allow more than 32k of arguments to be popped, since that's all we can do
5249 with one instruction. */
5250 if (current_function_pops_args
5251 && current_function_args_size >= 32768)
5252 return 0;
5254 ix86_compute_frame_layout (&frame);
5255 return frame.to_allocate == 0 && frame.nregs == 0;
5258 /* Value should be nonzero if functions must have frame pointers.
5259 Zero means the frame pointer need not be set up (and parms may
5260 be accessed via the stack pointer) in functions that seem suitable. */
5263 ix86_frame_pointer_required (void)
5265 /* If we accessed previous frames, then the generated code expects
5266 to be able to access the saved ebp value in our frame. */
5267 if (cfun->machine->accesses_prev_frame)
5268 return 1;
5270 /* Several x86 OSes need a frame pointer for other reasons,
5271 usually pertaining to setjmp. */
5272 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5273 return 1;
5275 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5276 the frame pointer by default. Turn it back on now if we've not
5277 got a leaf function. */
5278 if (TARGET_OMIT_LEAF_FRAME_POINTER
5279 && (!current_function_is_leaf
5280 || ix86_current_function_calls_tls_descriptor))
5281 return 1;
5283 if (current_function_profile)
5284 return 1;
5286 return 0;
5289 /* Record that the current function accesses previous call frames. */
5291 void
5292 ix86_setup_frame_addresses (void)
5294 cfun->machine->accesses_prev_frame = 1;
5297 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5298 # define USE_HIDDEN_LINKONCE 1
5299 #else
5300 # define USE_HIDDEN_LINKONCE 0
5301 #endif
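/* Bitmask of the registers for which a pc-loading thunk has been referenced
   by output_set_got; ix86_file_end emits the thunk bodies at end of file.  */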
5303 static int pic_labels_used;
5305 /* Fills in the label name that should be used for a pc thunk for
5306 the given register. */
5308 static void
5309 get_pc_thunk_name (char name[32], unsigned int regno)
5311 gcc_assert (!TARGET_64BIT);
5313 if (USE_HIDDEN_LINKONCE)
5314 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5315 else
5316 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5320 /* This function generates, for -fpic, the pc thunks: each loads its
5321 register with the return address of the caller and then returns. */
5323 void
5324 ix86_file_end (void)
5326 rtx xops[2];
5327 int regno;
5329 for (regno = 0; regno < 8; ++regno)
5331 char name[32];
5333 if (! ((pic_labels_used >> regno) & 1))
5334 continue;
5336 get_pc_thunk_name (name, regno);
5338 #if TARGET_MACHO
5339 if (TARGET_MACHO)
5341 switch_to_section (darwin_sections[text_coal_section]);
5342 fputs ("\t.weak_definition\t", asm_out_file);
5343 assemble_name (asm_out_file, name);
5344 fputs ("\n\t.private_extern\t", asm_out_file);
5345 assemble_name (asm_out_file, name);
5346 fputs ("\n", asm_out_file);
5347 ASM_OUTPUT_LABEL (asm_out_file, name);
5349 else
5350 #endif
5351 if (USE_HIDDEN_LINKONCE)
5353 tree decl;
5355 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5356 error_mark_node);
5357 TREE_PUBLIC (decl) = 1;
5358 TREE_STATIC (decl) = 1;
5359 DECL_ONE_ONLY (decl) = 1;
5361 (*targetm.asm_out.unique_section) (decl, 0);
5362 switch_to_section (get_named_section (decl, NULL, 0));
5364 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5365 fputs ("\t.hidden\t", asm_out_file);
5366 assemble_name (asm_out_file, name);
5367 fputc ('\n', asm_out_file);
5368 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5370 else
5372 switch_to_section (text_section);
5373 ASM_OUTPUT_LABEL (asm_out_file, name);
5376 xops[0] = gen_rtx_REG (SImode, regno);
5377 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5378 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5379 output_asm_insn ("ret", xops);
5382 if (NEED_INDICATE_EXEC_STACK)
5383 file_end_indicate_exec_stack ();
5386 /* Emit code for the SET_GOT patterns. */
5388 const char *
5389 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5391 rtx xops[3];
5393 xops[0] = dest;
5395 if (TARGET_VXWORKS_RTP && flag_pic)
5397 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5398 xops[2] = gen_rtx_MEM (Pmode,
5399 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5400 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5402 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5403 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5404 an unadorned address. */
5405 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5406 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5407 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5408 return "";
5411 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
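  /* Without deep branch prediction (or when not generating PIC at all) the PC
     is obtained inline: a call to the immediately following label and a pop,
     or a plain move of the label address when not PIC.  Otherwise the
     per-register pc thunk emitted by ix86_file_end is called.  */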
5413 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5415 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5417 if (!flag_pic)
5418 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5419 else
5420 output_asm_insn ("call\t%a2", xops);
5422 #if TARGET_MACHO
5423 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5424 is what will be referenced by the Mach-O PIC subsystem. */
5425 if (!label)
5426 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5427 #endif
5429 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5430 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5432 if (flag_pic)
5433 output_asm_insn ("pop{l}\t%0", xops);
5435 else
5437 char name[32];
5438 get_pc_thunk_name (name, REGNO (dest));
5439 pic_labels_used |= 1 << REGNO (dest);
5441 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5442 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5443 output_asm_insn ("call\t%X2", xops);
5444 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5445 is what will be referenced by the Mach-O PIC subsystem. */
5446 #if TARGET_MACHO
5447 if (!label)
5448 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5449 else
5450 targetm.asm_out.internal_label (asm_out_file, "L",
5451 CODE_LABEL_NUMBER (label));
5452 #endif
5455 if (TARGET_MACHO)
5456 return "";
5458 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5459 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5460 else
5461 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5463 return "";
5466 /* Generate a "push" pattern for input ARG. */
5468 static rtx
5469 gen_push (rtx arg)
5471 return gen_rtx_SET (VOIDmode,
5472 gen_rtx_MEM (Pmode,
5473 gen_rtx_PRE_DEC (Pmode,
5474 stack_pointer_rtx)),
5475 arg);
5478 /* Return >= 0 if there is an unused call-clobbered register available
5479 for the entire function. */
5481 static unsigned int
5482 ix86_select_alt_pic_regnum (void)
5484 if (current_function_is_leaf && !current_function_profile
5485 && !ix86_current_function_calls_tls_descriptor)
5487 int i;
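      /* Look for a call-clobbered hard register (2 down to 0) that is never
	 live in this function; it can then hold the PIC pointer without
	 needing to be saved.  */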
5488 for (i = 2; i >= 0; --i)
5489 if (!regs_ever_live[i])
5490 return i;
5493 return INVALID_REGNUM;
5496 /* Return 1 if we need to save REGNO. */
5497 static int
5498 ix86_save_reg (unsigned int regno, int maybe_eh_return)
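  /* The PIC register is saved when it is actually needed (live, or implied
     by profiling, eh_return or a constant-pool reference) and no unused
     call-clobbered register can take its place.  */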
5500 if (pic_offset_table_rtx
5501 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5502 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5503 || current_function_profile
5504 || current_function_calls_eh_return
5505 || current_function_uses_const_pool))
5507 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5508 return 0;
5509 return 1;
5512 if (current_function_calls_eh_return && maybe_eh_return)
5514 unsigned i;
5515 for (i = 0; ; i++)
5517 unsigned test = EH_RETURN_DATA_REGNO (i);
5518 if (test == INVALID_REGNUM)
5519 break;
5520 if (test == regno)
5521 return 1;
5525 if (cfun->machine->force_align_arg_pointer
5526 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5527 return 1;
5529 return (regs_ever_live[regno]
5530 && !call_used_regs[regno]
5531 && !fixed_regs[regno]
5532 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5535 /* Return number of registers to be saved on the stack. */
5537 static int
5538 ix86_nsaved_regs (void)
5540 int nregs = 0;
5541 int regno;
5543 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5544 if (ix86_save_reg (regno, true))
5545 nregs++;
5546 return nregs;
5549 /* Return the offset between two registers, one to be eliminated, and the other
5550 its replacement, at the start of a routine. */
5552 HOST_WIDE_INT
5553 ix86_initial_elimination_offset (int from, int to)
5555 struct ix86_frame frame;
5556 ix86_compute_frame_layout (&frame);
5558 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5559 return frame.hard_frame_pointer_offset;
5560 else if (from == FRAME_POINTER_REGNUM
5561 && to == HARD_FRAME_POINTER_REGNUM)
5562 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5563 else
5565 gcc_assert (to == STACK_POINTER_REGNUM);
5567 if (from == ARG_POINTER_REGNUM)
5568 return frame.stack_pointer_offset;
5570 gcc_assert (from == FRAME_POINTER_REGNUM);
5571 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5575 /* Fill in the ix86_frame structure describing the frame of the currently compiled function. */
5577 static void
5578 ix86_compute_frame_layout (struct ix86_frame *frame)
5580 HOST_WIDE_INT total_size;
5581 unsigned int stack_alignment_needed;
5582 HOST_WIDE_INT offset;
5583 unsigned int preferred_alignment;
5584 HOST_WIDE_INT size = get_frame_size ();
5586 frame->nregs = ix86_nsaved_regs ();
5587 total_size = size;
5589 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5590 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5592 /* During reload iteration the number of registers saved can change.
5593 Recompute the value as needed. Do not recompute when the number of registers
5594 didn't change, as reload makes multiple calls to this function and does not
5595 expect the decision to change within a single iteration. */
5596 if (!optimize_size
5597 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5599 int count = frame->nregs;
5601 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5602 /* The fast prologue uses moves instead of pushes to save registers. This
5603 is significantly longer, but it also executes faster, as modern hardware
5604 can execute the moves in parallel but can't do that for push/pop.
5606 Be careful about choosing which prologue to emit: when the function takes
5607 many instructions to execute we may as well use the slow version, and
5608 likewise when the function is known to be outside a hot spot (this is
5609 known with feedback only). Weight the size of the function by the number
5610 of registers to save, as it is cheap to use one or two push instructions
5611 but very slow to use many of them. */
5612 if (count)
5613 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5614 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5615 || (flag_branch_probabilities
5616 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5617 cfun->machine->use_fast_prologue_epilogue = false;
5618 else
5619 cfun->machine->use_fast_prologue_epilogue
5620 = !expensive_function_p (count);
5622 if (TARGET_PROLOGUE_USING_MOVE
5623 && cfun->machine->use_fast_prologue_epilogue)
5624 frame->save_regs_using_mov = true;
5625 else
5626 frame->save_regs_using_mov = false;
5629 /* Skip return address and saved base pointer. */
5630 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5632 frame->hard_frame_pointer_offset = offset;
5634 /* Do some sanity checking of stack_alignment_needed and
5635 preferred_alignment, since the i386 port is the only one using these
5636 features and they may break easily. */
5638 gcc_assert (!size || stack_alignment_needed);
5639 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5640 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5641 gcc_assert (stack_alignment_needed
5642 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5644 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5645 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5647 /* Register save area */
5648 offset += frame->nregs * UNITS_PER_WORD;
5650 /* Va-arg area */
5651 if (ix86_save_varrargs_registers)
5653 offset += X86_64_VARARGS_SIZE;
5654 frame->va_arg_size = X86_64_VARARGS_SIZE;
5656 else
5657 frame->va_arg_size = 0;
5659 /* Align start of frame for local function. */
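  /* (offset + align - 1) & -align rounds OFFSET up to the next multiple of
     stack_alignment_needed; padding1 records how many bytes that adds.  */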
5660 frame->padding1 = ((offset + stack_alignment_needed - 1)
5661 & -stack_alignment_needed) - offset;
5663 offset += frame->padding1;
5665 /* Frame pointer points here. */
5666 frame->frame_pointer_offset = offset;
5668 offset += size;
5670 /* Add the outgoing arguments area. It can be skipped if we eliminated
5671 all the function calls as dead code.
5672 Skipping is however impossible when the function calls alloca: the alloca
5673 expander assumes that the last current_function_outgoing_args_size bytes
5674 of the stack frame are unused. */
5675 if (ACCUMULATE_OUTGOING_ARGS
5676 && (!current_function_is_leaf || current_function_calls_alloca
5677 || ix86_current_function_calls_tls_descriptor))
5679 offset += current_function_outgoing_args_size;
5680 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5682 else
5683 frame->outgoing_arguments_size = 0;
5685 /* Align stack boundary. Only needed if we're calling another function
5686 or using alloca. */
5687 if (!current_function_is_leaf || current_function_calls_alloca
5688 || ix86_current_function_calls_tls_descriptor)
5689 frame->padding2 = ((offset + preferred_alignment - 1)
5690 & -preferred_alignment) - offset;
5691 else
5692 frame->padding2 = 0;
5694 offset += frame->padding2;
5696 /* We've reached end of stack frame. */
5697 frame->stack_pointer_offset = offset;
5699 /* Size prologue needs to allocate. */
5700 frame->to_allocate =
5701 (size + frame->padding1 + frame->padding2
5702 + frame->outgoing_arguments_size + frame->va_arg_size);
5704 if ((!frame->to_allocate && frame->nregs <= 1)
5705 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5706 frame->save_regs_using_mov = false;
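  /* If the red zone below the stack pointer is usable (leaf function whose
     stack pointer never changes and that makes no TLS descriptor calls),
     place as much of the frame there as fits within the usable part of the
     zone, so the prologue need not allocate that part explicitly.  */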
5708 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5709 && current_function_is_leaf
5710 && !ix86_current_function_calls_tls_descriptor)
5712 frame->red_zone_size = frame->to_allocate;
5713 if (frame->save_regs_using_mov)
5714 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5715 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5716 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5718 else
5719 frame->red_zone_size = 0;
5720 frame->to_allocate -= frame->red_zone_size;
5721 frame->stack_pointer_offset -= frame->red_zone_size;
5722 #if 0
5723 fprintf (stderr, "\n");
5724 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5725 fprintf (stderr, "size: %ld\n", (long)size);
5726 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5727 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5728 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5729 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5730 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5731 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5732 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5733 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5734 (long)frame->hard_frame_pointer_offset);
5735 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5736 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5737 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5738 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5739 #endif
5742 /* Emit code to save registers in the prologue. */
5744 static void
5745 ix86_emit_save_regs (void)
5747 unsigned int regno;
5748 rtx insn;
5750 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5751 if (ix86_save_reg (regno, true))
5753 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5754 RTX_FRAME_RELATED_P (insn) = 1;
5758 /* Emit code to save registers using MOV insns. The first register
5759 is saved at POINTER + OFFSET. */
5760 static void
5761 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5763 unsigned int regno;
5764 rtx insn;
5766 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5767 if (ix86_save_reg (regno, true))
5769 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5770 Pmode, offset),
5771 gen_rtx_REG (Pmode, regno));
5772 RTX_FRAME_RELATED_P (insn) = 1;
5773 offset += UNITS_PER_WORD;
5777 /* Expand a prologue or epilogue stack adjustment.
5778 The pattern exists to put a dependency on all ebp-based memory accesses.
5779 STYLE should be negative if instructions should be marked as frame related,
5780 zero if the %r11 register is live and cannot be freely used, and positive
5781 otherwise. */
5783 static void
5784 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5786 rtx insn;
5788 if (! TARGET_64BIT)
5789 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5790 else if (x86_64_immediate_operand (offset, DImode))
5791 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5792 else
5794 rtx r11;
5795 /* r11 is used by indirect sibcall return as well, set before the
5796 epilogue and used after the epilogue. ATM indirect sibcall
5797 shouldn't be used together with huge frame sizes in one
5798 function because of the frame_size check in sibcall.c. */
5799 gcc_assert (style);
5800 r11 = gen_rtx_REG (DImode, R11_REG);
5801 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5802 if (style < 0)
5803 RTX_FRAME_RELATED_P (insn) = 1;
5804 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5805 offset));
5807 if (style < 0)
5808 RTX_FRAME_RELATED_P (insn) = 1;
5811 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5813 static rtx
5814 ix86_internal_arg_pointer (void)
5816 bool has_force_align_arg_pointer =
5817 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5818 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5819 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5820 && DECL_NAME (current_function_decl)
5821 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5822 && DECL_FILE_SCOPE_P (current_function_decl))
5823 || ix86_force_align_arg_pointer
5824 || has_force_align_arg_pointer)
5826 /* Nested functions can't realign the stack due to a register
5827 conflict. */
5828 if (DECL_CONTEXT (current_function_decl)
5829 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5831 if (ix86_force_align_arg_pointer)
5832 warning (0, "-mstackrealign ignored for nested functions");
5833 if (has_force_align_arg_pointer)
5834 error ("%s not supported for nested functions",
5835 ix86_force_align_arg_pointer_string);
5836 return virtual_incoming_args_rtx;
5838 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5839 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5841 else
5842 return virtual_incoming_args_rtx;
5845 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5846 This is called from dwarf2out.c to emit call frame instructions
5847 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5848 static void
5849 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5851 rtx unspec = SET_SRC (pattern);
5852 gcc_assert (GET_CODE (unspec) == UNSPEC);
5854 switch (index)
5856 case UNSPEC_REG_SAVE:
5857 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5858 SET_DEST (pattern));
5859 break;
5860 case UNSPEC_DEF_CFA:
5861 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5862 INTVAL (XVECEXP (unspec, 0, 0)));
5863 break;
5864 default:
5865 gcc_unreachable ();
5869 /* Expand the prologue into a bunch of separate insns. */
5871 void
5872 ix86_expand_prologue (void)
5874 rtx insn;
5875 bool pic_reg_used;
5876 struct ix86_frame frame;
5877 HOST_WIDE_INT allocate;
5879 ix86_compute_frame_layout (&frame);
5881 if (cfun->machine->force_align_arg_pointer)
5883 rtx x, y;
5885 /* Grab the argument pointer. */
5886 x = plus_constant (stack_pointer_rtx, 4);
5887 y = cfun->machine->force_align_arg_pointer;
5888 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5889 RTX_FRAME_RELATED_P (insn) = 1;
5891 /* The unwind info consists of two parts: install the fafp as the cfa,
5892 and record the fafp as the "save register" of the stack pointer.
5893 The latter is there so that the unwinder can see where it
5894 should restore the stack pointer across the and insn. */
5895 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5896 x = gen_rtx_SET (VOIDmode, y, x);
5897 RTX_FRAME_RELATED_P (x) = 1;
5898 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5899 UNSPEC_REG_SAVE);
5900 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5901 RTX_FRAME_RELATED_P (y) = 1;
5902 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5903 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5904 REG_NOTES (insn) = x;
5906 /* Align the stack. */
5907 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5908 GEN_INT (-16)));
5910 /* And here we cheat like madmen with the unwind info. We force the
5911 cfa register back to sp+4, which is exactly what it was at the
5912 start of the function. Re-pushing the return address results in
5913 the return at the same spot relative to the cfa, and thus is
5914 correct wrt the unwind info. */
5915 x = cfun->machine->force_align_arg_pointer;
5916 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5917 insn = emit_insn (gen_push (x));
5918 RTX_FRAME_RELATED_P (insn) = 1;
5920 x = GEN_INT (4);
5921 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5922 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5923 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5924 REG_NOTES (insn) = x;
5927 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5928 slower on all targets. Also sdb doesn't like it. */
5930 if (frame_pointer_needed)
5932 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5933 RTX_FRAME_RELATED_P (insn) = 1;
5935 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5936 RTX_FRAME_RELATED_P (insn) = 1;
5939 allocate = frame.to_allocate;
5941 if (!frame.save_regs_using_mov)
5942 ix86_emit_save_regs ();
5943 else
5944 allocate += frame.nregs * UNITS_PER_WORD;
5946 /* When using the red zone we may start saving registers before allocating
5947 the stack frame, saving one cycle of the prologue. */
5948 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5949 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5950 : stack_pointer_rtx,
5951 -frame.nregs * UNITS_PER_WORD);
5953 if (allocate == 0)
5955 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5956 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5957 GEN_INT (-allocate), -1);
5958 else
5960 /* Only valid for Win32. */
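      /* The allocation is done by the allocate_stack_worker pattern, which
	 expects the size in eax.  If eax is live at function start (it may
	 carry an incoming argument), push it around the call and reload it
	 from its stack slot afterwards.  */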
5961 rtx eax = gen_rtx_REG (Pmode, 0);
5962 bool eax_live;
5963 rtx t;
5965 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
5967 if (TARGET_64BIT_MS_ABI)
5968 eax_live = false;
5969 else
5970 eax_live = ix86_eax_live_at_start_p ();
5972 if (eax_live)
5974 emit_insn (gen_push (eax));
5975 allocate -= UNITS_PER_WORD;
5978 emit_move_insn (eax, GEN_INT (allocate));
5980 if (TARGET_64BIT)
5981 insn = gen_allocate_stack_worker_64 (eax);
5982 else
5983 insn = gen_allocate_stack_worker_32 (eax);
5984 insn = emit_insn (insn);
5985 RTX_FRAME_RELATED_P (insn) = 1;
5986 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5987 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5988 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5989 t, REG_NOTES (insn));
5991 if (eax_live)
5993 if (frame_pointer_needed)
5994 t = plus_constant (hard_frame_pointer_rtx,
5995 allocate
5996 - frame.to_allocate
5997 - frame.nregs * UNITS_PER_WORD);
5998 else
5999 t = plus_constant (stack_pointer_rtx, allocate);
6000 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6004 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6006 if (!frame_pointer_needed || !frame.to_allocate)
6007 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6008 else
6009 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6010 -frame.nregs * UNITS_PER_WORD);
6013 pic_reg_used = false;
6014 if (pic_offset_table_rtx
6015 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
6016 || current_function_profile))
6018 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6020 if (alt_pic_reg_used != INVALID_REGNUM)
6021 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
6023 pic_reg_used = true;
6026 if (pic_reg_used)
6028 if (TARGET_64BIT)
6030 if (ix86_cmodel == CM_LARGE_PIC)
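	  /* Large PIC model: take the address of a local label %rip-relatively
	     into the PIC register, load the label-to-GOT displacement into the
	     r11 scratch, and add the two to form the GOT base.  */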
6032 rtx tmp_reg = gen_rtx_REG (DImode,
6033 FIRST_REX_INT_REG + 3 /* R11 */);
6034 rtx label = gen_label_rtx ();
6035 emit_label (label);
6036 LABEL_PRESERVE_P (label) = 1;
6037 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6038 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6039 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6040 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6041 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6042 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6043 pic_offset_table_rtx, tmp_reg));
6045 else
6046 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6048 else
6049 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6051 /* Even with accurate pre-reload life analysis, we can wind up
6052 deleting all references to the pic register after reload.
6053 Consider if cross-jumping unifies two sides of a branch
6054 controlled by a comparison vs the only read from a global.
6055 In which case, allow the set_got to be deleted, though we're
6056 too late to do anything about the ebx save in the prologue. */
6057 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6060 /* Prevent function calls from being scheduled before the call to mcount.
6061 In the pic_reg_used case, make sure that the got load isn't deleted. */
6062 if (current_function_profile)
6063 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6066 /* Emit code to restore saved registers using MOV insns. First register
6067 is restored from POINTER + OFFSET. */
6068 static void
6069 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6070 int maybe_eh_return)
6072 int regno;
6073 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6075 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6076 if (ix86_save_reg (regno, maybe_eh_return))
6078 /* Ensure that adjust_address won't be forced to produce a pointer
6079 outside the range allowed by the x86-64 instruction set. */
6080 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6082 rtx r11;
6084 r11 = gen_rtx_REG (DImode, R11_REG);
6085 emit_move_insn (r11, GEN_INT (offset));
6086 emit_insn (gen_adddi3 (r11, r11, pointer));
6087 base_address = gen_rtx_MEM (Pmode, r11);
6088 offset = 0;
6090 emit_move_insn (gen_rtx_REG (Pmode, regno),
6091 adjust_address (base_address, Pmode, offset));
6092 offset += UNITS_PER_WORD;
6096 /* Restore function stack, frame, and registers. */
6098 void
6099 ix86_expand_epilogue (int style)
6101 int regno;
6102 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6103 struct ix86_frame frame;
6104 HOST_WIDE_INT offset;
6106 ix86_compute_frame_layout (&frame);
6108 /* Calculate start of saved registers relative to ebp. Special care
6109 must be taken for the normal return case of a function using
6110 eh_return: the eax and edx registers are marked as saved, but not
6111 restored along this path. */
6112 offset = frame.nregs;
6113 if (current_function_calls_eh_return && style != 2)
6114 offset -= 2;
6115 offset *= -UNITS_PER_WORD;
6117 /* If we're only restoring one register and sp is not valid then
6118 use a move instruction to restore the register, since it's
6119 less work than reloading sp and popping the register.
6121 The default code results in a stack adjustment using an add/lea instruction,
6122 while this code results in a LEAVE instruction (or discrete equivalent),
6123 so it is profitable in some other cases as well, especially when there
6124 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6125 is set and there is exactly one register to pop. This heuristic may need
6126 some tuning in the future. */
6127 if ((!sp_valid && frame.nregs <= 1)
6128 || (TARGET_EPILOGUE_USING_MOVE
6129 && cfun->machine->use_fast_prologue_epilogue
6130 && (frame.nregs > 1 || frame.to_allocate))
6131 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6132 || (frame_pointer_needed && TARGET_USE_LEAVE
6133 && cfun->machine->use_fast_prologue_epilogue
6134 && frame.nregs == 1)
6135 || current_function_calls_eh_return)
6137 /* Restore registers. We can use ebp or esp to address the memory
6138 locations. If both are available, default to ebp, since offsets
6139 are known to be small. The only exception is when esp points directly
6140 to the end of the block of saved registers, where we may simplify the
6141 addressing mode. */
6143 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6144 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6145 frame.to_allocate, style == 2);
6146 else
6147 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6148 offset, style == 2);
6150 /* eh_return epilogues need %ecx added to the stack pointer. */
6151 if (style == 2)
6153 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6155 if (frame_pointer_needed)
6157 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6158 tmp = plus_constant (tmp, UNITS_PER_WORD);
6159 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6161 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6162 emit_move_insn (hard_frame_pointer_rtx, tmp);
6164 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6165 const0_rtx, style);
6167 else
6169 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6170 tmp = plus_constant (tmp, (frame.to_allocate
6171 + frame.nregs * UNITS_PER_WORD));
6172 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6175 else if (!frame_pointer_needed)
6176 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6177 GEN_INT (frame.to_allocate
6178 + frame.nregs * UNITS_PER_WORD),
6179 style);
6180 /* If not an i386, mov & pop is faster than "leave". */
6181 else if (TARGET_USE_LEAVE || optimize_size
6182 || !cfun->machine->use_fast_prologue_epilogue)
6183 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6184 else
6186 pro_epilogue_adjust_stack (stack_pointer_rtx,
6187 hard_frame_pointer_rtx,
6188 const0_rtx, style);
6189 if (TARGET_64BIT)
6190 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6191 else
6192 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6195 else
6197 /* First step is to deallocate the stack frame so that we can
6198 pop the registers. */
6199 if (!sp_valid)
6201 gcc_assert (frame_pointer_needed);
6202 pro_epilogue_adjust_stack (stack_pointer_rtx,
6203 hard_frame_pointer_rtx,
6204 GEN_INT (offset), style);
6206 else if (frame.to_allocate)
6207 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6208 GEN_INT (frame.to_allocate), style);
6210 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6211 if (ix86_save_reg (regno, false))
6213 if (TARGET_64BIT)
6214 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6215 else
6216 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6218 if (frame_pointer_needed)
6220 /* Leave results in shorter dependency chains on CPUs that are
6221 able to grok it fast. */
6222 if (TARGET_USE_LEAVE)
6223 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6224 else if (TARGET_64BIT)
6225 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6226 else
6227 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6231 if (cfun->machine->force_align_arg_pointer)
6233 emit_insn (gen_addsi3 (stack_pointer_rtx,
6234 cfun->machine->force_align_arg_pointer,
6235 GEN_INT (-4)));
6238 /* Sibcall epilogues don't want a return instruction. */
6239 if (style == 0)
6240 return;
6242 if (current_function_pops_args && current_function_args_size)
6244 rtx popc = GEN_INT (current_function_pops_args);
6246 /* i386 can only pop 64K bytes. If asked to pop more, pop
6247 return address, do explicit add, and jump indirectly to the
6248 caller. */
6250 if (current_function_pops_args >= 65536)
6252 rtx ecx = gen_rtx_REG (SImode, 2);
6254 /* There is no "pascal" calling convention in any 64bit ABI. */
6255 gcc_assert (!TARGET_64BIT);
6257 emit_insn (gen_popsi1 (ecx));
6258 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6259 emit_jump_insn (gen_return_indirect_internal (ecx));
6261 else
6262 emit_jump_insn (gen_return_pop_internal (popc));
6264 else
6265 emit_jump_insn (gen_return_internal ());
6268 /* Reset from the function's potential modifications. */
6270 static void
6271 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6272 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6274 if (pic_offset_table_rtx)
6275 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6276 #if TARGET_MACHO
6277 /* Mach-O doesn't support labels at the end of objects, so if
6278 it looks like we might want one, insert a NOP. */
6280 rtx insn = get_last_insn ();
6281 while (insn
6282 && NOTE_P (insn)
6283 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6284 insn = PREV_INSN (insn);
6285 if (insn
6286 && (LABEL_P (insn)
6287 || (NOTE_P (insn)
6288 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6289 fputs ("\tnop\n", file);
6291 #endif
6295 /* Extract the parts of an RTL expression that is a valid memory address
6296 for an instruction. Return 0 if the structure of the address is
6297 grossly off. Return -1 if the address contains ASHIFT, so it is not
6298 strictly valid, but is still used for computing the length of an lea instruction. */
6301 ix86_decompose_address (rtx addr, struct ix86_address *out)
6303 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6304 rtx base_reg, index_reg;
6305 HOST_WIDE_INT scale = 1;
6306 rtx scale_rtx = NULL_RTX;
6307 int retval = 1;
6308 enum ix86_address_seg seg = SEG_DEFAULT;
6310 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6311 base = addr;
6312 else if (GET_CODE (addr) == PLUS)
6314 rtx addends[4], op;
6315 int n = 0, i;
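      /* Flatten the PLUS tree into at most four addends, then classify each
	 one as base, index*scale, displacement or segment override.  */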
6317 op = addr;
6320 if (n >= 4)
6321 return 0;
6322 addends[n++] = XEXP (op, 1);
6323 op = XEXP (op, 0);
6325 while (GET_CODE (op) == PLUS);
6326 if (n >= 4)
6327 return 0;
6328 addends[n] = op;
6330 for (i = n; i >= 0; --i)
6332 op = addends[i];
6333 switch (GET_CODE (op))
6335 case MULT:
6336 if (index)
6337 return 0;
6338 index = XEXP (op, 0);
6339 scale_rtx = XEXP (op, 1);
6340 break;
6342 case UNSPEC:
6343 if (XINT (op, 1) == UNSPEC_TP
6344 && TARGET_TLS_DIRECT_SEG_REFS
6345 && seg == SEG_DEFAULT)
6346 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6347 else
6348 return 0;
6349 break;
6351 case REG:
6352 case SUBREG:
6353 if (!base)
6354 base = op;
6355 else if (!index)
6356 index = op;
6357 else
6358 return 0;
6359 break;
6361 case CONST:
6362 case CONST_INT:
6363 case SYMBOL_REF:
6364 case LABEL_REF:
6365 if (disp)
6366 return 0;
6367 disp = op;
6368 break;
6370 default:
6371 return 0;
6375 else if (GET_CODE (addr) == MULT)
6377 index = XEXP (addr, 0); /* index*scale */
6378 scale_rtx = XEXP (addr, 1);
6380 else if (GET_CODE (addr) == ASHIFT)
6382 rtx tmp;
6384 /* We're called for lea too, which implements ashift on occasion. */
6385 index = XEXP (addr, 0);
6386 tmp = XEXP (addr, 1);
6387 if (!CONST_INT_P (tmp))
6388 return 0;
6389 scale = INTVAL (tmp);
6390 if ((unsigned HOST_WIDE_INT) scale > 3)
6391 return 0;
6392 scale = 1 << scale;
6393 retval = -1;
6395 else
6396 disp = addr; /* displacement */
6398 /* Extract the integral value of scale. */
6399 if (scale_rtx)
6401 if (!CONST_INT_P (scale_rtx))
6402 return 0;
6403 scale = INTVAL (scale_rtx);
6406 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6407 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6409 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
6410 if (base_reg && index_reg && scale == 1
6411 && (index_reg == arg_pointer_rtx
6412 || index_reg == frame_pointer_rtx
6413 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6415 rtx tmp;
6416 tmp = base, base = index, index = tmp;
6417 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6420 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6421 if ((base_reg == hard_frame_pointer_rtx
6422 || base_reg == frame_pointer_rtx
6423 || base_reg == arg_pointer_rtx) && !disp)
6424 disp = const0_rtx;
6426 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6427 Avoid this by transforming to [%esi+0]. */
6428 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6429 && base_reg && !index_reg && !disp
6430 && REG_P (base_reg)
6431 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6432 disp = const0_rtx;
6434 /* Special case: encode reg+reg instead of reg*2. */
6435 if (!base && index && scale && scale == 2)
6436 base = index, base_reg = index_reg, scale = 1;
6438 /* Special case: scaling cannot be encoded without base or displacement. */
6439 if (!base && !disp && index && scale != 1)
6440 disp = const0_rtx;
6442 out->base = base;
6443 out->index = index;
6444 out->disp = disp;
6445 out->scale = scale;
6446 out->seg = seg;
6448 return retval;
6451 /* Return the cost of the memory address x.
6452 For i386, it is better to use a complex address than let gcc copy
6453 the address into a reg and make a new pseudo. But not if the address
6454 requires two regs - that would mean more pseudos with longer
6455 lifetimes. */
6456 static int
6457 ix86_address_cost (rtx x)
6459 struct ix86_address parts;
6460 int cost = 1;
6461 int ok = ix86_decompose_address (x, &parts);
6463 gcc_assert (ok);
6465 if (parts.base && GET_CODE (parts.base) == SUBREG)
6466 parts.base = SUBREG_REG (parts.base);
6467 if (parts.index && GET_CODE (parts.index) == SUBREG)
6468 parts.index = SUBREG_REG (parts.index);
6470 /* More complex memory references are better. */
6471 if (parts.disp && parts.disp != const0_rtx)
6472 cost--;
6473 if (parts.seg != SEG_DEFAULT)
6474 cost--;
6476 /* Attempt to minimize number of registers in the address. */
6477 if ((parts.base
6478 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6479 || (parts.index
6480 && (!REG_P (parts.index)
6481 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6482 cost++;
6484 if (parts.base
6485 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6486 && parts.index
6487 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6488 && parts.base != parts.index)
6489 cost++;
6491 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6492 since its predecode logic can't detect the length of such instructions
6493 and they degenerate to vector decoding. Increase the cost of such
6494 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6495 to split such addresses or even refuse them altogether.
6497 The following addressing modes are affected:
6498 [base+scale*index]
6499 [scale*index+disp]
6500 [base+index]
6502 The first and last cases may be avoidable by explicitly coding the zero in
6503 the memory address, but I don't have an AMD-K6 machine handy to check this
6504 theory. */
6506 if (TARGET_K6
6507 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6508 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6509 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6510 cost += 10;
6512 return cost;
6515 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6516 this is used to form addresses of local data when -fPIC is in
6517 use. */
6519 static bool
6520 darwin_local_data_pic (rtx disp)
6522 if (GET_CODE (disp) == MINUS)
6524 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6525 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6526 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6528 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6529 if (! strcmp (sym_name, "<pic base>"))
6530 return true;
6534 return false;
6537 /* Determine if a given RTX is a valid constant. We already know this
6538 satisfies CONSTANT_P. */
6540 bool
6541 legitimate_constant_p (rtx x)
6543 switch (GET_CODE (x))
6545 case CONST:
6546 x = XEXP (x, 0);
6548 if (GET_CODE (x) == PLUS)
6550 if (!CONST_INT_P (XEXP (x, 1)))
6551 return false;
6552 x = XEXP (x, 0);
6555 if (TARGET_MACHO && darwin_local_data_pic (x))
6556 return true;
6558 /* Only some unspecs are valid as "constants". */
6559 if (GET_CODE (x) == UNSPEC)
6560 switch (XINT (x, 1))
6562 case UNSPEC_GOT:
6563 case UNSPEC_GOTOFF:
6564 case UNSPEC_PLTOFF:
6565 return TARGET_64BIT;
6566 case UNSPEC_TPOFF:
6567 case UNSPEC_NTPOFF:
6568 x = XVECEXP (x, 0, 0);
6569 return (GET_CODE (x) == SYMBOL_REF
6570 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6571 case UNSPEC_DTPOFF:
6572 x = XVECEXP (x, 0, 0);
6573 return (GET_CODE (x) == SYMBOL_REF
6574 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6575 default:
6576 return false;
6579 /* We must have drilled down to a symbol. */
6580 if (GET_CODE (x) == LABEL_REF)
6581 return true;
6582 if (GET_CODE (x) != SYMBOL_REF)
6583 return false;
6584 /* FALLTHRU */
6586 case SYMBOL_REF:
6587 /* TLS symbols are never valid. */
6588 if (SYMBOL_REF_TLS_MODEL (x))
6589 return false;
6591 /* DLLIMPORT symbols are never valid. */
6592 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6593 && SYMBOL_REF_DLLIMPORT_P (x))
6594 return false;
6595 break;
6597 case CONST_DOUBLE:
6598 if (GET_MODE (x) == TImode
6599 && x != CONST0_RTX (TImode)
6600 && !TARGET_64BIT)
6601 return false;
6602 break;
6604 case CONST_VECTOR:
6605 if (x == CONST0_RTX (GET_MODE (x)))
6606 return true;
6607 return false;
6609 default:
6610 break;
6613 /* Otherwise we handle everything else in the move patterns. */
6614 return true;
6617 /* Determine if it's legal to put X into the constant pool. This
6618 is not possible for the address of thread-local symbols, which
6619 is checked above. */
6621 static bool
6622 ix86_cannot_force_const_mem (rtx x)
6624 /* We can always put integral constants and vectors in memory. */
6625 switch (GET_CODE (x))
6627 case CONST_INT:
6628 case CONST_DOUBLE:
6629 case CONST_VECTOR:
6630 return false;
6632 default:
6633 break;
6635 return !legitimate_constant_p (x);
6638 /* Determine if a given RTX is a valid constant address. */
6640 bool
6641 constant_address_p (rtx x)
6643 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6646 /* Nonzero if the constant value X is a legitimate general operand
6647 when generating PIC code. It is given that flag_pic is on and
6648 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6650 bool
6651 legitimate_pic_operand_p (rtx x)
6653 rtx inner;
6655 switch (GET_CODE (x))
6657 case CONST:
6658 inner = XEXP (x, 0);
6659 if (GET_CODE (inner) == PLUS
6660 && CONST_INT_P (XEXP (inner, 1)))
6661 inner = XEXP (inner, 0);
6663 /* Only some unspecs are valid as "constants". */
6664 if (GET_CODE (inner) == UNSPEC)
6665 switch (XINT (inner, 1))
6667 case UNSPEC_GOT:
6668 case UNSPEC_GOTOFF:
6669 case UNSPEC_PLTOFF:
6670 return TARGET_64BIT;
6671 case UNSPEC_TPOFF:
6672 x = XVECEXP (inner, 0, 0);
6673 return (GET_CODE (x) == SYMBOL_REF
6674 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6675 default:
6676 return false;
6678 /* FALLTHRU */
6680 case SYMBOL_REF:
6681 case LABEL_REF:
6682 return legitimate_pic_address_disp_p (x);
6684 default:
6685 return true;
6689 /* Determine if a given CONST RTX is a valid memory displacement
6690 in PIC mode. */
6693 legitimate_pic_address_disp_p (rtx disp)
6695 bool saw_plus;
6697 /* In 64bit mode we can allow direct addresses of symbols and labels
6698 when they are not dynamic symbols. */
6699 if (TARGET_64BIT)
6701 rtx op0 = disp, op1;
6703 switch (GET_CODE (disp))
6705 case LABEL_REF:
6706 return true;
6708 case CONST:
6709 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6710 break;
6711 op0 = XEXP (XEXP (disp, 0), 0);
6712 op1 = XEXP (XEXP (disp, 0), 1);
6713 if (!CONST_INT_P (op1)
6714 || INTVAL (op1) >= 16*1024*1024
6715 || INTVAL (op1) < -16*1024*1024)
6716 break;
6717 if (GET_CODE (op0) == LABEL_REF)
6718 return true;
6719 if (GET_CODE (op0) != SYMBOL_REF)
6720 break;
6721 /* FALLTHRU */
6723 case SYMBOL_REF:
6724 /* TLS references should always be enclosed in UNSPEC. */
6725 if (SYMBOL_REF_TLS_MODEL (op0))
6726 return false;
6727 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6728 && ix86_cmodel != CM_LARGE_PIC)
6729 return true;
6730 break;
6732 default:
6733 break;
6736 if (GET_CODE (disp) != CONST)
6737 return 0;
6738 disp = XEXP (disp, 0);
6740 if (TARGET_64BIT)
6742 /* It is unsafe to allow PLUS expressions; this limits the allowed
6743 distance of GOT table references. We should not need these anyway. */
6744 if (GET_CODE (disp) != UNSPEC
6745 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6746 && XINT (disp, 1) != UNSPEC_GOTOFF
6747 && XINT (disp, 1) != UNSPEC_PLTOFF))
6748 return 0;
6750 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6751 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6752 return 0;
6753 return 1;
6756 saw_plus = false;
6757 if (GET_CODE (disp) == PLUS)
6759 if (!CONST_INT_P (XEXP (disp, 1)))
6760 return 0;
6761 disp = XEXP (disp, 0);
6762 saw_plus = true;
6765 if (TARGET_MACHO && darwin_local_data_pic (disp))
6766 return 1;
6768 if (GET_CODE (disp) != UNSPEC)
6769 return 0;
6771 switch (XINT (disp, 1))
6773 case UNSPEC_GOT:
6774 if (saw_plus)
6775 return false;
6776 /* We need to check for both symbols and labels because VxWorks loads
6777 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6778 details. */
6779 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6780 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6781 case UNSPEC_GOTOFF:
6782 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6783 The ABI also specifies a 32bit relocation, but we don't produce it in
6784 the small PIC model at all. */
6785 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6786 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6787 && !TARGET_64BIT)
6788 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6789 return false;
6790 case UNSPEC_GOTTPOFF:
6791 case UNSPEC_GOTNTPOFF:
6792 case UNSPEC_INDNTPOFF:
6793 if (saw_plus)
6794 return false;
6795 disp = XVECEXP (disp, 0, 0);
6796 return (GET_CODE (disp) == SYMBOL_REF
6797 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6798 case UNSPEC_NTPOFF:
6799 disp = XVECEXP (disp, 0, 0);
6800 return (GET_CODE (disp) == SYMBOL_REF
6801 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6802 case UNSPEC_DTPOFF:
6803 disp = XVECEXP (disp, 0, 0);
6804 return (GET_CODE (disp) == SYMBOL_REF
6805 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6808 return 0;
6811 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6812 memory address for an instruction. The MODE argument is the machine mode
6813 for the MEM expression that wants to use this address.
6815 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6816 convert common non-canonical forms to canonical form so that they will
6817 be recognized. */
6820 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6821 rtx addr, int strict)
6823 struct ix86_address parts;
6824 rtx base, index, disp;
6825 HOST_WIDE_INT scale;
6826 const char *reason = NULL;
6827 rtx reason_rtx = NULL_RTX;
6829 if (ix86_decompose_address (addr, &parts) <= 0)
6831 reason = "decomposition failed";
6832 goto report_error;
6835 base = parts.base;
6836 index = parts.index;
6837 disp = parts.disp;
6838 scale = parts.scale;
6840 /* Validate base register.
6842 Don't allow SUBREG's that span more than a word here. It can lead to spill
6843 failures when the base is one word out of a two word structure, which is
6844 represented internally as a DImode int. */
6846 if (base)
6848 rtx reg;
6849 reason_rtx = base;
6851 if (REG_P (base))
6852 reg = base;
6853 else if (GET_CODE (base) == SUBREG
6854 && REG_P (SUBREG_REG (base))
6855 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6856 <= UNITS_PER_WORD)
6857 reg = SUBREG_REG (base);
6858 else
6860 reason = "base is not a register";
6861 goto report_error;
6864 if (GET_MODE (base) != Pmode)
6866 reason = "base is not in Pmode";
6867 goto report_error;
6870 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6871 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6873 reason = "base is not valid";
6874 goto report_error;
6878 /* Validate index register.
6880 Don't allow SUBREG's that span more than a word here -- same as above. */
6882 if (index)
6884 rtx reg;
6885 reason_rtx = index;
6887 if (REG_P (index))
6888 reg = index;
6889 else if (GET_CODE (index) == SUBREG
6890 && REG_P (SUBREG_REG (index))
6891 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6892 <= UNITS_PER_WORD)
6893 reg = SUBREG_REG (index);
6894 else
6896 reason = "index is not a register";
6897 goto report_error;
6900 if (GET_MODE (index) != Pmode)
6902 reason = "index is not in Pmode";
6903 goto report_error;
6906 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6907 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6909 reason = "index is not valid";
6910 goto report_error;
6914 /* Validate scale factor. */
6915 if (scale != 1)
6917 reason_rtx = GEN_INT (scale);
6918 if (!index)
6920 reason = "scale without index";
6921 goto report_error;
6924 if (scale != 2 && scale != 4 && scale != 8)
6926 reason = "scale is not a valid multiplier";
6927 goto report_error;
6931 /* Validate displacement. */
6932 if (disp)
6934 reason_rtx = disp;
6936 if (GET_CODE (disp) == CONST
6937 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6938 switch (XINT (XEXP (disp, 0), 1))
6940 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
6941 used. While the ABI also specifies 32bit relocations, we don't produce
6942 them at all and use IP-relative addressing instead. */
6943 case UNSPEC_GOT:
6944 case UNSPEC_GOTOFF:
6945 gcc_assert (flag_pic);
6946 if (!TARGET_64BIT)
6947 goto is_legitimate_pic;
6948 reason = "64bit address unspec";
6949 goto report_error;
6951 case UNSPEC_GOTPCREL:
6952 gcc_assert (flag_pic);
6953 goto is_legitimate_pic;
6955 case UNSPEC_GOTTPOFF:
6956 case UNSPEC_GOTNTPOFF:
6957 case UNSPEC_INDNTPOFF:
6958 case UNSPEC_NTPOFF:
6959 case UNSPEC_DTPOFF:
6960 break;
6962 default:
6963 reason = "invalid address unspec";
6964 goto report_error;
6967 else if (SYMBOLIC_CONST (disp)
6968 && (flag_pic
6969 || (TARGET_MACHO
6970 #if TARGET_MACHO
6971 && MACHOPIC_INDIRECT
6972 && !machopic_operand_p (disp)
6973 #endif
6977 is_legitimate_pic:
6978 if (TARGET_64BIT && (index || base))
6980 /* foo@dtpoff(%rX) is ok. */
6981 if (GET_CODE (disp) != CONST
6982 || GET_CODE (XEXP (disp, 0)) != PLUS
6983 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6984 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6985 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6986 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6988 reason = "non-constant pic memory reference";
6989 goto report_error;
6992 else if (! legitimate_pic_address_disp_p (disp))
6994 reason = "displacement is an invalid pic construct";
6995 goto report_error;
6998 /* This code used to verify that a symbolic pic displacement
6999 includes the pic_offset_table_rtx register.
7001 While this is a good idea, unfortunately these constructs may
7002 be created by the "adds using lea" optimization for incorrect
7003 code like:
7005 int a;
7006 int foo(int i)
7008 return *(&a+i);
7011 This code is nonsensical, but results in addressing the
7012 GOT table with pic_offset_table_rtx as base. We can't
7013 just refuse it easily, since it gets matched by the
7014 "addsi3" pattern, which later gets split to lea when the
7015 output register differs from the input. While this
7016 could be handled by a separate addsi pattern for this case
7017 that never results in lea, disabling this test seems to be
7018 the easier and correct fix for the crash. */
7020 else if (GET_CODE (disp) != LABEL_REF
7021 && !CONST_INT_P (disp)
7022 && (GET_CODE (disp) != CONST
7023 || !legitimate_constant_p (disp))
7024 && (GET_CODE (disp) != SYMBOL_REF
7025 || !legitimate_constant_p (disp)))
7027 reason = "displacement is not constant";
7028 goto report_error;
7030 else if (TARGET_64BIT
7031 && !x86_64_immediate_operand (disp, VOIDmode))
7033 reason = "displacement is out of range";
7034 goto report_error;
7038 /* Everything looks valid. */
7039 return TRUE;
7041 report_error:
7042 return FALSE;
7045 /* Return a unique alias set for the GOT. */
7047 static HOST_WIDE_INT
7048 ix86_GOT_alias_set (void)
7050 static HOST_WIDE_INT set = -1;
7051 if (set == -1)
7052 set = new_alias_set ();
7053 return set;
7056 /* Return a legitimate reference for ORIG (an address) using the
7057 register REG. If REG is 0, a new pseudo is generated.
7059 There are two types of references that must be handled:
7061 1. Global data references must load the address from the GOT, via
7062 the PIC reg. An insn is emitted to do this load, and the reg is
7063 returned.
7065 2. Static data references, constant pool addresses, and code labels
7066 compute the address as an offset from the GOT, whose base is in
7067 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7068 differentiate them from global data objects. The returned
7069 address is the PIC reg + an unspec constant.
7071 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7072 reg also appears in the address. */
7074 static rtx
7075 legitimize_pic_address (rtx orig, rtx reg)
7077 rtx addr = orig;
7078 rtx new = orig;
7079 rtx base;
7081 #if TARGET_MACHO
7082 if (TARGET_MACHO && !TARGET_64BIT)
7084 if (reg == 0)
7085 reg = gen_reg_rtx (Pmode);
7086 /* Use the generic Mach-O PIC machinery. */
7087 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7089 #endif
7091 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7092 new = addr;
7093 else if (TARGET_64BIT
7094 && ix86_cmodel != CM_SMALL_PIC
7095 && gotoff_operand (addr, Pmode))
7097 rtx tmpreg;
7098 /* This symbol may be referenced via a displacement from the PIC
7099 base address (@GOTOFF). */
7101 if (reload_in_progress)
7102 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7103 if (GET_CODE (addr) == CONST)
7104 addr = XEXP (addr, 0);
7105 if (GET_CODE (addr) == PLUS)
7107 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7108 UNSPEC_GOTOFF);
7109 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7111 else
7112 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7113 new = gen_rtx_CONST (Pmode, new);
7114 if (!reg)
7115 tmpreg = gen_reg_rtx (Pmode);
7116 else
7117 tmpreg = reg;
7118 emit_move_insn (tmpreg, new);
7120 if (reg != 0)
7122 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7123 tmpreg, 1, OPTAB_DIRECT);
7124 new = reg;
7126 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7128 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7130 /* This symbol may be referenced via a displacement from the PIC
7131 base address (@GOTOFF). */
7133 if (reload_in_progress)
7134 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7135 if (GET_CODE (addr) == CONST)
7136 addr = XEXP (addr, 0);
7137 if (GET_CODE (addr) == PLUS)
7139 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7140 UNSPEC_GOTOFF);
7141 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7143 else
7144 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7145 new = gen_rtx_CONST (Pmode, new);
7146 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7148 if (reg != 0)
7150 emit_move_insn (reg, new);
7151 new = reg;
7154 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7155 /* We can't use @GOTOFF for text labels on VxWorks;
7156 see gotoff_operand. */
7157 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7159 /* Given that we've already handled dllimport variables separately
7160 in legitimize_address, and all other variables should satisfy
7161 legitimate_pic_address_disp_p, we should never arrive here. */
7162 gcc_assert (!TARGET_64BIT_MS_ABI);
7164 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7166 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7167 new = gen_rtx_CONST (Pmode, new);
7168 new = gen_const_mem (Pmode, new);
7169 set_mem_alias_set (new, ix86_GOT_alias_set ());
7171 if (reg == 0)
7172 reg = gen_reg_rtx (Pmode);
7173 /* Use gen_movsi directly, otherwise the address is loaded
7174 into a register for CSE. We don't want to CSE these addresses;
7175 instead we CSE addresses from the GOT table, so skip this. */
7176 emit_insn (gen_movsi (reg, new));
7177 new = reg;
7179 else
7181 /* This symbol must be referenced via a load from the
7182 Global Offset Table (@GOT). */
7184 if (reload_in_progress)
7185 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7186 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7187 new = gen_rtx_CONST (Pmode, new);
7188 if (TARGET_64BIT)
7189 new = force_reg (Pmode, new);
7190 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7191 new = gen_const_mem (Pmode, new);
7192 set_mem_alias_set (new, ix86_GOT_alias_set ());
7194 if (reg == 0)
7195 reg = gen_reg_rtx (Pmode);
7196 emit_move_insn (reg, new);
7197 new = reg;
7200 else
7202 if (CONST_INT_P (addr)
7203 && !x86_64_immediate_operand (addr, VOIDmode))
7205 if (reg)
7207 emit_move_insn (reg, addr);
7208 new = reg;
7210 else
7211 new = force_reg (Pmode, addr);
7213 else if (GET_CODE (addr) == CONST)
7215 addr = XEXP (addr, 0);
7217 /* We must match stuff we generate before. Assume the only
7218 unspecs that can get here are ours. Not that we could do
7219 anything with them anyway.... */
7220 if (GET_CODE (addr) == UNSPEC
7221 || (GET_CODE (addr) == PLUS
7222 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7223 return orig;
7224 gcc_assert (GET_CODE (addr) == PLUS);
7226 if (GET_CODE (addr) == PLUS)
7228 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7230 /* Check first to see if this is a constant offset from a @GOTOFF
7231 symbol reference. */
7232 if (gotoff_operand (op0, Pmode)
7233 && CONST_INT_P (op1))
7235 if (!TARGET_64BIT)
7237 if (reload_in_progress)
7238 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7239 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7240 UNSPEC_GOTOFF);
7241 new = gen_rtx_PLUS (Pmode, new, op1);
7242 new = gen_rtx_CONST (Pmode, new);
7243 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7245 if (reg != 0)
7247 emit_move_insn (reg, new);
7248 new = reg;
7251 else
7253 if (INTVAL (op1) < -16*1024*1024
7254 || INTVAL (op1) >= 16*1024*1024)
7256 if (!x86_64_immediate_operand (op1, Pmode))
7257 op1 = force_reg (Pmode, op1);
7258 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7262 else
7264 base = legitimize_pic_address (XEXP (addr, 0), reg);
7265 new = legitimize_pic_address (XEXP (addr, 1),
7266 base == reg ? NULL_RTX : reg);
7268 if (CONST_INT_P (new))
7269 new = plus_constant (base, INTVAL (new));
7270 else
7272 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7274 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7275 new = XEXP (new, 1);
7277 new = gen_rtx_PLUS (Pmode, base, new);
7282 return new;
7285 /* Load the thread pointer. If TO_REG is true, force it into a register. */
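/* A rough sketch of what UNSPEC_TP materializes as on GNU/Linux-style
   targets: the thread pointer lives in the %gs segment in 32-bit mode
   and in %fs in 64-bit mode, so the load is e.g. "movl %gs:0, %eax". */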
7287 static rtx
7288 get_thread_pointer (int to_reg)
7290 rtx tp, reg, insn;
7292 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7293 if (!to_reg)
7294 return tp;
7296 reg = gen_reg_rtx (Pmode);
7297 insn = gen_rtx_SET (VOIDmode, reg, tp);
7298 insn = emit_insn (insn);
7300 return reg;
7303 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7304 false if we expect this to be used for a memory address and true if
7305 we expect to load the address into a register. */
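/* Roughly, the models handled below correspond to the usual ELF TLS
   sequences: global and local dynamic emit a call to the helper
   returned by ix86_tls_get_addr () (or use TLS descriptors with
   -mtls-dialect=gnu2), initial exec loads the offset from the GOT
   (@GOTTPOFF/@GOTNTPOFF/@INDNTPOFF) and adds the thread pointer, and
   local exec folds a link-time constant offset (@TPOFF/@NTPOFF)
   straight into the address. */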
7307 static rtx
7308 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7310 rtx dest, base, off, pic, tp;
7311 int type;
7313 switch (model)
7315 case TLS_MODEL_GLOBAL_DYNAMIC:
7316 dest = gen_reg_rtx (Pmode);
7317 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7319 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7321 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7323 start_sequence ();
7324 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7325 insns = get_insns ();
7326 end_sequence ();
7328 CONST_OR_PURE_CALL_P (insns) = 1;
7329 emit_libcall_block (insns, dest, rax, x);
7331 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7332 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7333 else
7334 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7336 if (TARGET_GNU2_TLS)
7338 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7340 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7342 break;
7344 case TLS_MODEL_LOCAL_DYNAMIC:
7345 base = gen_reg_rtx (Pmode);
7346 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7348 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7350 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7352 start_sequence ();
7353 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7354 insns = get_insns ();
7355 end_sequence ();
7357 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7358 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7359 CONST_OR_PURE_CALL_P (insns) = 1;
7360 emit_libcall_block (insns, base, rax, note);
7362 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7363 emit_insn (gen_tls_local_dynamic_base_64 (base));
7364 else
7365 emit_insn (gen_tls_local_dynamic_base_32 (base));
7367 if (TARGET_GNU2_TLS)
7369 rtx x = ix86_tls_module_base ();
7371 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7372 gen_rtx_MINUS (Pmode, x, tp));
7375 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7376 off = gen_rtx_CONST (Pmode, off);
7378 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7380 if (TARGET_GNU2_TLS)
7382 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7384 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7387 break;
7389 case TLS_MODEL_INITIAL_EXEC:
7390 if (TARGET_64BIT)
7392 pic = NULL;
7393 type = UNSPEC_GOTNTPOFF;
7395 else if (flag_pic)
7397 if (reload_in_progress)
7398 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7399 pic = pic_offset_table_rtx;
7400 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7402 else if (!TARGET_ANY_GNU_TLS)
7404 pic = gen_reg_rtx (Pmode);
7405 emit_insn (gen_set_got (pic));
7406 type = UNSPEC_GOTTPOFF;
7408 else
7410 pic = NULL;
7411 type = UNSPEC_INDNTPOFF;
7414 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7415 off = gen_rtx_CONST (Pmode, off);
7416 if (pic)
7417 off = gen_rtx_PLUS (Pmode, pic, off);
7418 off = gen_const_mem (Pmode, off);
7419 set_mem_alias_set (off, ix86_GOT_alias_set ());
7421 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7423 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7424 off = force_reg (Pmode, off);
7425 return gen_rtx_PLUS (Pmode, base, off);
7427 else
7429 base = get_thread_pointer (true);
7430 dest = gen_reg_rtx (Pmode);
7431 emit_insn (gen_subsi3 (dest, base, off));
7433 break;
7435 case TLS_MODEL_LOCAL_EXEC:
7436 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7437 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7438 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7439 off = gen_rtx_CONST (Pmode, off);
7441 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7443 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7444 return gen_rtx_PLUS (Pmode, base, off);
7446 else
7448 base = get_thread_pointer (true);
7449 dest = gen_reg_rtx (Pmode);
7450 emit_insn (gen_subsi3 (dest, base, off));
7452 break;
7454 default:
7455 gcc_unreachable ();
7458 return dest;
7461 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7462 to symbol DECL. */
7464 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7465 htab_t dllimport_map;
7467 static tree
7468 get_dllimport_decl (tree decl)
7470 struct tree_map *h, in;
7471 void **loc;
7472 const char *name;
7473 const char *prefix;
7474 size_t namelen, prefixlen;
7475 char *imp_name;
7476 tree to;
7477 rtx rtl;
7479 if (!dllimport_map)
7480 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7482 in.hash = htab_hash_pointer (decl);
7483 in.base.from = decl;
7484 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7485 h = *loc;
7486 if (h)
7487 return h->to;
7489 *loc = h = ggc_alloc (sizeof (struct tree_map));
7490 h->hash = in.hash;
7491 h->base.from = decl;
7492 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7493 DECL_ARTIFICIAL (to) = 1;
7494 DECL_IGNORED_P (to) = 1;
7495 DECL_EXTERNAL (to) = 1;
7496 TREE_READONLY (to) = 1;
7498 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7499 name = targetm.strip_name_encoding (name);
7500 if (name[0] == FASTCALL_PREFIX)
7502 name++;
7503 prefix = "*__imp_";
7505 else
7506 prefix = "*__imp__";
7508 namelen = strlen (name);
7509 prefixlen = strlen (prefix);
7510 imp_name = alloca (namelen + prefixlen + 1);
7511 memcpy (imp_name, prefix, prefixlen);
7512 memcpy (imp_name + prefixlen, name, namelen + 1);
7514 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7515 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7516 SET_SYMBOL_REF_DECL (rtl, to);
7517 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7519 rtl = gen_const_mem (Pmode, rtl);
7520 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7522 SET_DECL_RTL (to, rtl);
7524 return to;
7527 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7528 true if we require the result be a register. */
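/* On PE/COFF (Windows) targets the "__imp_" symbol created by
   get_dllimport_decl names the import-table slot that the loader
   fills with the variable's real address, so the result here is a
   load from that slot rather than the symbol itself. */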
7530 static rtx
7531 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7533 tree imp_decl;
7534 rtx x;
7536 gcc_assert (SYMBOL_REF_DECL (symbol));
7537 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7539 x = DECL_RTL (imp_decl);
7540 if (want_reg)
7541 x = force_reg (Pmode, x);
7542 return x;
7545 /* Try machine-dependent ways of modifying an illegitimate address
7546 to be legitimate. If we find one, return the new, valid address.
7547 This function is used in only one place: `memory_address' in explow.c.
7549 OLDX is the address as it was before break_out_memory_refs was called.
7550 In some cases it is useful to look at this to decide what needs to be done.
7552 MODE is passed so that this function can use
7553 GO_IF_LEGITIMATE_ADDRESS.
7555 It is always safe for this function to do nothing. It exists to recognize
7556 opportunities to optimize the output.
7558 For the 80386, we handle X+REG by loading X into a register R and
7559 using R+REG. R will go in a general reg and indexing will be used.
7560 However, if REG is a broken-out memory address or multiplication,
7561 nothing needs to be done because REG can certainly go in a general reg.
7563 When -fpic is used, special handling is needed for symbolic references.
7564 See comments by legitimize_pic_address in i386.c for details. */
7567 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7569 int changed = 0;
7570 unsigned log;
7572 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7573 if (log)
7574 return legitimize_tls_address (x, log, false);
7575 if (GET_CODE (x) == CONST
7576 && GET_CODE (XEXP (x, 0)) == PLUS
7577 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7578 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7580 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7581 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7584 if (flag_pic && SYMBOLIC_CONST (x))
7585 return legitimize_pic_address (x, 0);
7587 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7589 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7590 return legitimize_dllimport_symbol (x, true);
7591 if (GET_CODE (x) == CONST
7592 && GET_CODE (XEXP (x, 0)) == PLUS
7593 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7594 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7596 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7597 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7601 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
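/* For instance, (ashift (reg) (const_int 2)) becomes
   (mult (reg) (const_int 4)), which matches the scaled-index part of
   an ix86 address. */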
7602 if (GET_CODE (x) == ASHIFT
7603 && CONST_INT_P (XEXP (x, 1))
7604 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7606 changed = 1;
7607 log = INTVAL (XEXP (x, 1));
7608 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7609 GEN_INT (1 << log));
7612 if (GET_CODE (x) == PLUS)
7614 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7616 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7617 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7618 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7620 changed = 1;
7621 log = INTVAL (XEXP (XEXP (x, 0), 1));
7622 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7623 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7624 GEN_INT (1 << log));
7627 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7628 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7629 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7631 changed = 1;
7632 log = INTVAL (XEXP (XEXP (x, 1), 1));
7633 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7634 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7635 GEN_INT (1 << log));
7638 /* Put multiply first if it isn't already. */
7639 if (GET_CODE (XEXP (x, 1)) == MULT)
7641 rtx tmp = XEXP (x, 0);
7642 XEXP (x, 0) = XEXP (x, 1);
7643 XEXP (x, 1) = tmp;
7644 changed = 1;
7647 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7648 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7649 created by virtual register instantiation, register elimination, and
7650 similar optimizations. */
7651 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7653 changed = 1;
7654 x = gen_rtx_PLUS (Pmode,
7655 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7656 XEXP (XEXP (x, 1), 0)),
7657 XEXP (XEXP (x, 1), 1));
7660 /* Canonicalize
7661 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7662 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7663 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7664 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7665 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7666 && CONSTANT_P (XEXP (x, 1)))
7668 rtx constant;
7669 rtx other = NULL_RTX;
7671 if (CONST_INT_P (XEXP (x, 1)))
7673 constant = XEXP (x, 1);
7674 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7676 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7678 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7679 other = XEXP (x, 1);
7681 else
7682 constant = 0;
7684 if (constant)
7686 changed = 1;
7687 x = gen_rtx_PLUS (Pmode,
7688 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7689 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7690 plus_constant (other, INTVAL (constant)));
7694 if (changed && legitimate_address_p (mode, x, FALSE))
7695 return x;
7697 if (GET_CODE (XEXP (x, 0)) == MULT)
7699 changed = 1;
7700 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7703 if (GET_CODE (XEXP (x, 1)) == MULT)
7705 changed = 1;
7706 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7709 if (changed
7710 && REG_P (XEXP (x, 1))
7711 && REG_P (XEXP (x, 0)))
7712 return x;
7714 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7716 changed = 1;
7717 x = legitimize_pic_address (x, 0);
7720 if (changed && legitimate_address_p (mode, x, FALSE))
7721 return x;
7723 if (REG_P (XEXP (x, 0)))
7725 rtx temp = gen_reg_rtx (Pmode);
7726 rtx val = force_operand (XEXP (x, 1), temp);
7727 if (val != temp)
7728 emit_move_insn (temp, val);
7730 XEXP (x, 1) = temp;
7731 return x;
7734 else if (REG_P (XEXP (x, 1)))
7736 rtx temp = gen_reg_rtx (Pmode);
7737 rtx val = force_operand (XEXP (x, 0), temp);
7738 if (val != temp)
7739 emit_move_insn (temp, val);
7741 XEXP (x, 0) = temp;
7742 return x;
7746 return x;
7749 /* Print an integer constant expression in assembler syntax. Addition
7750 and subtraction are the only arithmetic that may appear in these
7751 expressions. FILE is the stdio stream to write to, X is the rtx, and
7752 CODE is the operand print code from the output string. */
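/* For example, an UNSPEC_GOTOFF wrapping the SYMBOL_REF `sym'
   (hypothetical name) prints as "sym@GOTOFF" on ELF targets, and with
   operand code 'P' a non-local symbol additionally gets an "@PLT"
   suffix. */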
7754 static void
7755 output_pic_addr_const (FILE *file, rtx x, int code)
7757 char buf[256];
7759 switch (GET_CODE (x))
7761 case PC:
7762 gcc_assert (flag_pic);
7763 putc ('.', file);
7764 break;
7766 case SYMBOL_REF:
7767 if (! TARGET_MACHO || TARGET_64BIT)
7768 output_addr_const (file, x);
7769 else
7771 const char *name = XSTR (x, 0);
7773 /* Mark the decl as referenced so that cgraph will
7774 output the function. */
7775 if (SYMBOL_REF_DECL (x))
7776 mark_decl_referenced (SYMBOL_REF_DECL (x));
7778 #if TARGET_MACHO
7779 if (MACHOPIC_INDIRECT
7780 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7781 name = machopic_indirection_name (x, /*stub_p=*/true);
7782 #endif
7783 assemble_name (file, name);
7785 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7786 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7787 fputs ("@PLT", file);
7788 break;
7790 case LABEL_REF:
7791 x = XEXP (x, 0);
7792 /* FALLTHRU */
7793 case CODE_LABEL:
7794 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7795 assemble_name (asm_out_file, buf);
7796 break;
7798 case CONST_INT:
7799 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7800 break;
7802 case CONST:
7803 /* This used to output parentheses around the expression,
7804 but that does not work on the 386 (either ATT or BSD assembler). */
7805 output_pic_addr_const (file, XEXP (x, 0), code);
7806 break;
7808 case CONST_DOUBLE:
7809 if (GET_MODE (x) == VOIDmode)
7811 /* We can use %d if the number is <32 bits and positive. */
7812 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7813 fprintf (file, "0x%lx%08lx",
7814 (unsigned long) CONST_DOUBLE_HIGH (x),
7815 (unsigned long) CONST_DOUBLE_LOW (x));
7816 else
7817 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7819 else
7820 /* We can't handle floating point constants;
7821 PRINT_OPERAND must handle them. */
7822 output_operand_lossage ("floating constant misused");
7823 break;
7825 case PLUS:
7826 /* Some assemblers need integer constants to appear first. */
7827 if (CONST_INT_P (XEXP (x, 0)))
7829 output_pic_addr_const (file, XEXP (x, 0), code);
7830 putc ('+', file);
7831 output_pic_addr_const (file, XEXP (x, 1), code);
7833 else
7835 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7836 output_pic_addr_const (file, XEXP (x, 1), code);
7837 putc ('+', file);
7838 output_pic_addr_const (file, XEXP (x, 0), code);
7840 break;
7842 case MINUS:
7843 if (!TARGET_MACHO)
7844 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7845 output_pic_addr_const (file, XEXP (x, 0), code);
7846 putc ('-', file);
7847 output_pic_addr_const (file, XEXP (x, 1), code);
7848 if (!TARGET_MACHO)
7849 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7850 break;
7852 case UNSPEC:
7853 gcc_assert (XVECLEN (x, 0) == 1);
7854 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7855 switch (XINT (x, 1))
7857 case UNSPEC_GOT:
7858 fputs ("@GOT", file);
7859 break;
7860 case UNSPEC_GOTOFF:
7861 fputs ("@GOTOFF", file);
7862 break;
7863 case UNSPEC_PLTOFF:
7864 fputs ("@PLTOFF", file);
7865 break;
7866 case UNSPEC_GOTPCREL:
7867 fputs ("@GOTPCREL(%rip)", file);
7868 break;
7869 case UNSPEC_GOTTPOFF:
7870 /* FIXME: This might be @TPOFF in Sun ld too. */
7871 fputs ("@GOTTPOFF", file);
7872 break;
7873 case UNSPEC_TPOFF:
7874 fputs ("@TPOFF", file);
7875 break;
7876 case UNSPEC_NTPOFF:
7877 if (TARGET_64BIT)
7878 fputs ("@TPOFF", file);
7879 else
7880 fputs ("@NTPOFF", file);
7881 break;
7882 case UNSPEC_DTPOFF:
7883 fputs ("@DTPOFF", file);
7884 break;
7885 case UNSPEC_GOTNTPOFF:
7886 if (TARGET_64BIT)
7887 fputs ("@GOTTPOFF(%rip)", file);
7888 else
7889 fputs ("@GOTNTPOFF", file);
7890 break;
7891 case UNSPEC_INDNTPOFF:
7892 fputs ("@INDNTPOFF", file);
7893 break;
7894 default:
7895 output_operand_lossage ("invalid UNSPEC as operand");
7896 break;
7898 break;
7900 default:
7901 output_operand_lossage ("invalid expression as operand");
7905 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7906 We need to emit DTP-relative relocations. */
7908 static void ATTRIBUTE_UNUSED
7909 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7911 fputs (ASM_LONG, file);
7912 output_addr_const (file, x);
7913 fputs ("@DTPOFF", file);
7914 switch (size)
7916 case 4:
7917 break;
7918 case 8:
7919 fputs (", 0", file);
7920 break;
7921 default:
7922 gcc_unreachable ();
7926 /* In the name of slightly smaller debug output, and to cater to
7927 general assembler lossage, recognize PIC+GOTOFF and turn it back
7928 into a direct symbol reference.
7930 On Darwin, this is necessary to avoid a crash, because Darwin
7931 has a different PIC label for each routine but the DWARF debugging
7932 information is not associated with any particular routine, so it's
7933 necessary to remove references to the PIC label from RTL stored by
7934 the DWARF output code. */
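/* For example, a 32-bit address of the form
   (plus (reg ebx) (const (unspec [sym] UNSPEC_GOTOFF)))
   is turned back into plain `sym' below, plus any constant addend
   that was attached to it. */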
7936 static rtx
7937 ix86_delegitimize_address (rtx orig_x)
7939 rtx x = orig_x;
7940 /* reg_addend is NULL or a multiple of some register. */
7941 rtx reg_addend = NULL_RTX;
7942 /* const_addend is NULL or a const_int. */
7943 rtx const_addend = NULL_RTX;
7944 /* This is the result, or NULL. */
7945 rtx result = NULL_RTX;
7947 if (MEM_P (x))
7948 x = XEXP (x, 0);
7950 if (TARGET_64BIT)
7952 if (GET_CODE (x) != CONST
7953 || GET_CODE (XEXP (x, 0)) != UNSPEC
7954 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7955 || !MEM_P (orig_x))
7956 return orig_x;
7957 return XVECEXP (XEXP (x, 0), 0, 0);
7960 if (GET_CODE (x) != PLUS
7961 || GET_CODE (XEXP (x, 1)) != CONST)
7962 return orig_x;
7964 if (REG_P (XEXP (x, 0))
7965 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7966 /* %ebx + GOT/GOTOFF */
7968 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7970 /* %ebx + %reg * scale + GOT/GOTOFF */
7971 reg_addend = XEXP (x, 0);
7972 if (REG_P (XEXP (reg_addend, 0))
7973 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7974 reg_addend = XEXP (reg_addend, 1);
7975 else if (REG_P (XEXP (reg_addend, 1))
7976 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7977 reg_addend = XEXP (reg_addend, 0);
7978 else
7979 return orig_x;
7980 if (!REG_P (reg_addend)
7981 && GET_CODE (reg_addend) != MULT
7982 && GET_CODE (reg_addend) != ASHIFT)
7983 return orig_x;
7985 else
7986 return orig_x;
7988 x = XEXP (XEXP (x, 1), 0);
7989 if (GET_CODE (x) == PLUS
7990 && CONST_INT_P (XEXP (x, 1)))
7992 const_addend = XEXP (x, 1);
7993 x = XEXP (x, 0);
7996 if (GET_CODE (x) == UNSPEC
7997 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7998 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7999 result = XVECEXP (x, 0, 0);
8001 if (TARGET_MACHO && darwin_local_data_pic (x)
8002 && !MEM_P (orig_x))
8003 result = XEXP (x, 0);
8005 if (! result)
8006 return orig_x;
8008 if (const_addend)
8009 result = gen_rtx_PLUS (Pmode, result, const_addend);
8010 if (reg_addend)
8011 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8012 return result;
8015 /* If X is a machine specific address (i.e. a symbol or label being
8016 referenced as a displacement from the GOT implemented using an
8017 UNSPEC), then return the base term. Otherwise return X. */
8020 ix86_find_base_term (rtx x)
8022 rtx term;
8024 if (TARGET_64BIT)
8026 if (GET_CODE (x) != CONST)
8027 return x;
8028 term = XEXP (x, 0);
8029 if (GET_CODE (term) == PLUS
8030 && (CONST_INT_P (XEXP (term, 1))
8031 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8032 term = XEXP (term, 0);
8033 if (GET_CODE (term) != UNSPEC
8034 || XINT (term, 1) != UNSPEC_GOTPCREL)
8035 return x;
8037 term = XVECEXP (term, 0, 0);
8039 if (GET_CODE (term) != SYMBOL_REF
8040 && GET_CODE (term) != LABEL_REF)
8041 return x;
8043 return term;
8046 term = ix86_delegitimize_address (x);
8048 if (GET_CODE (term) != SYMBOL_REF
8049 && GET_CODE (term) != LABEL_REF)
8050 return x;
8052 return term;
8055 static void
8056 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8057 int fp, FILE *file)
8059 const char *suffix;
8061 if (mode == CCFPmode || mode == CCFPUmode)
8063 enum rtx_code second_code, bypass_code;
8064 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8065 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8066 code = ix86_fp_compare_code_to_integer (code);
8067 mode = CCmode;
8069 if (reverse)
8070 code = reverse_condition (code);
8072 switch (code)
8074 case EQ:
8075 suffix = "e";
8076 break;
8077 case NE:
8078 suffix = "ne";
8079 break;
8080 case GT:
8081 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8082 suffix = "g";
8083 break;
8084 case GTU:
8085 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8086 Those same assemblers have the same but opposite lossage on cmov. */
8087 gcc_assert (mode == CCmode);
8088 suffix = fp ? "nbe" : "a";
8089 break;
8090 case LT:
8091 switch (mode)
8093 case CCNOmode:
8094 case CCGOCmode:
8095 suffix = "s";
8096 break;
8098 case CCmode:
8099 case CCGCmode:
8100 suffix = "l";
8101 break;
8103 default:
8104 gcc_unreachable ();
8106 break;
8107 case LTU:
8108 gcc_assert (mode == CCmode);
8109 suffix = "b";
8110 break;
8111 case GE:
8112 switch (mode)
8114 case CCNOmode:
8115 case CCGOCmode:
8116 suffix = "ns";
8117 break;
8119 case CCmode:
8120 case CCGCmode:
8121 suffix = "ge";
8122 break;
8124 default:
8125 gcc_unreachable ();
8127 break;
8128 case GEU:
8129 /* ??? As above. */
8130 gcc_assert (mode == CCmode);
8131 suffix = fp ? "nb" : "ae";
8132 break;
8133 case LE:
8134 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8135 suffix = "le";
8136 break;
8137 case LEU:
8138 gcc_assert (mode == CCmode);
8139 suffix = "be";
8140 break;
8141 case UNORDERED:
8142 suffix = fp ? "u" : "p";
8143 break;
8144 case ORDERED:
8145 suffix = fp ? "nu" : "np";
8146 break;
8147 default:
8148 gcc_unreachable ();
8150 fputs (suffix, file);
8153 /* Print the name of register X to FILE based on its machine mode and number.
8154 If CODE is 'w', pretend the mode is HImode.
8155 If CODE is 'b', pretend the mode is QImode.
8156 If CODE is 'k', pretend the mode is SImode.
8157 If CODE is 'q', pretend the mode is DImode.
8158 If CODE is 'h', pretend the reg is the 'high' byte register.
8159 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op. */
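/* For example, with hard register %eax (AT&T syntax assumed):
   code 'b' prints "%al", 'w' prints "%ax", 'k' prints "%eax",
   'h' prints "%ah", and 'q' prints "%rax" (64-bit only). */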
8161 void
8162 print_reg (rtx x, int code, FILE *file)
8164 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8165 && REGNO (x) != FRAME_POINTER_REGNUM
8166 && REGNO (x) != FLAGS_REG
8167 && REGNO (x) != FPSR_REG
8168 && REGNO (x) != FPCR_REG);
8170 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8171 putc ('%', file);
8173 if (code == 'w' || MMX_REG_P (x))
8174 code = 2;
8175 else if (code == 'b')
8176 code = 1;
8177 else if (code == 'k')
8178 code = 4;
8179 else if (code == 'q')
8180 code = 8;
8181 else if (code == 'y')
8182 code = 3;
8183 else if (code == 'h')
8184 code = 0;
8185 else
8186 code = GET_MODE_SIZE (GET_MODE (x));
8188 /* Irritatingly, AMD extended registers use a different naming convention
8189 from the normal registers. */
8190 if (REX_INT_REG_P (x))
8192 gcc_assert (TARGET_64BIT);
8193 switch (code)
8195 case 0:
8196 error ("extended registers have no high halves");
8197 break;
8198 case 1:
8199 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8200 break;
8201 case 2:
8202 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8203 break;
8204 case 4:
8205 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8206 break;
8207 case 8:
8208 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8209 break;
8210 default:
8211 error ("unsupported operand size for extended register");
8212 break;
8214 return;
8216 switch (code)
8218 case 3:
8219 if (STACK_TOP_P (x))
8221 fputs ("st(0)", file);
8222 break;
8224 /* FALLTHRU */
8225 case 8:
8226 case 4:
8227 case 12:
8228 if (! ANY_FP_REG_P (x))
8229 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8230 /* FALLTHRU */
8231 case 16:
8232 case 2:
8233 normal:
8234 fputs (hi_reg_name[REGNO (x)], file);
8235 break;
8236 case 1:
8237 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8238 goto normal;
8239 fputs (qi_reg_name[REGNO (x)], file);
8240 break;
8241 case 0:
8242 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8243 goto normal;
8244 fputs (qi_high_reg_name[REGNO (x)], file);
8245 break;
8246 default:
8247 gcc_unreachable ();
8251 /* Locate some local-dynamic symbol still in use by this function
8252 so that we can print its name in some tls_local_dynamic_base
8253 pattern. */
8255 static int
8256 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8258 rtx x = *px;
8260 if (GET_CODE (x) == SYMBOL_REF
8261 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8263 cfun->machine->some_ld_name = XSTR (x, 0);
8264 return 1;
8267 return 0;
8270 static const char *
8271 get_some_local_dynamic_name (void)
8273 rtx insn;
8275 if (cfun->machine->some_ld_name)
8276 return cfun->machine->some_ld_name;
8278 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8279 if (INSN_P (insn)
8280 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8281 return cfun->machine->some_ld_name;
8283 gcc_unreachable ();
8286 /* Meaning of CODE:
8287 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8288 C -- print opcode suffix for set/cmov insn.
8289 c -- like C, but print reversed condition
8290 F,f -- likewise, but for floating-point.
8291 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8292 otherwise nothing
8293 R -- print the prefix for register names.
8294 z -- print the opcode suffix for the size of the current operand.
8295 * -- print a star (in certain assembler syntax)
8296 A -- print an absolute memory reference.
8297 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8298 s -- print a shift double count, followed by the assembler's argument
8299 delimiter.
8300 b -- print the QImode name of the register for the indicated operand.
8301 %b0 would print %al if operands[0] is reg 0.
8302 w -- likewise, print the HImode name of the register.
8303 k -- likewise, print the SImode name of the register.
8304 q -- likewise, print the DImode name of the register.
8305 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8306 y -- print "st(0)" instead of "st" as a register.
8307 D -- print condition for SSE cmp instruction.
8308 P -- if PIC, print an @PLT suffix.
8309 X -- don't print any sort of PIC '@' suffix for a symbol.
8310 & -- print some in-use local-dynamic symbol name.
8311 H -- print a memory address offset by 8; used for sse high-parts
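For instance, in a template such as "add%z0\t{%2, %0|%0, %2}" (a
   hypothetical pattern), %z0 expands to the size suffix of operand 0
   ('l' for SImode, 'q' for DImode where the assembler supports it),
   and the {att|intel} braces select the operand order for the current
   assembler dialect. */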
8314 void
8315 print_operand (FILE *file, rtx x, int code)
8317 if (code)
8319 switch (code)
8321 case '*':
8322 if (ASSEMBLER_DIALECT == ASM_ATT)
8323 putc ('*', file);
8324 return;
8326 case '&':
8327 assemble_name (file, get_some_local_dynamic_name ());
8328 return;
8330 case 'A':
8331 switch (ASSEMBLER_DIALECT)
8333 case ASM_ATT:
8334 putc ('*', file);
8335 break;
8337 case ASM_INTEL:
8338 /* Intel syntax. For absolute addresses, registers should not
8339 be surrounded by brackets. */
8340 if (!REG_P (x))
8342 putc ('[', file);
8343 PRINT_OPERAND (file, x, 0);
8344 putc (']', file);
8345 return;
8347 break;
8349 default:
8350 gcc_unreachable ();
8353 PRINT_OPERAND (file, x, 0);
8354 return;
8357 case 'L':
8358 if (ASSEMBLER_DIALECT == ASM_ATT)
8359 putc ('l', file);
8360 return;
8362 case 'W':
8363 if (ASSEMBLER_DIALECT == ASM_ATT)
8364 putc ('w', file);
8365 return;
8367 case 'B':
8368 if (ASSEMBLER_DIALECT == ASM_ATT)
8369 putc ('b', file);
8370 return;
8372 case 'Q':
8373 if (ASSEMBLER_DIALECT == ASM_ATT)
8374 putc ('l', file);
8375 return;
8377 case 'S':
8378 if (ASSEMBLER_DIALECT == ASM_ATT)
8379 putc ('s', file);
8380 return;
8382 case 'T':
8383 if (ASSEMBLER_DIALECT == ASM_ATT)
8384 putc ('t', file);
8385 return;
8387 case 'z':
8388 /* 387 opcodes don't get size suffixes if the operands are
8389 registers. */
8390 if (STACK_REG_P (x))
8391 return;
8393 /* Likewise if using Intel opcodes. */
8394 if (ASSEMBLER_DIALECT == ASM_INTEL)
8395 return;
8397 /* Derive the opcode suffix from the size of the operand. */
8398 switch (GET_MODE_SIZE (GET_MODE (x)))
8400 case 1:
8401 putc ('b', file);
8402 return;
8404 case 2:
8405 if (MEM_P (x))
8407 #ifdef HAVE_GAS_FILDS_FISTS
8408 putc ('s', file);
8409 #endif
8410 return;
8412 else
8413 putc ('w', file);
8414 return;
8416 case 4:
8417 if (GET_MODE (x) == SFmode)
8419 putc ('s', file);
8420 return;
8422 else
8423 putc ('l', file);
8424 return;
8426 case 12:
8427 case 16:
8428 putc ('t', file);
8429 return;
8431 case 8:
8432 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8434 #ifdef GAS_MNEMONICS
8435 putc ('q', file);
8436 #else
8437 putc ('l', file);
8438 putc ('l', file);
8439 #endif
8441 else
8442 putc ('l', file);
8443 return;
8445 default:
8446 gcc_unreachable ();
8449 case 'b':
8450 case 'w':
8451 case 'k':
8452 case 'q':
8453 case 'h':
8454 case 'y':
8455 case 'X':
8456 case 'P':
8457 break;
8459 case 's':
8460 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8462 PRINT_OPERAND (file, x, 0);
8463 putc (',', file);
8465 return;
8467 case 'D':
8468 /* A little bit of braindamage here. The SSE compare instructions
8469 use completely different names for the comparisons than the
8470 fp conditional moves do. */
8471 switch (GET_CODE (x))
8473 case EQ:
8474 case UNEQ:
8475 fputs ("eq", file);
8476 break;
8477 case LT:
8478 case UNLT:
8479 fputs ("lt", file);
8480 break;
8481 case LE:
8482 case UNLE:
8483 fputs ("le", file);
8484 break;
8485 case UNORDERED:
8486 fputs ("unord", file);
8487 break;
8488 case NE:
8489 case LTGT:
8490 fputs ("neq", file);
8491 break;
8492 case UNGE:
8493 case GE:
8494 fputs ("nlt", file);
8495 break;
8496 case UNGT:
8497 case GT:
8498 fputs ("nle", file);
8499 break;
8500 case ORDERED:
8501 fputs ("ord", file);
8502 break;
8503 default:
8504 gcc_unreachable ();
8506 return;
8507 case 'O':
8508 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8509 if (ASSEMBLER_DIALECT == ASM_ATT)
8511 switch (GET_MODE (x))
8513 case HImode: putc ('w', file); break;
8514 case SImode:
8515 case SFmode: putc ('l', file); break;
8516 case DImode:
8517 case DFmode: putc ('q', file); break;
8518 default: gcc_unreachable ();
8520 putc ('.', file);
8522 #endif
8523 return;
8524 case 'C':
8525 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8526 return;
8527 case 'F':
8528 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8529 if (ASSEMBLER_DIALECT == ASM_ATT)
8530 putc ('.', file);
8531 #endif
8532 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8533 return;
8535 /* Like above, but reverse condition */
8536 case 'c':
8537 /* Check to see if argument to %c is really a constant
8538 and not a condition code which needs to be reversed. */
8539 if (!COMPARISON_P (x))
8541 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8542 return;
8544 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8545 return;
8546 case 'f':
8547 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8548 if (ASSEMBLER_DIALECT == ASM_ATT)
8549 putc ('.', file);
8550 #endif
8551 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8552 return;
8554 case 'H':
8555 /* It doesn't actually matter what mode we use here, as we're
8556 only going to use this for printing. */
8557 x = adjust_address_nv (x, DImode, 8);
8558 break;
8560 case '+':
8562 rtx x;
8564 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8565 return;
8567 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8568 if (x)
8570 int pred_val = INTVAL (XEXP (x, 0));
8572 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8573 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8575 int taken = pred_val > REG_BR_PROB_BASE / 2;
8576 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8578 /* Emit hints only in the case the default branch prediction
8579 heuristics would fail. */
8580 if (taken != cputaken)
8582 /* We use 3e (DS) prefix for taken branches and
8583 2e (CS) prefix for not taken branches. */
8584 if (taken)
8585 fputs ("ds ; ", file);
8586 else
8587 fputs ("cs ; ", file);
8591 return;
8593 default:
8594 output_operand_lossage ("invalid operand code '%c'", code);
8598 if (REG_P (x))
8599 print_reg (x, code, file);
8601 else if (MEM_P (x))
8603 /* No `byte ptr' prefix for call instructions. */
8604 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8606 const char * size;
8607 switch (GET_MODE_SIZE (GET_MODE (x)))
8609 case 1: size = "BYTE"; break;
8610 case 2: size = "WORD"; break;
8611 case 4: size = "DWORD"; break;
8612 case 8: size = "QWORD"; break;
8613 case 12: size = "XWORD"; break;
8614 case 16: size = "XMMWORD"; break;
8615 default:
8616 gcc_unreachable ();
8619 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8620 if (code == 'b')
8621 size = "BYTE";
8622 else if (code == 'w')
8623 size = "WORD";
8624 else if (code == 'k')
8625 size = "DWORD";
8627 fputs (size, file);
8628 fputs (" PTR ", file);
8631 x = XEXP (x, 0);
8632 /* Avoid (%rip) for call operands. */
8633 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8634 && !CONST_INT_P (x))
8635 output_addr_const (file, x);
8636 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8637 output_operand_lossage ("invalid constraints for operand");
8638 else
8639 output_address (x);
8642 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8644 REAL_VALUE_TYPE r;
8645 long l;
8647 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8648 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8650 if (ASSEMBLER_DIALECT == ASM_ATT)
8651 putc ('$', file);
8652 fprintf (file, "0x%08lx", l);
8655 /* These float cases don't actually occur as immediate operands. */
8656 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8658 char dstr[30];
8660 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8661 fprintf (file, "%s", dstr);
8664 else if (GET_CODE (x) == CONST_DOUBLE
8665 && GET_MODE (x) == XFmode)
8667 char dstr[30];
8669 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8670 fprintf (file, "%s", dstr);
8673 else
8675 /* We have patterns that allow zero sets of memory, for instance.
8676 In 64-bit mode, we should probably support all 8-byte vectors,
8677 since we can in fact encode that into an immediate. */
8678 if (GET_CODE (x) == CONST_VECTOR)
8680 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8681 x = const0_rtx;
8684 if (code != 'P')
8686 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8688 if (ASSEMBLER_DIALECT == ASM_ATT)
8689 putc ('$', file);
8691 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8692 || GET_CODE (x) == LABEL_REF)
8694 if (ASSEMBLER_DIALECT == ASM_ATT)
8695 putc ('$', file);
8696 else
8697 fputs ("OFFSET FLAT:", file);
8700 if (CONST_INT_P (x))
8701 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8702 else if (flag_pic)
8703 output_pic_addr_const (file, x, code);
8704 else
8705 output_addr_const (file, x);
8709 /* Print a memory operand whose address is ADDR. */
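/* For example, a base+index*scale+displacement address prints roughly
   as "disp(%base,%index,scale)" in AT&T syntax and as
   "[base+index*scale+disp]" in Intel syntax. */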
8711 void
8712 print_operand_address (FILE *file, rtx addr)
8714 struct ix86_address parts;
8715 rtx base, index, disp;
8716 int scale;
8717 int ok = ix86_decompose_address (addr, &parts);
8719 gcc_assert (ok);
8721 base = parts.base;
8722 index = parts.index;
8723 disp = parts.disp;
8724 scale = parts.scale;
8726 switch (parts.seg)
8728 case SEG_DEFAULT:
8729 break;
8730 case SEG_FS:
8731 case SEG_GS:
8732 if (USER_LABEL_PREFIX[0] == 0)
8733 putc ('%', file);
8734 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8735 break;
8736 default:
8737 gcc_unreachable ();
8740 if (!base && !index)
8742 /* Displacement only requires special attention. */
8744 if (CONST_INT_P (disp))
8746 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8748 if (USER_LABEL_PREFIX[0] == 0)
8749 putc ('%', file);
8750 fputs ("ds:", file);
8752 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8754 else if (flag_pic)
8755 output_pic_addr_const (file, disp, 0);
8756 else
8757 output_addr_const (file, disp);
8759 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
8760 if (TARGET_64BIT)
8762 if (GET_CODE (disp) == CONST
8763 && GET_CODE (XEXP (disp, 0)) == PLUS
8764 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8765 disp = XEXP (XEXP (disp, 0), 0);
8766 if (GET_CODE (disp) == LABEL_REF
8767 || (GET_CODE (disp) == SYMBOL_REF
8768 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8769 fputs ("(%rip)", file);
8772 else
8774 if (ASSEMBLER_DIALECT == ASM_ATT)
8776 if (disp)
8778 if (flag_pic)
8779 output_pic_addr_const (file, disp, 0);
8780 else if (GET_CODE (disp) == LABEL_REF)
8781 output_asm_label (disp);
8782 else
8783 output_addr_const (file, disp);
8786 putc ('(', file);
8787 if (base)
8788 print_reg (base, 0, file);
8789 if (index)
8791 putc (',', file);
8792 print_reg (index, 0, file);
8793 if (scale != 1)
8794 fprintf (file, ",%d", scale);
8796 putc (')', file);
8798 else
8800 rtx offset = NULL_RTX;
8802 if (disp)
8804 /* Pull out the offset of a symbol; print any symbol itself. */
8805 if (GET_CODE (disp) == CONST
8806 && GET_CODE (XEXP (disp, 0)) == PLUS
8807 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8809 offset = XEXP (XEXP (disp, 0), 1);
8810 disp = gen_rtx_CONST (VOIDmode,
8811 XEXP (XEXP (disp, 0), 0));
8814 if (flag_pic)
8815 output_pic_addr_const (file, disp, 0);
8816 else if (GET_CODE (disp) == LABEL_REF)
8817 output_asm_label (disp);
8818 else if (CONST_INT_P (disp))
8819 offset = disp;
8820 else
8821 output_addr_const (file, disp);
8824 putc ('[', file);
8825 if (base)
8827 print_reg (base, 0, file);
8828 if (offset)
8830 if (INTVAL (offset) >= 0)
8831 putc ('+', file);
8832 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8835 else if (offset)
8836 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8837 else
8838 putc ('0', file);
8840 if (index)
8842 putc ('+', file);
8843 print_reg (index, 0, file);
8844 if (scale != 1)
8845 fprintf (file, "*%d", scale);
8847 putc (']', file);
8852 bool
8853 output_addr_const_extra (FILE *file, rtx x)
8855 rtx op;
8857 if (GET_CODE (x) != UNSPEC)
8858 return false;
8860 op = XVECEXP (x, 0, 0);
8861 switch (XINT (x, 1))
8863 case UNSPEC_GOTTPOFF:
8864 output_addr_const (file, op);
8865 /* FIXME: This might be @TPOFF in Sun ld. */
8866 fputs ("@GOTTPOFF", file);
8867 break;
8868 case UNSPEC_TPOFF:
8869 output_addr_const (file, op);
8870 fputs ("@TPOFF", file);
8871 break;
8872 case UNSPEC_NTPOFF:
8873 output_addr_const (file, op);
8874 if (TARGET_64BIT)
8875 fputs ("@TPOFF", file);
8876 else
8877 fputs ("@NTPOFF", file);
8878 break;
8879 case UNSPEC_DTPOFF:
8880 output_addr_const (file, op);
8881 fputs ("@DTPOFF", file);
8882 break;
8883 case UNSPEC_GOTNTPOFF:
8884 output_addr_const (file, op);
8885 if (TARGET_64BIT)
8886 fputs ("@GOTTPOFF(%rip)", file);
8887 else
8888 fputs ("@GOTNTPOFF", file);
8889 break;
8890 case UNSPEC_INDNTPOFF:
8891 output_addr_const (file, op);
8892 fputs ("@INDNTPOFF", file);
8893 break;
8895 default:
8896 return false;
8899 return true;
8902 /* Split one or more DImode RTL references into pairs of SImode
8903 references. The RTL can be REG, offsettable MEM, integer constant, or
8904 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8905 split and "num" is its length. lo_half and hi_half are output arrays
8906 that parallel "operands". */
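/* For example, a DImode MEM splits into two SImode MEMs at offsets 0
   and 4, and a DImode constant splits into its low and high 32-bit
   halves. */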
8908 void
8909 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8911 while (num--)
8913 rtx op = operands[num];
8915 /* simplify_subreg refuses to split volatile memory addresses,
8916 but we still have to handle them. */
8917 if (MEM_P (op))
8919 lo_half[num] = adjust_address (op, SImode, 0);
8920 hi_half[num] = adjust_address (op, SImode, 4);
8922 else
8924 lo_half[num] = simplify_gen_subreg (SImode, op,
8925 GET_MODE (op) == VOIDmode
8926 ? DImode : GET_MODE (op), 0);
8927 hi_half[num] = simplify_gen_subreg (SImode, op,
8928 GET_MODE (op) == VOIDmode
8929 ? DImode : GET_MODE (op), 4);
8933 /* Split one or more TImode RTL references into pairs of DImode
8934 references. The RTL can be REG, offsettable MEM, integer constant, or
8935 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8936 split and "num" is its length. lo_half and hi_half are output arrays
8937 that parallel "operands". */
8939 void
8940 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8942 while (num--)
8944 rtx op = operands[num];
8946 /* simplify_subreg refuses to split volatile memory addresses, but we
8947 still have to handle them. */
8948 if (MEM_P (op))
8950 lo_half[num] = adjust_address (op, DImode, 0);
8951 hi_half[num] = adjust_address (op, DImode, 8);
8953 else
8955 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8956 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8961 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8962 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8963 is the expression of the binary operation. The output may either be
8964 emitted here, or returned to the caller, like all output_* functions.
8966 There is no guarantee that the operands are the same mode, as they
8967 might be within FLOAT or FLOAT_EXTEND expressions. */
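/* For example, for an SSE DFmode addition this returns
   "addsd\t{%2, %0|%0, %2}", while the x87 cases below choose between
   fadd/faddp/fiadd and the reversed forms depending on which operand
   is st(0) and which operands die. */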
8969 #ifndef SYSV386_COMPAT
8970 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8971 wants to fix the assemblers because that causes incompatibility
8972 with gcc. No-one wants to fix gcc because that causes
8973 incompatibility with assemblers... You can use the option of
8974 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8975 #define SYSV386_COMPAT 1
8976 #endif
8978 const char *
8979 output_387_binary_op (rtx insn, rtx *operands)
8981 static char buf[30];
8982 const char *p;
8983 const char *ssep;
8984 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8986 #ifdef ENABLE_CHECKING
8987 /* Even if we do not want to check the inputs, this documents input
8988 constraints. Which helps in understanding the following code. */
8989 if (STACK_REG_P (operands[0])
8990 && ((REG_P (operands[1])
8991 && REGNO (operands[0]) == REGNO (operands[1])
8992 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8993 || (REG_P (operands[2])
8994 && REGNO (operands[0]) == REGNO (operands[2])
8995 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8996 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8997 ; /* ok */
8998 else
8999 gcc_assert (is_sse);
9000 #endif
9002 switch (GET_CODE (operands[3]))
9004 case PLUS:
9005 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9006 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9007 p = "fiadd";
9008 else
9009 p = "fadd";
9010 ssep = "add";
9011 break;
9013 case MINUS:
9014 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9015 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9016 p = "fisub";
9017 else
9018 p = "fsub";
9019 ssep = "sub";
9020 break;
9022 case MULT:
9023 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9024 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9025 p = "fimul";
9026 else
9027 p = "fmul";
9028 ssep = "mul";
9029 break;
9031 case DIV:
9032 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9033 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9034 p = "fidiv";
9035 else
9036 p = "fdiv";
9037 ssep = "div";
9038 break;
9040 default:
9041 gcc_unreachable ();
9044 if (is_sse)
9046 strcpy (buf, ssep);
9047 if (GET_MODE (operands[0]) == SFmode)
9048 strcat (buf, "ss\t{%2, %0|%0, %2}");
9049 else
9050 strcat (buf, "sd\t{%2, %0|%0, %2}");
9051 return buf;
9053 strcpy (buf, p);
9055 switch (GET_CODE (operands[3]))
9057 case MULT:
9058 case PLUS:
9059 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9061 rtx temp = operands[2];
9062 operands[2] = operands[1];
9063 operands[1] = temp;
9066 /* We know operands[0] == operands[1]. */
9068 if (MEM_P (operands[2]))
9070 p = "%z2\t%2";
9071 break;
9074 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9076 if (STACK_TOP_P (operands[0]))
9077 /* How is it that we are storing to a dead operand[2]?
9078 Well, presumably operands[1] is dead too. We can't
9079 store the result to st(0) as st(0) gets popped on this
9080 instruction. Instead store to operands[2] (which I
9081 think has to be st(1)). st(1) will be popped later.
9082 gcc <= 2.8.1 didn't have this check and generated
9083 assembly code that the Unixware assembler rejected. */
9084 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9085 else
9086 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9087 break;
9090 if (STACK_TOP_P (operands[0]))
9091 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9092 else
9093 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9094 break;
9096 case MINUS:
9097 case DIV:
9098 if (MEM_P (operands[1]))
9100 p = "r%z1\t%1";
9101 break;
9104 if (MEM_P (operands[2]))
9106 p = "%z2\t%2";
9107 break;
9110 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9112 #if SYSV386_COMPAT
9113 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9114 derived assemblers, confusingly reverse the direction of
9115 the operation for fsub{r} and fdiv{r} when the
9116 destination register is not st(0). The Intel assembler
9117 doesn't have this brain damage. Read !SYSV386_COMPAT to
9118 figure out what the hardware really does. */
9119 if (STACK_TOP_P (operands[0]))
9120 p = "{p\t%0, %2|rp\t%2, %0}";
9121 else
9122 p = "{rp\t%2, %0|p\t%0, %2}";
9123 #else
9124 if (STACK_TOP_P (operands[0]))
9125 /* As above for fmul/fadd, we can't store to st(0). */
9126 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9127 else
9128 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9129 #endif
9130 break;
9133 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9135 #if SYSV386_COMPAT
9136 if (STACK_TOP_P (operands[0]))
9137 p = "{rp\t%0, %1|p\t%1, %0}";
9138 else
9139 p = "{p\t%1, %0|rp\t%0, %1}";
9140 #else
9141 if (STACK_TOP_P (operands[0]))
9142 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9143 else
9144 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9145 #endif
9146 break;
9149 if (STACK_TOP_P (operands[0]))
9151 if (STACK_TOP_P (operands[1]))
9152 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9153 else
9154 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9155 break;
9157 else if (STACK_TOP_P (operands[1]))
9159 #if SYSV386_COMPAT
9160 p = "{\t%1, %0|r\t%0, %1}";
9161 #else
9162 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9163 #endif
9165 else
9167 #if SYSV386_COMPAT
9168 p = "{r\t%2, %0|\t%0, %2}";
9169 #else
9170 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9171 #endif
9173 break;
9175 default:
9176 gcc_unreachable ();
9179 strcat (buf, p);
9180 return buf;
9183 /* Return needed mode for entity in optimize_mode_switching pass. */
9186 ix86_mode_needed (int entity, rtx insn)
9188 enum attr_i387_cw mode;
9190 /* The mode UNINITIALIZED is used to store the control word after a
9191 function call or ASM pattern. The mode ANY specifies that the function
9192 has no requirements on the control word and makes no changes in the
9193 bits we are interested in. */
9195 if (CALL_P (insn)
9196 || (NONJUMP_INSN_P (insn)
9197 && (asm_noperands (PATTERN (insn)) >= 0
9198 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9199 return I387_CW_UNINITIALIZED;
9201 if (recog_memoized (insn) < 0)
9202 return I387_CW_ANY;
9204 mode = get_attr_i387_cw (insn);
9206 switch (entity)
9208 case I387_TRUNC:
9209 if (mode == I387_CW_TRUNC)
9210 return mode;
9211 break;
9213 case I387_FLOOR:
9214 if (mode == I387_CW_FLOOR)
9215 return mode;
9216 break;
9218 case I387_CEIL:
9219 if (mode == I387_CW_CEIL)
9220 return mode;
9221 break;
9223 case I387_MASK_PM:
9224 if (mode == I387_CW_MASK_PM)
9225 return mode;
9226 break;
9228 default:
9229 gcc_unreachable ();
9232 return I387_CW_ANY;
9235 /* Output code to initialize the control word copies used by trunc?f?i and
9236 rounding patterns. The current control word is saved to a stack slot,
9237 and a copy modified for MODE is stored to the stack slot for that mode. */
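/* In the x87 control word, bits 10-11 (mask 0x0c00) select the
   rounding mode: 00 = to nearest, 01 = down, 10 = up, 11 = toward
   zero (truncate); bit 5 (0x0020) masks the precision exception.
   The constants used below set exactly those fields. */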
9239 void
9240 emit_i387_cw_initialization (int mode)
9242 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9243 rtx new_mode;
9245 int slot;
9247 rtx reg = gen_reg_rtx (HImode);
9249 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9250 emit_move_insn (reg, copy_rtx (stored_mode));
9252 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9254 switch (mode)
9256 case I387_CW_TRUNC:
9257 /* round toward zero (truncate) */
9258 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9259 slot = SLOT_CW_TRUNC;
9260 break;
9262 case I387_CW_FLOOR:
9263 /* round down toward -oo */
9264 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9265 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9266 slot = SLOT_CW_FLOOR;
9267 break;
9269 case I387_CW_CEIL:
9270 /* round up toward +oo */
9271 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9272 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9273 slot = SLOT_CW_CEIL;
9274 break;
9276 case I387_CW_MASK_PM:
9277 /* mask precision exception for nearbyint() */
9278 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9279 slot = SLOT_CW_MASK_PM;
9280 break;
9282 default:
9283 gcc_unreachable ();
9286 else
9288 switch (mode)
9290 case I387_CW_TRUNC:
9291 /* round toward zero (truncate) */
9292 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9293 slot = SLOT_CW_TRUNC;
9294 break;
9296 case I387_CW_FLOOR:
9297 /* round down toward -oo */
9298 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9299 slot = SLOT_CW_FLOOR;
9300 break;
9302 case I387_CW_CEIL:
9303 /* round up toward +oo */
9304 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9305 slot = SLOT_CW_CEIL;
9306 break;
9308 case I387_CW_MASK_PM:
9309 /* mask precision exception for nearbyint() */
9310 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9311 slot = SLOT_CW_MASK_PM;
9312 break;
9314 default:
9315 gcc_unreachable ();
9319 gcc_assert (slot < MAX_386_STACK_LOCALS);
9321 new_mode = assign_386_stack_local (HImode, slot);
9322 emit_move_insn (new_mode, reg);
9325 /* Output code for INSN to convert a float to a signed int. OPERANDS
9326 are the insn operands. The output may be [HSD]Imode and the input
9327 operand may be [SDX]Fmode. */
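/* A typical non-fisttp sequence emitted here is therefore
   fldcw %3 ; fistp%z0 %0 ; fldcw %2
   i.e. switch to the truncating control word, store the integer,
   then restore the previous control word. */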
9329 const char *
9330 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9332 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9333 int dimode_p = GET_MODE (operands[0]) == DImode;
9334 int round_mode = get_attr_i387_cw (insn);
9336 /* Jump through a hoop or two for DImode, since the hardware has no
9337 non-popping instruction. We used to do this a different way, but
9338 that was somewhat fragile and broke with post-reload splitters. */
9339 if ((dimode_p || fisttp) && !stack_top_dies)
9340 output_asm_insn ("fld\t%y1", operands);
9342 gcc_assert (STACK_TOP_P (operands[1]));
9343 gcc_assert (MEM_P (operands[0]));
9344 gcc_assert (GET_MODE (operands[1]) != TFmode);
9346 if (fisttp)
9347 output_asm_insn ("fisttp%z0\t%0", operands);
9348 else
9350 if (round_mode != I387_CW_ANY)
9351 output_asm_insn ("fldcw\t%3", operands);
9352 if (stack_top_dies || dimode_p)
9353 output_asm_insn ("fistp%z0\t%0", operands);
9354 else
9355 output_asm_insn ("fist%z0\t%0", operands);
9356 if (round_mode != I387_CW_ANY)
9357 output_asm_insn ("fldcw\t%2", operands);
9360 return "";
9363 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9364 have the values zero or one, indicates the ffreep insn's operand
9365 from the OPERANDS array. */
9367 static const char *
9368 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9370 if (TARGET_USE_FFREEP)
9371 #if HAVE_AS_IX86_FFREEP
9372 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9373 #else
9375 static char retval[] = ".word\t0xc_df";
9376 int regno = REGNO (operands[opno]);
9378 gcc_assert (FP_REGNO_P (regno));
9380 retval[9] = '0' + (regno - FIRST_STACK_REG);
9381 return retval;
9383 #endif
9385 return opno ? "fstp\t%y1" : "fstp\t%y0";
9389 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9390 should be used. UNORDERED_P is true when fucom should be used. */
9392 const char *
9393 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9395 int stack_top_dies;
9396 rtx cmp_op0, cmp_op1;
9397 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9399 if (eflags_p)
9401 cmp_op0 = operands[0];
9402 cmp_op1 = operands[1];
9404 else
9406 cmp_op0 = operands[1];
9407 cmp_op1 = operands[2];
9410 if (is_sse)
9412 if (GET_MODE (operands[0]) == SFmode)
9413 if (unordered_p)
9414 return "ucomiss\t{%1, %0|%0, %1}";
9415 else
9416 return "comiss\t{%1, %0|%0, %1}";
9417 else
9418 if (unordered_p)
9419 return "ucomisd\t{%1, %0|%0, %1}";
9420 else
9421 return "comisd\t{%1, %0|%0, %1}";
9424 gcc_assert (STACK_TOP_P (cmp_op0));
9426 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9428 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9430 if (stack_top_dies)
9432 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9433 return output_387_ffreep (operands, 1);
9435 else
9436 return "ftst\n\tfnstsw\t%0";
9439 if (STACK_REG_P (cmp_op1)
9440 && stack_top_dies
9441 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9442 && REGNO (cmp_op1) != FIRST_STACK_REG)
9444 /* If the top of the 387 stack dies, and the other operand
9445 is also a stack register that dies, then this must be a
9446 `fcompp' float compare. */
9448 if (eflags_p)
9450 /* There is no double popping fcomi variant. Fortunately,
9451 eflags is immune from the fstp's cc clobbering. */
9452 if (unordered_p)
9453 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9454 else
9455 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9456 return output_387_ffreep (operands, 0);
9458 else
9460 if (unordered_p)
9461 return "fucompp\n\tfnstsw\t%0";
9462 else
9463 return "fcompp\n\tfnstsw\t%0";
9466 else
9468 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9470 static const char * const alt[16] =
9472 "fcom%z2\t%y2\n\tfnstsw\t%0",
9473 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9474 "fucom%z2\t%y2\n\tfnstsw\t%0",
9475 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9477 "ficom%z2\t%y2\n\tfnstsw\t%0",
9478 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9479 NULL,
9480 NULL,
9482 "fcomi\t{%y1, %0|%0, %y1}",
9483 "fcomip\t{%y1, %0|%0, %y1}",
9484 "fucomi\t{%y1, %0|%0, %y1}",
9485 "fucomip\t{%y1, %0|%0, %y1}",
9487 NULL,
9488 NULL,
9489 NULL,
9490 NULL
9493 int mask;
9494 const char *ret;
9496 mask = eflags_p << 3;
9497 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9498 mask |= unordered_p << 1;
9499 mask |= stack_top_dies;
9501 gcc_assert (mask < 16);
9502 ret = alt[mask];
9503 gcc_assert (ret);
9505 return ret;
9509 void
9510 ix86_output_addr_vec_elt (FILE *file, int value)
9512 const char *directive = ASM_LONG;
9514 #ifdef ASM_QUAD
9515 if (TARGET_64BIT)
9516 directive = ASM_QUAD;
9517 #else
9518 gcc_assert (!TARGET_64BIT);
9519 #endif
9521 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9524 void
9525 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9527 const char *directive = ASM_LONG;
9529 #ifdef ASM_QUAD
9530 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9531 directive = ASM_QUAD;
9532 #else
9533 gcc_assert (!TARGET_64BIT);
9534 #endif
9535 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9536 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9537 fprintf (file, "%s%s%d-%s%d\n",
9538 directive, LPREFIX, value, LPREFIX, rel);
9539 else if (HAVE_AS_GOTOFF_IN_DATA)
9540 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9541 #if TARGET_MACHO
9542 else if (TARGET_MACHO)
9544 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9545 machopic_output_function_base_name (file);
9546 fprintf(file, "\n");
9548 #endif
9549 else
9550 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9551 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9554 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9555 for the target. */
9557 void
9558 ix86_expand_clear (rtx dest)
9560 rtx tmp;
9562 /* We play register width games, which are only valid after reload. */
9563 gcc_assert (reload_completed);
9565 /* Avoid HImode and its attendant prefix byte. */
9566 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9567 dest = gen_rtx_REG (SImode, REGNO (dest));
9568 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9570 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9571 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9573 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9574 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9577 emit_insn (tmp);
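/* The two forms this emits are, roughly:

       xorl  %eax, %eax      ; typically shorter, but clobbers the flags
       movl  $0, %eax        ; used when TARGET_USE_MOV0 and not optimizing for size

   which is why the xor variant above carries an explicit flags clobber.  */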
9580 /* X is an unchanging MEM. If it is a constant pool reference, return
9581 the constant pool rtx, else NULL. */
9584 maybe_get_pool_constant (rtx x)
9586 x = ix86_delegitimize_address (XEXP (x, 0));
9588 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9589 return get_pool_constant (x);
9591 return NULL_RTX;
9594 void
9595 ix86_expand_move (enum machine_mode mode, rtx operands[])
9597 int strict = (reload_in_progress || reload_completed);
9598 rtx op0, op1;
9599 enum tls_model model;
9601 op0 = operands[0];
9602 op1 = operands[1];
9604 if (GET_CODE (op1) == SYMBOL_REF)
9606 model = SYMBOL_REF_TLS_MODEL (op1);
9607 if (model)
9609 op1 = legitimize_tls_address (op1, model, true);
9610 op1 = force_operand (op1, op0);
9611 if (op1 == op0)
9612 return;
9614 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9615 && SYMBOL_REF_DLLIMPORT_P (op1))
9616 op1 = legitimize_dllimport_symbol (op1, false);
9618 else if (GET_CODE (op1) == CONST
9619 && GET_CODE (XEXP (op1, 0)) == PLUS
9620 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9622 rtx addend = XEXP (XEXP (op1, 0), 1);
9623 rtx symbol = XEXP (XEXP (op1, 0), 0);
9624 rtx tmp = NULL;
9626 model = SYMBOL_REF_TLS_MODEL (symbol);
9627 if (model)
9628 tmp = legitimize_tls_address (symbol, model, true);
9629 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9630 && SYMBOL_REF_DLLIMPORT_P (symbol))
9631 tmp = legitimize_dllimport_symbol (symbol, true);
9633 if (tmp)
9635 tmp = force_operand (tmp, NULL);
9636 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9637 op0, 1, OPTAB_DIRECT);
9638 if (tmp == op0)
9639 return;
9643 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9645 if (TARGET_MACHO && !TARGET_64BIT)
9647 #if TARGET_MACHO
9648 if (MACHOPIC_PURE)
9650 rtx temp = ((reload_in_progress
9651 || ((op0 && REG_P (op0))
9652 && mode == Pmode))
9653 ? op0 : gen_reg_rtx (Pmode));
9654 op1 = machopic_indirect_data_reference (op1, temp);
9655 op1 = machopic_legitimize_pic_address (op1, mode,
9656 temp == op1 ? 0 : temp);
9658 else if (MACHOPIC_INDIRECT)
9659 op1 = machopic_indirect_data_reference (op1, 0);
9660 if (op0 == op1)
9661 return;
9662 #endif
9664 else
9666 if (MEM_P (op0))
9667 op1 = force_reg (Pmode, op1);
9668 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9670 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9671 op1 = legitimize_pic_address (op1, reg);
9672 if (op0 == op1)
9673 return;
9677 else
9679 if (MEM_P (op0)
9680 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9681 || !push_operand (op0, mode))
9682 && MEM_P (op1))
9683 op1 = force_reg (mode, op1);
9685 if (push_operand (op0, mode)
9686 && ! general_no_elim_operand (op1, mode))
9687 op1 = copy_to_mode_reg (mode, op1);
9689 /* Force large constants in 64-bit compilation into a register
9690 so that they get CSEed. */
9691 if (TARGET_64BIT && mode == DImode
9692 && immediate_operand (op1, mode)
9693 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9694 && !register_operand (op0, mode)
9695 && optimize && !reload_completed && !reload_in_progress)
9696 op1 = copy_to_mode_reg (mode, op1);
9698 if (FLOAT_MODE_P (mode))
9700 /* If we are loading a floating point constant to a register,
9701 force the value to memory now, since we'll get better code
9702 out of the back end. */
9704 if (strict)
9706 else if (GET_CODE (op1) == CONST_DOUBLE)
9708 op1 = validize_mem (force_const_mem (mode, op1));
9709 if (!register_operand (op0, mode))
9711 rtx temp = gen_reg_rtx (mode);
9712 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9713 emit_move_insn (op0, temp);
9714 return;
9720 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9723 void
9724 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9726 rtx op0 = operands[0], op1 = operands[1];
9728 /* Force constants other than zero into memory. We do not know how
9729 the instructions used to build constants modify the upper 64 bits
9730 of the register; once we have that information we may be able
9731 to handle some of them more efficiently. */
9732 if ((reload_in_progress | reload_completed) == 0
9733 && register_operand (op0, mode)
9734 && CONSTANT_P (op1)
9735 && standard_sse_constant_p (op1) <= 0)
9736 op1 = validize_mem (force_const_mem (mode, op1));
9738 /* Make operand1 a register if it isn't already. */
9739 if (!no_new_pseudos
9740 && !register_operand (op0, mode)
9741 && !register_operand (op1, mode))
9743 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9744 return;
9747 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9750 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9751 straight to ix86_expand_vector_move. */
9752 /* Code generation for scalar reg-reg moves of single and double precision data:
9753 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
9754 movaps reg, reg
9755 else
9756 movss reg, reg
9757 if (x86_sse_partial_reg_dependency == true)
9758 movapd reg, reg
9759 else
9760 movsd reg, reg
9762 Code generation for scalar loads of double precision data:
9763 if (x86_sse_split_regs == true)
9764 movlpd mem, reg (gas syntax)
9765 else
9766 movsd mem, reg
9768 Code generation for unaligned packed loads of single precision data
9769 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9770 if (x86_sse_unaligned_move_optimal)
9771 movups mem, reg
9773 if (x86_sse_partial_reg_dependency == true)
9775 xorps reg, reg
9776 movlps mem, reg
9777 movhps mem+8, reg
9779 else
9781 movlps mem, reg
9782 movhps mem+8, reg
9785 Code generation for unaligned packed loads of double precision data
9786 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9787 if (x86_sse_unaligned_move_optimal)
9788 movupd mem, reg
9790 if (x86_sse_split_regs == true)
9792 movlpd mem, reg
9793 movhpd mem+8, reg
9795 else
9797 movsd mem, reg
9798 movhpd mem+8, reg
9802 void
9803 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9805 rtx op0, op1, m;
9807 op0 = operands[0];
9808 op1 = operands[1];
9810 if (MEM_P (op1))
9812 /* If we're optimizing for size, movups is the smallest. */
9813 if (optimize_size)
9815 op0 = gen_lowpart (V4SFmode, op0);
9816 op1 = gen_lowpart (V4SFmode, op1);
9817 emit_insn (gen_sse_movups (op0, op1));
9818 return;
9821 /* ??? If we have typed data, then it would appear that using
9822 movdqu is the only way to get unaligned data loaded with
9823 integer type. */
9824 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9826 op0 = gen_lowpart (V16QImode, op0);
9827 op1 = gen_lowpart (V16QImode, op1);
9828 emit_insn (gen_sse2_movdqu (op0, op1));
9829 return;
9832 if (TARGET_SSE2 && mode == V2DFmode)
9834 rtx zero;
9836 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9838 op0 = gen_lowpart (V2DFmode, op0);
9839 op1 = gen_lowpart (V2DFmode, op1);
9840 emit_insn (gen_sse2_movupd (op0, op1));
9841 return;
9844 /* When SSE registers are split into halves, we can avoid
9845 writing to the top half twice. */
9846 if (TARGET_SSE_SPLIT_REGS)
9848 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9849 zero = op0;
9851 else
9853 /* ??? Not sure about the best option for the Intel chips.
9854 The following would seem to satisfy; the register is
9855 entirely cleared, breaking the dependency chain. We
9856 then store to the upper half, with a dependency depth
9857 of one. A rumor has it that Intel recommends two movsd
9858 followed by an unpacklpd, but this is unconfirmed. And
9859 given that the dependency depth of the unpacklpd would
9860 still be one, I'm not sure why this would be better. */
9861 zero = CONST0_RTX (V2DFmode);
9864 m = adjust_address (op1, DFmode, 0);
9865 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9866 m = adjust_address (op1, DFmode, 8);
9867 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9869 else
9871 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9873 op0 = gen_lowpart (V4SFmode, op0);
9874 op1 = gen_lowpart (V4SFmode, op1);
9875 emit_insn (gen_sse_movups (op0, op1));
9876 return;
9879 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9880 emit_move_insn (op0, CONST0_RTX (mode));
9881 else
9882 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9884 if (mode != V4SFmode)
9885 op0 = gen_lowpart (V4SFmode, op0);
9886 m = adjust_address (op1, V2SFmode, 0);
9887 emit_insn (gen_sse_loadlps (op0, op0, m));
9888 m = adjust_address (op1, V2SFmode, 8);
9889 emit_insn (gen_sse_loadhps (op0, op0, m));
9892 else if (MEM_P (op0))
9894 /* If we're optimizing for size, movups is the smallest. */
9895 if (optimize_size)
9897 op0 = gen_lowpart (V4SFmode, op0);
9898 op1 = gen_lowpart (V4SFmode, op1);
9899 emit_insn (gen_sse_movups (op0, op1));
9900 return;
9903 /* ??? Similar to above, only less clear because of
9904 "typeless stores". */
9905 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9906 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9908 op0 = gen_lowpart (V16QImode, op0);
9909 op1 = gen_lowpart (V16QImode, op1);
9910 emit_insn (gen_sse2_movdqu (op0, op1));
9911 return;
9914 if (TARGET_SSE2 && mode == V2DFmode)
9916 m = adjust_address (op0, DFmode, 0);
9917 emit_insn (gen_sse2_storelpd (m, op1));
9918 m = adjust_address (op0, DFmode, 8);
9919 emit_insn (gen_sse2_storehpd (m, op1));
9921 else
9923 if (mode != V4SFmode)
9924 op1 = gen_lowpart (V4SFmode, op1);
9925 m = adjust_address (op0, V2SFmode, 0);
9926 emit_insn (gen_sse_storelps (m, op1));
9927 m = adjust_address (op0, V2SFmode, 8);
9928 emit_insn (gen_sse_storehps (m, op1));
9931 else
9932 gcc_unreachable ();
9935 /* Expand a push in MODE. This is some mode for which we do not support
9936 proper push instructions, at least from the registers that we expect
9937 the value to live in. */
9939 void
9940 ix86_expand_push (enum machine_mode mode, rtx x)
9942 rtx tmp;
9944 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9945 GEN_INT (-GET_MODE_SIZE (mode)),
9946 stack_pointer_rtx, 1, OPTAB_DIRECT);
9947 if (tmp != stack_pointer_rtx)
9948 emit_move_insn (stack_pointer_rtx, tmp);
9950 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9951 emit_move_insn (tmp, x);
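/* As a rough illustration (assuming 32-bit code and an 8-byte mode),
   the expansion above amounts to something like:

       sub  $8, %esp         ; make room on the stack
       mov  x, (%esp)        ; store the value into the new slot

   rather than a plain push, since the value may live in registers
   (e.g. SSE registers) that a push instruction cannot take.  */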
9954 /* Helper function of ix86_fixup_binary_operands to canonicalize
9955 operand order. Returns true if the operands should be swapped. */
9957 static bool
9958 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9959 rtx operands[])
9961 rtx dst = operands[0];
9962 rtx src1 = operands[1];
9963 rtx src2 = operands[2];
9965 /* If the operation is not commutative, we can't do anything. */
9966 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9967 return false;
9969 /* Highest priority is that src1 should match dst. */
9970 if (rtx_equal_p (dst, src1))
9971 return false;
9972 if (rtx_equal_p (dst, src2))
9973 return true;
9975 /* Next highest priority is that immediate constants come second. */
9976 if (immediate_operand (src2, mode))
9977 return false;
9978 if (immediate_operand (src1, mode))
9979 return true;
9981 /* Lowest priority is that memory references should come second. */
9982 if (MEM_P (src2))
9983 return false;
9984 if (MEM_P (src1))
9985 return true;
9987 return false;
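/* For instance, if a commutative (plus (mem) (reg)) has a destination
   equal to the register operand, swapping so that operand 1 matches
   the destination lets the two-address instruction reuse that register
   directly instead of forcing an extra copy or reload later.  */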
9991 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9992 destination to use for the operation. If different from the true
9993 destination in operands[0], a copy operation will be required. */
9996 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9997 rtx operands[])
9999 rtx dst = operands[0];
10000 rtx src1 = operands[1];
10001 rtx src2 = operands[2];
10003 /* Canonicalize operand order. */
10004 if (ix86_swap_binary_operands_p (code, mode, operands))
10006 rtx temp = src1;
10007 src1 = src2;
10008 src2 = temp;
10011 /* Both source operands cannot be in memory. */
10012 if (MEM_P (src1) && MEM_P (src2))
10014 /* Optimization: Only read from memory once. */
10015 if (rtx_equal_p (src1, src2))
10017 src2 = force_reg (mode, src2);
10018 src1 = src2;
10020 else
10021 src2 = force_reg (mode, src2);
10024 /* If the destination is memory, and we do not have matching source
10025 operands, do things in registers. */
10026 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10027 dst = gen_reg_rtx (mode);
10029 /* Source 1 cannot be a constant. */
10030 if (CONSTANT_P (src1))
10031 src1 = force_reg (mode, src1);
10033 /* Source 1 cannot be a non-matching memory. */
10034 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10035 src1 = force_reg (mode, src1);
10037 operands[1] = src1;
10038 operands[2] = src2;
10039 return dst;
10042 /* Similarly, but assume that the destination has already been
10043 set up properly. */
10045 void
10046 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10047 enum machine_mode mode, rtx operands[])
10049 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10050 gcc_assert (dst == operands[0]);
10053 /* Attempt to expand a binary operator. Make the expansion closer to the
10054 actual machine than just general_operand, which would allow 3 separate
10055 memory references (one output, two input) in a single insn. */
10057 void
10058 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10059 rtx operands[])
10061 rtx src1, src2, dst, op, clob;
10063 dst = ix86_fixup_binary_operands (code, mode, operands);
10064 src1 = operands[1];
10065 src2 = operands[2];
10067 /* Emit the instruction. */
10069 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10070 if (reload_in_progress)
10072 /* Reload doesn't know about the flags register, and doesn't know that
10073 it doesn't want to clobber it. We can only do this with PLUS. */
10074 gcc_assert (code == PLUS);
10075 emit_insn (op);
10077 else
10079 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10080 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10083 /* Fix up the destination if needed. */
10084 if (dst != operands[0])
10085 emit_move_insn (operands[0], dst);
10088 /* Return TRUE or FALSE depending on whether the binary operator meets the
10089 appropriate constraints. */
10092 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10093 rtx operands[3])
10095 rtx dst = operands[0];
10096 rtx src1 = operands[1];
10097 rtx src2 = operands[2];
10099 /* Both source operands cannot be in memory. */
10100 if (MEM_P (src1) && MEM_P (src2))
10101 return 0;
10103 /* Canonicalize operand order for commutative operators. */
10104 if (ix86_swap_binary_operands_p (code, mode, operands))
10106 rtx temp = src1;
10107 src1 = src2;
10108 src2 = temp;
10111 /* If the destination is memory, we must have a matching source operand. */
10112 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10113 return 0;
10115 /* Source 1 cannot be a constant. */
10116 if (CONSTANT_P (src1))
10117 return 0;
10119 /* Source 1 cannot be a non-matching memory. */
10120 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10121 return 0;
10123 return 1;
10126 /* Attempt to expand a unary operator. Make the expansion closer to the
10127 actual machine than just general_operand, which would allow 2 separate
10128 memory references (one output, one input) in a single insn. */
10130 void
10131 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10132 rtx operands[])
10134 int matching_memory;
10135 rtx src, dst, op, clob;
10137 dst = operands[0];
10138 src = operands[1];
10140 /* If the destination is memory, and we do not have matching source
10141 operands, do things in registers. */
10142 matching_memory = 0;
10143 if (MEM_P (dst))
10145 if (rtx_equal_p (dst, src))
10146 matching_memory = 1;
10147 else
10148 dst = gen_reg_rtx (mode);
10151 /* When the source operand is memory, the destination must match. */
10152 if (MEM_P (src) && !matching_memory)
10153 src = force_reg (mode, src);
10155 /* Emit the instruction. */
10157 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10158 if (reload_in_progress || code == NOT)
10160 /* Reload doesn't know about the flags register, and doesn't know that
10161 it doesn't want to clobber it. */
10162 gcc_assert (code == NOT);
10163 emit_insn (op);
10165 else
10167 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10168 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10171 /* Fix up the destination if needed. */
10172 if (dst != operands[0])
10173 emit_move_insn (operands[0], dst);
10176 /* Return TRUE or FALSE depending on whether the unary operator meets the
10177 appropriate constraints. */
10180 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10181 enum machine_mode mode ATTRIBUTE_UNUSED,
10182 rtx operands[2] ATTRIBUTE_UNUSED)
10184 /* If one of the operands is memory, source and destination must match. */
10185 if ((MEM_P (operands[0])
10186 || MEM_P (operands[1]))
10187 && ! rtx_equal_p (operands[0], operands[1]))
10188 return FALSE;
10189 return TRUE;
10192 /* Post-reload splitter for converting an SF or DFmode value in an
10193 SSE register into an unsigned SImode. */
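/* Roughly, per element, the split below computes (scalar sketch;
   "x" is the FP input and TWO31 is 2**31 as a float constant):

       if (x >= TWO31)                       // the "large" mask
         result = (int) (x - TWO31) ^ 0x80000000;
       else
         result = (int) x;

   i.e. inputs at or above 2**31 are biased down into signed range
   before the truncating conversion, and the lost top bit is put
   back with the final xor.  */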
10195 void
10196 ix86_split_convert_uns_si_sse (rtx operands[])
10198 enum machine_mode vecmode;
10199 rtx value, large, zero_or_two31, input, two31, x;
10201 large = operands[1];
10202 zero_or_two31 = operands[2];
10203 input = operands[3];
10204 two31 = operands[4];
10205 vecmode = GET_MODE (large);
10206 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10208 /* Load up the value into the low element. We must ensure that the other
10209 elements are valid floats -- zero is the easiest such value. */
10210 if (MEM_P (input))
10212 if (vecmode == V4SFmode)
10213 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10214 else
10215 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10217 else
10219 input = gen_rtx_REG (vecmode, REGNO (input));
10220 emit_move_insn (value, CONST0_RTX (vecmode));
10221 if (vecmode == V4SFmode)
10222 emit_insn (gen_sse_movss (value, value, input));
10223 else
10224 emit_insn (gen_sse2_movsd (value, value, input));
10227 emit_move_insn (large, two31);
10228 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10230 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10231 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10233 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10234 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10236 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10237 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10239 large = gen_rtx_REG (V4SImode, REGNO (large));
10240 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10242 x = gen_rtx_REG (V4SImode, REGNO (value));
10243 if (vecmode == V4SFmode)
10244 emit_insn (gen_sse2_cvttps2dq (x, value));
10245 else
10246 emit_insn (gen_sse2_cvttpd2dq (x, value));
10247 value = x;
10249 emit_insn (gen_xorv4si3 (value, value, large));
10252 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10253 Expects the 64-bit DImode to be supplied in a pair of integral
10254 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10255 -mfpmath=sse, !optimize_size only. */
10257 void
10258 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10260 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10261 rtx int_xmm, fp_xmm;
10262 rtx biases, exponents;
10263 rtx x;
10265 int_xmm = gen_reg_rtx (V4SImode);
10266 if (TARGET_INTER_UNIT_MOVES)
10267 emit_insn (gen_movdi_to_sse (int_xmm, input));
10268 else if (TARGET_SSE_SPLIT_REGS)
10270 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10271 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10273 else
10275 x = gen_reg_rtx (V2DImode);
10276 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10277 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10280 x = gen_rtx_CONST_VECTOR (V4SImode,
10281 gen_rtvec (4, GEN_INT (0x43300000UL),
10282 GEN_INT (0x45300000UL),
10283 const0_rtx, const0_rtx));
10284 exponents = validize_mem (force_const_mem (V4SImode, x));
10286 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10287 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10289 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10290 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10291 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10292 (0x1.0p84 + double(fp_value_hi_xmm)).
10293 Note these exponents differ by 32. */
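/* For illustration, with the input 0x0000000300000002 the low lane
   (0x43300000 ## 0x00000002) reads as the double 0x1.0p52 + 2.0 and
   the high lane (0x45300000 ## 0x00000003) reads as
   0x1.0p84 + 3 * 0x1.0p32; after the bias subtraction below the two
   lanes hold 2.0 and 3 * 2**32, whose sum is the original value.  */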
10295 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10297 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10298 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10299 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10300 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10301 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10302 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10303 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10304 biases = validize_mem (force_const_mem (V2DFmode, biases));
10305 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10307 /* Add the upper and lower DFmode values together. */
10308 if (TARGET_SSE3)
10309 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10310 else
10312 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10313 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10314 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10317 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10320 /* Convert an unsigned SImode value into a DFmode. Only currently used
10321 for SSE, but applicable anywhere. */
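/* A quick sanity check of the expansion below: for input 0xffffffff,
   adding -2**31 wraps (in SImode) to the signed value 0x7fffffff,
   the signed int->double conversion gives 2147483647.0, and adding
   back 0x1.0p31 yields 4294967295.0, the original unsigned value.  */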
10323 void
10324 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10326 REAL_VALUE_TYPE TWO31r;
10327 rtx x, fp;
10329 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10330 NULL, 1, OPTAB_DIRECT);
10332 fp = gen_reg_rtx (DFmode);
10333 emit_insn (gen_floatsidf2 (fp, x));
10335 real_ldexp (&TWO31r, &dconst1, 31);
10336 x = const_double_from_real_value (TWO31r, DFmode);
10338 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10339 if (x != target)
10340 emit_move_insn (target, x);
10343 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10344 32-bit mode; otherwise we have a direct convert instruction. */
10346 void
10347 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10349 REAL_VALUE_TYPE TWO32r;
10350 rtx fp_lo, fp_hi, x;
10352 fp_lo = gen_reg_rtx (DFmode);
10353 fp_hi = gen_reg_rtx (DFmode);
10355 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10357 real_ldexp (&TWO32r, &dconst1, 32);
10358 x = const_double_from_real_value (TWO32r, DFmode);
10359 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10361 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10363 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10364 0, OPTAB_DIRECT);
10365 if (x != target)
10366 emit_move_insn (target, x);
10369 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10370 For x86_32, -mfpmath=sse, !optimize_size only. */
10371 void
10372 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10374 REAL_VALUE_TYPE ONE16r;
10375 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10377 real_ldexp (&ONE16r, &dconst1, 16);
10378 x = const_double_from_real_value (ONE16r, SFmode);
10379 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10380 NULL, 0, OPTAB_DIRECT);
10381 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10382 NULL, 0, OPTAB_DIRECT);
10383 fp_hi = gen_reg_rtx (SFmode);
10384 fp_lo = gen_reg_rtx (SFmode);
10385 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10386 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10387 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10388 0, OPTAB_DIRECT);
10389 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10390 0, OPTAB_DIRECT);
10391 if (!rtx_equal_p (target, fp_hi))
10392 emit_move_insn (target, fp_hi);
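/* For example, the input 0x00030002 splits above into int_hi == 3
   and int_lo == 2, giving 3 * 65536.0 + 2.0 == 196610.0, the original
   value.  The multiply by 0x1.0p16 is exact; only the final addition
   can round, when the result needs more than 24 significand bits.  */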
10395 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10396 then replicate the value for all elements of the vector
10397 register. */
10400 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10402 rtvec v;
10403 switch (mode)
10405 case SFmode:
10406 if (vect)
10407 v = gen_rtvec (4, value, value, value, value);
10408 else
10409 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10410 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10411 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10413 case DFmode:
10414 if (vect)
10415 v = gen_rtvec (2, value, value);
10416 else
10417 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10418 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10420 default:
10421 gcc_unreachable ();
10425 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10426 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10427 true, then replicate the mask for all elements of the vector register.
10428 If INVERT is true, then create a mask excluding the sign bit. */
10431 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10433 enum machine_mode vec_mode;
10434 HOST_WIDE_INT hi, lo;
10435 int shift = 63;
10436 rtx v;
10437 rtx mask;
10439 /* Find the sign bit, sign extended to 2*HWI. */
10440 if (mode == SFmode)
10441 lo = 0x80000000, hi = lo < 0;
10442 else if (HOST_BITS_PER_WIDE_INT >= 64)
10443 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10444 else
10445 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10447 if (invert)
10448 lo = ~lo, hi = ~hi;
10450 /* Force this value into the low part of a fp vector constant. */
10451 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10452 mask = gen_lowpart (mode, mask);
10454 v = ix86_build_const_vector (mode, vect, mask);
10455 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10456 return force_reg (vec_mode, v);
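/* Concretely, the mask built above is 0x80000000 per SFmode element
   and bit 63 (0x8000000000000000) per DFmode element, or their
   complements when INVERT is true.  The fp absneg expansion below
   then, in effect, computes NEG as an XOR with the sign-bit mask and
   ABS as an AND with the inverted mask.  */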
10459 /* Generate code for floating point ABS or NEG. */
10461 void
10462 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10463 rtx operands[])
10465 rtx mask, set, use, clob, dst, src;
10466 bool matching_memory;
10467 bool use_sse = false;
10468 bool vector_mode = VECTOR_MODE_P (mode);
10469 enum machine_mode elt_mode = mode;
10471 if (vector_mode)
10473 elt_mode = GET_MODE_INNER (mode);
10474 use_sse = true;
10476 else if (TARGET_SSE_MATH)
10477 use_sse = SSE_FLOAT_MODE_P (mode);
10479 /* NEG and ABS performed with SSE use bitwise mask operations.
10480 Create the appropriate mask now. */
10481 if (use_sse)
10482 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10483 else
10484 mask = NULL_RTX;
10486 dst = operands[0];
10487 src = operands[1];
10489 /* If the destination is memory, and we don't have matching source
10490 operands or we're using the x87, do things in registers. */
10491 matching_memory = false;
10492 if (MEM_P (dst))
10494 if (use_sse && rtx_equal_p (dst, src))
10495 matching_memory = true;
10496 else
10497 dst = gen_reg_rtx (mode);
10499 if (MEM_P (src) && !matching_memory)
10500 src = force_reg (mode, src);
10502 if (vector_mode)
10504 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10505 set = gen_rtx_SET (VOIDmode, dst, set);
10506 emit_insn (set);
10508 else
10510 set = gen_rtx_fmt_e (code, mode, src);
10511 set = gen_rtx_SET (VOIDmode, dst, set);
10512 if (mask)
10514 use = gen_rtx_USE (VOIDmode, mask);
10515 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10516 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10517 gen_rtvec (3, set, use, clob)));
10519 else
10520 emit_insn (set);
10523 if (dst != operands[0])
10524 emit_move_insn (operands[0], dst);
10527 /* Expand a copysign operation. Special case operand 0 being a constant. */
10529 void
10530 ix86_expand_copysign (rtx operands[])
10532 enum machine_mode mode, vmode;
10533 rtx dest, op0, op1, mask, nmask;
10535 dest = operands[0];
10536 op0 = operands[1];
10537 op1 = operands[2];
10539 mode = GET_MODE (dest);
10540 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10542 if (GET_CODE (op0) == CONST_DOUBLE)
10544 rtvec v;
10546 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10547 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10549 if (op0 == CONST0_RTX (mode))
10550 op0 = CONST0_RTX (vmode);
10551 else
10553 if (mode == SFmode)
10554 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10555 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10556 else
10557 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10558 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10561 mask = ix86_build_signbit_mask (mode, 0, 0);
10563 if (mode == SFmode)
10564 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10565 else
10566 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10568 else
10570 nmask = ix86_build_signbit_mask (mode, 0, 1);
10571 mask = ix86_build_signbit_mask (mode, 0, 0);
10573 if (mode == SFmode)
10574 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10575 else
10576 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
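/* In either case the effect of the expansion above is the usual
   bit-level identity (sketched here for a scalar):

       copysign (x, y) == (x & ~SIGN_BIT) | (y & SIGN_BIT)

   with the masks living in SSE registers so the whole operation
   stays in the vector unit.  */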
10580 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10581 be a constant, and so has already been expanded into a vector constant. */
10583 void
10584 ix86_split_copysign_const (rtx operands[])
10586 enum machine_mode mode, vmode;
10587 rtx dest, op0, op1, mask, x;
10589 dest = operands[0];
10590 op0 = operands[1];
10591 op1 = operands[2];
10592 mask = operands[3];
10594 mode = GET_MODE (dest);
10595 vmode = GET_MODE (mask);
10597 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10598 x = gen_rtx_AND (vmode, dest, mask);
10599 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10601 if (op0 != CONST0_RTX (vmode))
10603 x = gen_rtx_IOR (vmode, dest, op0);
10604 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10608 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10609 so we have to do two masks. */
10611 void
10612 ix86_split_copysign_var (rtx operands[])
10614 enum machine_mode mode, vmode;
10615 rtx dest, scratch, op0, op1, mask, nmask, x;
10617 dest = operands[0];
10618 scratch = operands[1];
10619 op0 = operands[2];
10620 op1 = operands[3];
10621 nmask = operands[4];
10622 mask = operands[5];
10624 mode = GET_MODE (dest);
10625 vmode = GET_MODE (mask);
10627 if (rtx_equal_p (op0, op1))
10629 /* Shouldn't happen often (it's useless, obviously), but when it does
10630 we'd generate incorrect code if we continue below. */
10631 emit_move_insn (dest, op0);
10632 return;
10635 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10637 gcc_assert (REGNO (op1) == REGNO (scratch));
10639 x = gen_rtx_AND (vmode, scratch, mask);
10640 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10642 dest = mask;
10643 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10644 x = gen_rtx_NOT (vmode, dest);
10645 x = gen_rtx_AND (vmode, x, op0);
10646 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10648 else
10650 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10652 x = gen_rtx_AND (vmode, scratch, mask);
10654 else /* alternative 2,4 */
10656 gcc_assert (REGNO (mask) == REGNO (scratch));
10657 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10658 x = gen_rtx_AND (vmode, scratch, op1);
10660 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10662 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10664 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10665 x = gen_rtx_AND (vmode, dest, nmask);
10667 else /* alternative 3,4 */
10669 gcc_assert (REGNO (nmask) == REGNO (dest));
10670 dest = nmask;
10671 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10672 x = gen_rtx_AND (vmode, dest, op0);
10674 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10677 x = gen_rtx_IOR (vmode, dest, scratch);
10678 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10681 /* Return TRUE or FALSE depending on whether the first SET in INSN
10682 has source and destination with matching CC modes, and whether the
10683 CC mode is at least as constrained as REQ_MODE. */
10686 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10688 rtx set;
10689 enum machine_mode set_mode;
10691 set = PATTERN (insn);
10692 if (GET_CODE (set) == PARALLEL)
10693 set = XVECEXP (set, 0, 0);
10694 gcc_assert (GET_CODE (set) == SET);
10695 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10697 set_mode = GET_MODE (SET_DEST (set));
10698 switch (set_mode)
10700 case CCNOmode:
10701 if (req_mode != CCNOmode
10702 && (req_mode != CCmode
10703 || XEXP (SET_SRC (set), 1) != const0_rtx))
10704 return 0;
10705 break;
10706 case CCmode:
10707 if (req_mode == CCGCmode)
10708 return 0;
10709 /* FALLTHRU */
10710 case CCGCmode:
10711 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10712 return 0;
10713 /* FALLTHRU */
10714 case CCGOCmode:
10715 if (req_mode == CCZmode)
10716 return 0;
10717 /* FALLTHRU */
10718 case CCZmode:
10719 break;
10721 default:
10722 gcc_unreachable ();
10725 return (GET_MODE (SET_SRC (set)) == set_mode);
10728 /* Generate insn patterns to do an integer compare of OPERANDS. */
10730 static rtx
10731 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10733 enum machine_mode cmpmode;
10734 rtx tmp, flags;
10736 cmpmode = SELECT_CC_MODE (code, op0, op1);
10737 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10739 /* This is very simple, but making the interface the same as in the
10740 FP case makes the rest of the code easier. */
10741 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10742 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10744 /* Return the test that should be put into the flags user, i.e.
10745 the bcc, scc, or cmov instruction. */
10746 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10749 /* Figure out whether to use ordered or unordered fp comparisons.
10750 Return the appropriate mode to use. */
10752 enum machine_mode
10753 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10755 /* ??? In order to make all comparisons reversible, we do all comparisons
10756 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10757 trapping and nontrapping forms of comparisons, we can make inequality
10758 comparisons trapping again, since that results in better code when using
10759 FCOM based compares. */
10760 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10763 enum machine_mode
10764 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10766 enum machine_mode mode = GET_MODE (op0);
10768 if (SCALAR_FLOAT_MODE_P (mode))
10770 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10771 return ix86_fp_compare_mode (code);
10774 switch (code)
10776 /* Only zero flag is needed. */
10777 case EQ: /* ZF=0 */
10778 case NE: /* ZF!=0 */
10779 return CCZmode;
10780 /* Codes needing carry flag. */
10781 case GEU: /* CF=0 */
10782 case GTU: /* CF=0 & ZF=0 */
10783 case LTU: /* CF=1 */
10784 case LEU: /* CF=1 | ZF=1 */
10785 return CCmode;
10786 /* Codes possibly doable only with sign flag when
10787 comparing against zero. */
10788 case GE: /* SF=OF or SF=0 */
10789 case LT: /* SF<>OF or SF=1 */
10790 if (op1 == const0_rtx)
10791 return CCGOCmode;
10792 else
10793 /* For other cases Carry flag is not required. */
10794 return CCGCmode;
10795 /* Codes doable only with the sign flag when comparing
10796 against zero, but we lack a jump instruction for them,
10797 so we need to use relational tests against overflow,
10798 which thus needs to be zero. */
10799 case GT: /* ZF=0 & SF=OF */
10800 case LE: /* ZF=1 | SF<>OF */
10801 if (op1 == const0_rtx)
10802 return CCNOmode;
10803 else
10804 return CCGCmode;
10805 /* The strcmp pattern does (use flags), and combine may ask us for the
10806 proper mode. */
10807 case USE:
10808 return CCmode;
10809 default:
10810 gcc_unreachable ();
10814 /* Return the fixed registers used for condition codes. */
10816 static bool
10817 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10819 *p1 = FLAGS_REG;
10820 *p2 = FPSR_REG;
10821 return true;
10824 /* If two condition code modes are compatible, return a condition code
10825 mode which is compatible with both. Otherwise, return
10826 VOIDmode. */
10828 static enum machine_mode
10829 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10831 if (m1 == m2)
10832 return m1;
10834 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10835 return VOIDmode;
10837 if ((m1 == CCGCmode && m2 == CCGOCmode)
10838 || (m1 == CCGOCmode && m2 == CCGCmode))
10839 return CCGCmode;
10841 switch (m1)
10843 default:
10844 gcc_unreachable ();
10846 case CCmode:
10847 case CCGCmode:
10848 case CCGOCmode:
10849 case CCNOmode:
10850 case CCZmode:
10851 switch (m2)
10853 default:
10854 return VOIDmode;
10856 case CCmode:
10857 case CCGCmode:
10858 case CCGOCmode:
10859 case CCNOmode:
10860 case CCZmode:
10861 return CCmode;
10864 case CCFPmode:
10865 case CCFPUmode:
10866 /* These are only compatible with themselves, which we already
10867 checked above. */
10868 return VOIDmode;
10872 /* Split comparison code CODE into comparisons we can do using branch
10873 instructions. BYPASS_CODE is the comparison code for the branch that
10874 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
10875 is not required, its value is set to UNKNOWN.
10876 We never require more than two branches. */
10878 void
10879 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10880 enum rtx_code *first_code,
10881 enum rtx_code *second_code)
10883 *first_code = code;
10884 *bypass_code = UNKNOWN;
10885 *second_code = UNKNOWN;
10887 /* The fcomi comparison sets flags as follows:
10889 cmp ZF PF CF
10890 > 0 0 0
10891 < 0 0 1
10892 = 1 0 0
10893 un 1 1 1 */
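/* For example, under TARGET_IEEE_FP a plain LT cannot be tested
   directly, because CF is also set for unordered operands; the switch
   below therefore rewrites it as

       if (UNORDERED) goto around;    -- bypass_code
       if (UNLT)      goto target;    -- first_code
     around:

   whereas NE needs a second branch instead, since both LTGT and
   UNORDERED must reach the target.  */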
10895 switch (code)
10897 case GT: /* GTU - CF=0 & ZF=0 */
10898 case GE: /* GEU - CF=0 */
10899 case ORDERED: /* PF=0 */
10900 case UNORDERED: /* PF=1 */
10901 case UNEQ: /* EQ - ZF=1 */
10902 case UNLT: /* LTU - CF=1 */
10903 case UNLE: /* LEU - CF=1 | ZF=1 */
10904 case LTGT: /* EQ - ZF=0 */
10905 break;
10906 case LT: /* LTU - CF=1 - fails on unordered */
10907 *first_code = UNLT;
10908 *bypass_code = UNORDERED;
10909 break;
10910 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10911 *first_code = UNLE;
10912 *bypass_code = UNORDERED;
10913 break;
10914 case EQ: /* EQ - ZF=1 - fails on unordered */
10915 *first_code = UNEQ;
10916 *bypass_code = UNORDERED;
10917 break;
10918 case NE: /* NE - ZF=0 - fails on unordered */
10919 *first_code = LTGT;
10920 *second_code = UNORDERED;
10921 break;
10922 case UNGE: /* GEU - CF=0 - fails on unordered */
10923 *first_code = GE;
10924 *second_code = UNORDERED;
10925 break;
10926 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10927 *first_code = GT;
10928 *second_code = UNORDERED;
10929 break;
10930 default:
10931 gcc_unreachable ();
10933 if (!TARGET_IEEE_FP)
10935 *second_code = UNKNOWN;
10936 *bypass_code = UNKNOWN;
10940 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10941 All following functions use the number of instructions as a cost metric.
10942 In the future this should be tweaked to compute bytes for optimize_size and
10943 take into account the performance of various instructions on various CPUs. */
10944 static int
10945 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10947 if (!TARGET_IEEE_FP)
10948 return 4;
10949 /* The cost of code output by ix86_expand_fp_compare. */
10950 switch (code)
10952 case UNLE:
10953 case UNLT:
10954 case LTGT:
10955 case GT:
10956 case GE:
10957 case UNORDERED:
10958 case ORDERED:
10959 case UNEQ:
10960 return 4;
10961 break;
10962 case LT:
10963 case NE:
10964 case EQ:
10965 case UNGE:
10966 return 5;
10967 break;
10968 case LE:
10969 case UNGT:
10970 return 6;
10971 break;
10972 default:
10973 gcc_unreachable ();
10977 /* Return cost of comparison done using fcomi operation.
10978 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10979 static int
10980 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10982 enum rtx_code bypass_code, first_code, second_code;
10983 /* Return arbitrarily high cost when instruction is not supported - this
10984 prevents gcc from using it. */
10985 if (!TARGET_CMOVE)
10986 return 1024;
10987 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10988 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10991 /* Return cost of comparison done using sahf operation.
10992 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10993 static int
10994 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10996 enum rtx_code bypass_code, first_code, second_code;
10997 /* Return arbitrarily high cost when instruction is not preferred - this
10998 prevents gcc from using it. */
10999 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11000 return 1024;
11001 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11002 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11005 /* Compute cost of the comparison done using any method.
11006 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11007 static int
11008 ix86_fp_comparison_cost (enum rtx_code code)
11010 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11011 int min;
11013 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11014 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11016 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11017 if (min > sahf_cost)
11018 min = sahf_cost;
11019 if (min > fcomi_cost)
11020 min = fcomi_cost;
11021 return min;
11024 /* Return true if we should use an FCOMI instruction for this
11025 fp comparison. */
11028 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11030 enum rtx_code swapped_code = swap_condition (code);
11032 return ((ix86_fp_comparison_cost (code)
11033 == ix86_fp_comparison_fcomi_cost (code))
11034 || (ix86_fp_comparison_cost (swapped_code)
11035 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11038 /* Swap, force into registers, or otherwise massage the two operands
11039 to a fp comparison. The operands are updated in place; the new
11040 comparison code is returned. */
11042 static enum rtx_code
11043 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11045 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11046 rtx op0 = *pop0, op1 = *pop1;
11047 enum machine_mode op_mode = GET_MODE (op0);
11048 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11050 /* All of the unordered compare instructions only work on registers.
11051 The same is true of the fcomi compare instructions. The XFmode
11052 compare instructions require registers except when comparing
11053 against zero or when converting operand 1 from fixed point to
11054 floating point. */
11056 if (!is_sse
11057 && (fpcmp_mode == CCFPUmode
11058 || (op_mode == XFmode
11059 && ! (standard_80387_constant_p (op0) == 1
11060 || standard_80387_constant_p (op1) == 1)
11061 && GET_CODE (op1) != FLOAT)
11062 || ix86_use_fcomi_compare (code)))
11064 op0 = force_reg (op_mode, op0);
11065 op1 = force_reg (op_mode, op1);
11067 else
11069 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11070 things around if they appear profitable, otherwise force op0
11071 into a register. */
11073 if (standard_80387_constant_p (op0) == 0
11074 || (MEM_P (op0)
11075 && ! (standard_80387_constant_p (op1) == 0
11076 || MEM_P (op1))))
11078 rtx tmp;
11079 tmp = op0, op0 = op1, op1 = tmp;
11080 code = swap_condition (code);
11083 if (!REG_P (op0))
11084 op0 = force_reg (op_mode, op0);
11086 if (CONSTANT_P (op1))
11088 int tmp = standard_80387_constant_p (op1);
11089 if (tmp == 0)
11090 op1 = validize_mem (force_const_mem (op_mode, op1));
11091 else if (tmp == 1)
11093 if (TARGET_CMOVE)
11094 op1 = force_reg (op_mode, op1);
11096 else
11097 op1 = force_reg (op_mode, op1);
11101 /* Try to rearrange the comparison to make it cheaper. */
11102 if (ix86_fp_comparison_cost (code)
11103 > ix86_fp_comparison_cost (swap_condition (code))
11104 && (REG_P (op1) || !no_new_pseudos))
11106 rtx tmp;
11107 tmp = op0, op0 = op1, op1 = tmp;
11108 code = swap_condition (code);
11109 if (!REG_P (op0))
11110 op0 = force_reg (op_mode, op0);
11113 *pop0 = op0;
11114 *pop1 = op1;
11115 return code;
11118 /* Convert the comparison codes we use to represent FP comparisons to the
11119 integer code that will result in a proper branch. Return UNKNOWN if no
11120 such code is available. */
11122 enum rtx_code
11123 ix86_fp_compare_code_to_integer (enum rtx_code code)
11125 switch (code)
11127 case GT:
11128 return GTU;
11129 case GE:
11130 return GEU;
11131 case ORDERED:
11132 case UNORDERED:
11133 return code;
11134 break;
11135 case UNEQ:
11136 return EQ;
11137 break;
11138 case UNLT:
11139 return LTU;
11140 break;
11141 case UNLE:
11142 return LEU;
11143 break;
11144 case LTGT:
11145 return NE;
11146 break;
11147 default:
11148 return UNKNOWN;
11152 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11154 static rtx
11155 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11156 rtx *second_test, rtx *bypass_test)
11158 enum machine_mode fpcmp_mode, intcmp_mode;
11159 rtx tmp, tmp2;
11160 int cost = ix86_fp_comparison_cost (code);
11161 enum rtx_code bypass_code, first_code, second_code;
11163 fpcmp_mode = ix86_fp_compare_mode (code);
11164 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11166 if (second_test)
11167 *second_test = NULL_RTX;
11168 if (bypass_test)
11169 *bypass_test = NULL_RTX;
11171 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11173 /* Do fcomi/sahf based test when profitable. */
11174 if ((TARGET_CMOVE || TARGET_SAHF)
11175 && (bypass_code == UNKNOWN || bypass_test)
11176 && (second_code == UNKNOWN || second_test)
11177 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11179 if (TARGET_CMOVE)
11181 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11182 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11183 tmp);
11184 emit_insn (tmp);
11186 else
11188 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11189 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11190 if (!scratch)
11191 scratch = gen_reg_rtx (HImode);
11192 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11193 emit_insn (gen_x86_sahf_1 (scratch));
11196 /* The FP codes work out to act like unsigned. */
11197 intcmp_mode = fpcmp_mode;
11198 code = first_code;
11199 if (bypass_code != UNKNOWN)
11200 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11201 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11202 const0_rtx);
11203 if (second_code != UNKNOWN)
11204 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11205 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11206 const0_rtx);
11208 else
11210 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11211 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11212 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11213 if (!scratch)
11214 scratch = gen_reg_rtx (HImode);
11215 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11217 /* In the unordered case, we have to check C2 for NaN's, which
11218 doesn't happen to work out to anything nice combination-wise.
11219 So do some bit twiddling on the value we've got in AH to come
11220 up with an appropriate set of condition codes. */
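/* After the fnstsw and the move into AH, the relevant status-word
   bits sit at C0 == 0x01, C2 == 0x04 and C3 == 0x40 (C2 is the one
   set for NaNs), so e.g. the 0x45 masks below test C3|C2|C0 together
   and 0x44 tests C3|C2.  */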
11222 intcmp_mode = CCNOmode;
11223 switch (code)
11225 case GT:
11226 case UNGT:
11227 if (code == GT || !TARGET_IEEE_FP)
11229 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11230 code = EQ;
11232 else
11234 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11235 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11236 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11237 intcmp_mode = CCmode;
11238 code = GEU;
11240 break;
11241 case LT:
11242 case UNLT:
11243 if (code == LT && TARGET_IEEE_FP)
11245 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11246 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11247 intcmp_mode = CCmode;
11248 code = EQ;
11250 else
11252 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11253 code = NE;
11255 break;
11256 case GE:
11257 case UNGE:
11258 if (code == GE || !TARGET_IEEE_FP)
11260 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11261 code = EQ;
11263 else
11265 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11266 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11267 GEN_INT (0x01)));
11268 code = NE;
11270 break;
11271 case LE:
11272 case UNLE:
11273 if (code == LE && TARGET_IEEE_FP)
11275 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11276 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11277 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11278 intcmp_mode = CCmode;
11279 code = LTU;
11281 else
11283 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11284 code = NE;
11286 break;
11287 case EQ:
11288 case UNEQ:
11289 if (code == EQ && TARGET_IEEE_FP)
11291 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11292 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11293 intcmp_mode = CCmode;
11294 code = EQ;
11296 else
11298 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11299 code = NE;
11300 break;
11302 break;
11303 case NE:
11304 case LTGT:
11305 if (code == NE && TARGET_IEEE_FP)
11307 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11308 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11309 GEN_INT (0x40)));
11310 code = NE;
11312 else
11314 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11315 code = EQ;
11317 break;
11319 case UNORDERED:
11320 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11321 code = NE;
11322 break;
11323 case ORDERED:
11324 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11325 code = EQ;
11326 break;
11328 default:
11329 gcc_unreachable ();
11333 /* Return the test that should be put into the flags user, i.e.
11334 the bcc, scc, or cmov instruction. */
11335 return gen_rtx_fmt_ee (code, VOIDmode,
11336 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11337 const0_rtx);
11341 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11343 rtx op0, op1, ret;
11344 op0 = ix86_compare_op0;
11345 op1 = ix86_compare_op1;
11347 if (second_test)
11348 *second_test = NULL_RTX;
11349 if (bypass_test)
11350 *bypass_test = NULL_RTX;
11352 if (ix86_compare_emitted)
11354 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11355 ix86_compare_emitted = NULL_RTX;
11357 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11359 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11360 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11361 second_test, bypass_test);
11363 else
11364 ret = ix86_expand_int_compare (code, op0, op1);
11366 return ret;
11369 /* Return true if CODE will result in a nontrivial jump sequence. */
11370 bool
11371 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11373 enum rtx_code bypass_code, first_code, second_code;
11374 if (!TARGET_CMOVE)
11375 return true;
11376 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11377 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11380 void
11381 ix86_expand_branch (enum rtx_code code, rtx label)
11383 rtx tmp;
11385 /* If we have emitted a compare insn, go straight to simple.
11386 ix86_expand_compare won't emit anything if ix86_compare_emitted
11387 is non-NULL. */
11388 if (ix86_compare_emitted)
11389 goto simple;
11391 switch (GET_MODE (ix86_compare_op0))
11393 case QImode:
11394 case HImode:
11395 case SImode:
11396 simple:
11397 tmp = ix86_expand_compare (code, NULL, NULL);
11398 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11399 gen_rtx_LABEL_REF (VOIDmode, label),
11400 pc_rtx);
11401 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11402 return;
11404 case SFmode:
11405 case DFmode:
11406 case XFmode:
11408 rtvec vec;
11409 int use_fcomi;
11410 enum rtx_code bypass_code, first_code, second_code;
11412 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11413 &ix86_compare_op1);
11415 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11417 /* Check whether we will use the natural sequence with one jump. If
11418 so, we can expand the jump early. Otherwise delay expansion by
11419 creating a compound insn so as not to confuse the optimizers. */
11420 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11421 && TARGET_CMOVE)
11423 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11424 gen_rtx_LABEL_REF (VOIDmode, label),
11425 pc_rtx, NULL_RTX, NULL_RTX);
11427 else
11429 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11430 ix86_compare_op0, ix86_compare_op1);
11431 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11432 gen_rtx_LABEL_REF (VOIDmode, label),
11433 pc_rtx);
11434 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11436 use_fcomi = ix86_use_fcomi_compare (code);
11437 vec = rtvec_alloc (3 + !use_fcomi);
11438 RTVEC_ELT (vec, 0) = tmp;
11439 RTVEC_ELT (vec, 1)
11440 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11441 RTVEC_ELT (vec, 2)
11442 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11443 if (! use_fcomi)
11444 RTVEC_ELT (vec, 3)
11445 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11447 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11449 return;
11452 case DImode:
11453 if (TARGET_64BIT)
11454 goto simple;
11455 case TImode:
11456 /* Expand DImode branch into multiple compare+branch. */
11458 rtx lo[2], hi[2], label2;
11459 enum rtx_code code1, code2, code3;
11460 enum machine_mode submode;
11462 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11464 tmp = ix86_compare_op0;
11465 ix86_compare_op0 = ix86_compare_op1;
11466 ix86_compare_op1 = tmp;
11467 code = swap_condition (code);
11469 if (GET_MODE (ix86_compare_op0) == DImode)
11471 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11472 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11473 submode = SImode;
11475 else
11477 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11478 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11479 submode = DImode;
11482 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11483 avoid two branches. This costs one extra insn, so disable when
11484 optimizing for size. */
11486 if ((code == EQ || code == NE)
11487 && (!optimize_size
11488 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11490 rtx xor0, xor1;
11492 xor1 = hi[0];
11493 if (hi[1] != const0_rtx)
11494 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11495 NULL_RTX, 0, OPTAB_WIDEN);
11497 xor0 = lo[0];
11498 if (lo[1] != const0_rtx)
11499 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11500 NULL_RTX, 0, OPTAB_WIDEN);
11502 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11503 NULL_RTX, 0, OPTAB_WIDEN);
11505 ix86_compare_op0 = tmp;
11506 ix86_compare_op1 = const0_rtx;
11507 ix86_expand_branch (code, label);
11508 return;
11511 /* Otherwise, if we are doing less-than or greater-than-or-equal,
11512 op1 is a constant, and its low word is zero, then we can just
11513 examine the high word. */
11515 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11516 switch (code)
11518 case LT: case LTU: case GE: case GEU:
11519 ix86_compare_op0 = hi[0];
11520 ix86_compare_op1 = hi[1];
11521 ix86_expand_branch (code, label);
11522 return;
11523 default:
11524 break;
11527 /* Otherwise, we need two or three jumps. */
11529 label2 = gen_label_rtx ();
11531 code1 = code;
11532 code2 = swap_condition (code);
11533 code3 = unsigned_condition (code);
11535 switch (code)
11537 case LT: case GT: case LTU: case GTU:
11538 break;
11540 case LE: code1 = LT; code2 = GT; break;
11541 case GE: code1 = GT; code2 = LT; break;
11542 case LEU: code1 = LTU; code2 = GTU; break;
11543 case GEU: code1 = GTU; code2 = LTU; break;
11545 case EQ: code1 = UNKNOWN; code2 = NE; break;
11546 case NE: code2 = UNKNOWN; break;
11548 default:
11549 gcc_unreachable ();
11553 * a < b =>
11554 * if (hi(a) < hi(b)) goto true;
11555 * if (hi(a) > hi(b)) goto false;
11556 * if (lo(a) < lo(b)) goto true;
11557 * false:
11560 ix86_compare_op0 = hi[0];
11561 ix86_compare_op1 = hi[1];
11563 if (code1 != UNKNOWN)
11564 ix86_expand_branch (code1, label);
11565 if (code2 != UNKNOWN)
11566 ix86_expand_branch (code2, label2);
11568 ix86_compare_op0 = lo[0];
11569 ix86_compare_op1 = lo[1];
11570 ix86_expand_branch (code3, label);
11572 if (code2 != UNKNOWN)
11573 emit_label (label2);
11574 return;
11577 default:
11578 gcc_unreachable ();
11582 /* Split branch based on floating point condition. */
11583 void
11584 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11585 rtx target1, rtx target2, rtx tmp, rtx pushed)
11587 rtx second, bypass;
11588 rtx label = NULL_RTX;
11589 rtx condition;
11590 int bypass_probability = -1, second_probability = -1, probability = -1;
11591 rtx i;
11593 if (target2 != pc_rtx)
11595 rtx tmp = target2;
11596 code = reverse_condition_maybe_unordered (code);
11597 target2 = target1;
11598 target1 = tmp;
11601 condition = ix86_expand_fp_compare (code, op1, op2,
11602 tmp, &second, &bypass);
11604 /* Remove pushed operand from stack. */
11605 if (pushed)
11606 ix86_free_from_memory (GET_MODE (pushed));
11608 if (split_branch_probability >= 0)
11610 /* Distribute the probabilities across the jumps.
11611 Assume that BYPASS and SECOND always test
11612 for UNORDERED. */
11613 probability = split_branch_probability;
11615 /* A value of 1 is low enough that there is no need for the
11616 probability to be updated. Later we may run some experiments and see
11617 if unordered values are more frequent in practice. */
11618 if (bypass)
11619 bypass_probability = 1;
11620 if (second)
11621 second_probability = 1;
11623 if (bypass != NULL_RTX)
11625 label = gen_label_rtx ();
11626 i = emit_jump_insn (gen_rtx_SET
11627 (VOIDmode, pc_rtx,
11628 gen_rtx_IF_THEN_ELSE (VOIDmode,
11629 bypass,
11630 gen_rtx_LABEL_REF (VOIDmode,
11631 label),
11632 pc_rtx)));
11633 if (bypass_probability >= 0)
11634 REG_NOTES (i)
11635 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11636 GEN_INT (bypass_probability),
11637 REG_NOTES (i));
11639 i = emit_jump_insn (gen_rtx_SET
11640 (VOIDmode, pc_rtx,
11641 gen_rtx_IF_THEN_ELSE (VOIDmode,
11642 condition, target1, target2)));
11643 if (probability >= 0)
11644 REG_NOTES (i)
11645 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11646 GEN_INT (probability),
11647 REG_NOTES (i));
11648 if (second != NULL_RTX)
11650 i = emit_jump_insn (gen_rtx_SET
11651 (VOIDmode, pc_rtx,
11652 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11653 target2)));
11654 if (second_probability >= 0)
11655 REG_NOTES (i)
11656 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11657 GEN_INT (second_probability),
11658 REG_NOTES (i));
11660 if (label != NULL_RTX)
11661 emit_label (label);
11665 ix86_expand_setcc (enum rtx_code code, rtx dest)
11667 rtx ret, tmp, tmpreg, equiv;
11668 rtx second_test, bypass_test;
11670 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11671 return 0; /* FAIL */
11673 gcc_assert (GET_MODE (dest) == QImode);
11675 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11676 PUT_MODE (ret, QImode);
11678 tmp = dest;
11679 tmpreg = dest;
11681 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11682 if (bypass_test || second_test)
11684 rtx test = second_test;
11685 int bypass = 0;
11686 rtx tmp2 = gen_reg_rtx (QImode);
11687 if (bypass_test)
11689 gcc_assert (!second_test);
11690 test = bypass_test;
11691 bypass = 1;
11692 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11694 PUT_MODE (test, QImode);
11695 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11697 if (bypass)
11698 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11699 else
11700 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11703 /* Attach a REG_EQUAL note describing the comparison result. */
11704 if (ix86_compare_op0 && ix86_compare_op1)
11706 equiv = simplify_gen_relational (code, QImode,
11707 GET_MODE (ix86_compare_op0),
11708 ix86_compare_op0, ix86_compare_op1);
11709 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11712 return 1; /* DONE */
11715 /* Expand comparison setting or clearing carry flag. Return true when
11716 successful and set pop for the operation. */
11717 static bool
11718 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11720 enum machine_mode mode =
11721 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11723 /* Do not handle DImode compares that go through the special path.
11724 Also we can't deal with FP compares yet. This is possible to add. */
11725 if (mode == (TARGET_64BIT ? TImode : DImode))
11726 return false;
11728 if (SCALAR_FLOAT_MODE_P (mode))
11730 rtx second_test = NULL, bypass_test = NULL;
11731 rtx compare_op, compare_seq;
11733 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11735 /* Shortcut: following common codes never translate
11736 into carry flag compares. */
11737 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11738 || code == ORDERED || code == UNORDERED)
11739 return false;
11741 /* These comparisons require the zero flag; swap operands so they don't. */
11742 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11743 && !TARGET_IEEE_FP)
11745 rtx tmp = op0;
11746 op0 = op1;
11747 op1 = tmp;
11748 code = swap_condition (code);
11751 /* Try to expand the comparison and verify that we end up with a carry flag
11752 based comparison. This fails to be true only when we decide to expand the
11753 comparison using arithmetic, which is not a common scenario. */
11754 start_sequence ();
11755 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11756 &second_test, &bypass_test);
11757 compare_seq = get_insns ();
11758 end_sequence ();
11760 if (second_test || bypass_test)
11761 return false;
11762 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11763 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11764 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11765 else
11766 code = GET_CODE (compare_op);
11767 if (code != LTU && code != GEU)
11768 return false;
11769 emit_insn (compare_seq);
11770 *pop = compare_op;
11771 return true;
11773 if (!INTEGRAL_MODE_P (mode))
11774 return false;
11775 switch (code)
11777 case LTU:
11778 case GEU:
11779 break;
11781 /* Convert a==0 into (unsigned)a<1. */
11782 case EQ:
11783 case NE:
11784 if (op1 != const0_rtx)
11785 return false;
11786 op1 = const1_rtx;
11787 code = (code == EQ ? LTU : GEU);
11788 break;
11790 /* Convert a>b into b<a or a>=b-1. */
11791 case GTU:
11792 case LEU:
11793 if (CONST_INT_P (op1))
11795 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11796 /* Bail out on overflow. We could still swap the operands, but that
11797 would force loading of the constant into a register. */
11798 if (op1 == const0_rtx
11799 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11800 return false;
11801 code = (code == GTU ? GEU : LTU);
11803 else
11805 rtx tmp = op1;
11806 op1 = op0;
11807 op0 = tmp;
11808 code = (code == GTU ? LTU : GEU);
11810 break;
11812 /* Convert a>=0 into (unsigned)a<0x80000000. */
11813 case LT:
11814 case GE:
11815 if (mode == DImode || op1 != const0_rtx)
11816 return false;
11817 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11818 code = (code == LT ? GEU : LTU);
11819 break;
11820 case LE:
11821 case GT:
11822 if (mode == DImode || op1 != constm1_rtx)
11823 return false;
11824 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11825 code = (code == LE ? GEU : LTU);
11826 break;
11828 default:
11829 return false;
11831 /* Swapping operands may cause constant to appear as first operand. */
11832 if (!nonimmediate_operand (op0, VOIDmode))
11834 if (no_new_pseudos)
11835 return false;
11836 op0 = force_reg (mode, op0);
11838 ix86_compare_op0 = op0;
11839 ix86_compare_op1 = op1;
11840 *pop = ix86_expand_compare (code, NULL, NULL);
11841 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11842 return true;
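/* Summary sketch, for illustration only (the variable names and constants
   here are examples, not part of the function): each accepted integer
   comparison is rewritten into an unsigned compare whose result lands in
   the carry flag, e.g.

     a == 0             becomes   (unsigned) a <  1
     a != 0             becomes   (unsigned) a >= 1
     (unsigned) a > c   becomes   (unsigned) a >= c + 1   (constant c, no overflow)
     a >= 0             becomes   (unsigned) a <  SIGN_BIT
     a <= -1            becomes   (unsigned) a >= SIGN_BIT

   where SIGN_BIT stands for 1 << (mode bitsize - 1), so the caller can
   consume a plain LTU/GEU test of the flags.  */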
11846 ix86_expand_int_movcc (rtx operands[])
11848 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11849 rtx compare_seq, compare_op;
11850 rtx second_test, bypass_test;
11851 enum machine_mode mode = GET_MODE (operands[0]);
11852 bool sign_bit_compare_p = false;
11854 start_sequence ();
11855 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11856 compare_seq = get_insns ();
11857 end_sequence ();
11859 compare_code = GET_CODE (compare_op);
11861 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11862 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11863 sign_bit_compare_p = true;
11865 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11866 HImode insns, we'd be swallowed in word prefix ops. */
11868 if ((mode != HImode || TARGET_FAST_PREFIX)
11869 && (mode != (TARGET_64BIT ? TImode : DImode))
11870 && CONST_INT_P (operands[2])
11871 && CONST_INT_P (operands[3]))
11873 rtx out = operands[0];
11874 HOST_WIDE_INT ct = INTVAL (operands[2]);
11875 HOST_WIDE_INT cf = INTVAL (operands[3]);
11876 HOST_WIDE_INT diff;
11878 diff = ct - cf;
11879 /* Sign bit compares are better done using shifts than by using
11880 sbb. */
11881 if (sign_bit_compare_p
11882 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11883 ix86_compare_op1, &compare_op))
11885 /* Detect overlap between destination and compare sources. */
11886 rtx tmp = out;
11888 if (!sign_bit_compare_p)
11890 bool fpcmp = false;
11892 compare_code = GET_CODE (compare_op);
11894 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11895 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11897 fpcmp = true;
11898 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11901 /* To simplify rest of code, restrict to the GEU case. */
11902 if (compare_code == LTU)
11904 HOST_WIDE_INT tmp = ct;
11905 ct = cf;
11906 cf = tmp;
11907 compare_code = reverse_condition (compare_code);
11908 code = reverse_condition (code);
11910 else
11912 if (fpcmp)
11913 PUT_CODE (compare_op,
11914 reverse_condition_maybe_unordered
11915 (GET_CODE (compare_op)));
11916 else
11917 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11919 diff = ct - cf;
11921 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11922 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11923 tmp = gen_reg_rtx (mode);
11925 if (mode == DImode)
11926 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11927 else
11928 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11930 else
11932 if (code == GT || code == GE)
11933 code = reverse_condition (code);
11934 else
11936 HOST_WIDE_INT tmp = ct;
11937 ct = cf;
11938 cf = tmp;
11939 diff = ct - cf;
11941 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11942 ix86_compare_op1, VOIDmode, 0, -1);
11945 if (diff == 1)
11948 * cmpl op0,op1
11949 * sbbl dest,dest
11950 * [addl dest, ct]
11952 * Size 5 - 8.
11954 if (ct)
11955 tmp = expand_simple_binop (mode, PLUS,
11956 tmp, GEN_INT (ct),
11957 copy_rtx (tmp), 1, OPTAB_DIRECT);
11959 else if (cf == -1)
11962 * cmpl op0,op1
11963 * sbbl dest,dest
11964 * orl $ct, dest
11966 * Size 8.
11968 tmp = expand_simple_binop (mode, IOR,
11969 tmp, GEN_INT (ct),
11970 copy_rtx (tmp), 1, OPTAB_DIRECT);
11972 else if (diff == -1 && ct)
11975 * cmpl op0,op1
11976 * sbbl dest,dest
11977 * notl dest
11978 * [addl dest, cf]
11980 * Size 8 - 11.
11982 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11983 if (cf)
11984 tmp = expand_simple_binop (mode, PLUS,
11985 copy_rtx (tmp), GEN_INT (cf),
11986 copy_rtx (tmp), 1, OPTAB_DIRECT);
11988 else
11991 * cmpl op0,op1
11992 * sbbl dest,dest
11993 * [notl dest]
11994 * andl cf - ct, dest
11995 * [addl dest, ct]
11997 * Size 8 - 11.
12000 if (cf == 0)
12002 cf = ct;
12003 ct = 0;
12004 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12007 tmp = expand_simple_binop (mode, AND,
12008 copy_rtx (tmp),
12009 gen_int_mode (cf - ct, mode),
12010 copy_rtx (tmp), 1, OPTAB_DIRECT);
12011 if (ct)
12012 tmp = expand_simple_binop (mode, PLUS,
12013 copy_rtx (tmp), GEN_INT (ct),
12014 copy_rtx (tmp), 1, OPTAB_DIRECT);
12017 if (!rtx_equal_p (tmp, out))
12018 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12020 return 1; /* DONE */
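  /* For illustration only - a C-level sketch of the sbb sequences above
     (ct and cf are this function's names; mask is illustrative, and the
     identity is the point rather than the exact insn order):

       mask = condition ? -1 : 0;        produced by sbb dest,dest
       dest = (mask & (cf - ct)) + ct;   yields cf when mask is -1, ct otherwise

     the diff == 1 and cf == -1 cases are the same idea with the AND/ADD
     folded into a single add or or.  */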
12023 if (diff < 0)
12025 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12027 HOST_WIDE_INT tmp;
12028 tmp = ct, ct = cf, cf = tmp;
12029 diff = -diff;
12031 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12033 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12035 /* We may be reversing an unordered compare to a normal compare, which
12036 is not valid in general (we may convert a non-trapping condition
12037 to a trapping one); however, on i386 we currently emit all
12038 comparisons unordered. */
12039 compare_code = reverse_condition_maybe_unordered (compare_code);
12040 code = reverse_condition_maybe_unordered (code);
12042 else
12044 compare_code = reverse_condition (compare_code);
12045 code = reverse_condition (code);
12049 compare_code = UNKNOWN;
12050 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12051 && CONST_INT_P (ix86_compare_op1))
12053 if (ix86_compare_op1 == const0_rtx
12054 && (code == LT || code == GE))
12055 compare_code = code;
12056 else if (ix86_compare_op1 == constm1_rtx)
12058 if (code == LE)
12059 compare_code = LT;
12060 else if (code == GT)
12061 compare_code = GE;
12065 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12066 if (compare_code != UNKNOWN
12067 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12068 && (cf == -1 || ct == -1))
12070 /* If lea code below could be used, only optimize
12071 if it results in a 2 insn sequence. */
12073 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12074 || diff == 3 || diff == 5 || diff == 9)
12075 || (compare_code == LT && ct == -1)
12076 || (compare_code == GE && cf == -1))
12079 * notl op1 (if necessary)
12080 * sarl $31, op1
12081 * orl cf, op1
12083 if (ct != -1)
12085 cf = ct;
12086 ct = -1;
12087 code = reverse_condition (code);
12090 out = emit_store_flag (out, code, ix86_compare_op0,
12091 ix86_compare_op1, VOIDmode, 0, -1);
12093 out = expand_simple_binop (mode, IOR,
12094 out, GEN_INT (cf),
12095 out, 1, OPTAB_DIRECT);
12096 if (out != operands[0])
12097 emit_move_insn (operands[0], out);
12099 return 1; /* DONE */
12104 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12105 || diff == 3 || diff == 5 || diff == 9)
12106 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12107 && (mode != DImode
12108 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12111 * xorl dest,dest
12112 * cmpl op1,op2
12113 * setcc dest
12114 * lea cf(dest*(ct-cf)),dest
12116 * Size 14.
12118 * This also catches the degenerate setcc-only case.
12121 rtx tmp;
12122 int nops;
12124 out = emit_store_flag (out, code, ix86_compare_op0,
12125 ix86_compare_op1, VOIDmode, 0, 1);
12127 nops = 0;
12128 /* On x86_64 the lea instruction operates on Pmode, so we need
12129 to get the arithmetic done in the proper mode to match. */
12130 if (diff == 1)
12131 tmp = copy_rtx (out);
12132 else
12134 rtx out1;
12135 out1 = copy_rtx (out);
12136 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12137 nops++;
12138 if (diff & 1)
12140 tmp = gen_rtx_PLUS (mode, tmp, out1);
12141 nops++;
12144 if (cf != 0)
12146 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12147 nops++;
12149 if (!rtx_equal_p (tmp, out))
12151 if (nops == 1)
12152 out = force_operand (tmp, copy_rtx (out));
12153 else
12154 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12156 if (!rtx_equal_p (out, operands[0]))
12157 emit_move_insn (operands[0], copy_rtx (out));
12159 return 1; /* DONE */
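  /* Illustration only (a sketch of the lea form, using this function's ct,
     cf and diff = ct - cf): with t the 0/1 result of setcc,

       dest = cf + t * diff;

     and for diff in {1, 2, 3, 4, 5, 8, 9} the multiply-and-add pair maps
     onto a single lea (or just the setcc when diff == 1 and cf == 0).  */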
12163 * General case: Jumpful:
12164 * xorl dest,dest cmpl op1, op2
12165 * cmpl op1, op2 movl ct, dest
12166 * setcc dest jcc 1f
12167 * decl dest movl cf, dest
12168 * andl (cf-ct),dest 1:
12169 * addl ct,dest
12171 * Size 20. Size 14.
12173 * This is reasonably steep, but branch mispredict costs are
12174 * high on modern cpus, so consider failing only if optimizing
12175 * for space.
12178 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12179 && BRANCH_COST >= 2)
12181 if (cf == 0)
12183 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12185 cf = ct;
12186 ct = 0;
12188 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12190 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12192 /* We may be reversing an unordered compare to a normal compare,
12193 which is not valid in general (we may convert a non-trapping
12194 condition to a trapping one); however, on i386 we currently
12195 emit all comparisons unordered. */
12196 code = reverse_condition_maybe_unordered (code);
12198 else
12200 code = reverse_condition (code);
12201 if (compare_code != UNKNOWN)
12202 compare_code = reverse_condition (compare_code);
12206 if (compare_code != UNKNOWN)
12208 /* notl op1 (if needed)
12209 sarl $31, op1
12210 andl (cf-ct), op1
12211 addl ct, op1
12213 For x < 0 (resp. x <= -1) there will be no notl,
12214 so if possible swap the constants to get rid of the
12215 complement.
12216 True/false will be -1/0 while code below (store flag
12217 followed by decrement) is 0/-1, so the constants need
12218 to be exchanged once more. */
12220 if (compare_code == GE || !cf)
12222 code = reverse_condition (code);
12223 compare_code = LT;
12225 else
12227 HOST_WIDE_INT tmp = cf;
12228 cf = ct;
12229 ct = tmp;
12232 out = emit_store_flag (out, code, ix86_compare_op0,
12233 ix86_compare_op1, VOIDmode, 0, -1);
12235 else
12237 out = emit_store_flag (out, code, ix86_compare_op0,
12238 ix86_compare_op1, VOIDmode, 0, 1);
12240 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12241 copy_rtx (out), 1, OPTAB_DIRECT);
12244 out = expand_simple_binop (mode, AND, copy_rtx (out),
12245 gen_int_mode (cf - ct, mode),
12246 copy_rtx (out), 1, OPTAB_DIRECT);
12247 if (ct)
12248 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12249 copy_rtx (out), 1, OPTAB_DIRECT);
12250 if (!rtx_equal_p (out, operands[0]))
12251 emit_move_insn (operands[0], copy_rtx (out));
12253 return 1; /* DONE */
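  /* Illustration only - the jumpless "General case" above computes, in
     C terms (t is a temporary, cond the possibly-reversed comparison):

       t  = cond ? 1 : 0;     setcc
       t -= 1;                now 0 or -1
       t &= cf - ct;
       t += ct;               ct when cond held, cf otherwise

     matching the xorl/setcc/decl/andl/addl column of the size comparison.  */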
12257 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12259 /* Try a few things more with specific constants and a variable. */
12261 optab op;
12262 rtx var, orig_out, out, tmp;
12264 if (BRANCH_COST <= 2)
12265 return 0; /* FAIL */
12267 /* If one of the two operands is an interesting constant, load a 0 or -1
12268 constant using the expansion above and mask the variable in with a logical operation. */
12270 if (CONST_INT_P (operands[2]))
12272 var = operands[3];
12273 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12274 operands[3] = constm1_rtx, op = and_optab;
12275 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12276 operands[3] = const0_rtx, op = ior_optab;
12277 else
12278 return 0; /* FAIL */
12280 else if (CONST_INT_P (operands[3]))
12282 var = operands[2];
12283 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12284 operands[2] = constm1_rtx, op = and_optab;
12285 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12286 operands[2] = const0_rtx, op = ior_optab;
12287 else
12288 return 0; /* FAIL */
12290 else
12291 return 0; /* FAIL */
12293 orig_out = operands[0];
12294 tmp = gen_reg_rtx (mode);
12295 operands[0] = tmp;
12297 /* Recurse to get the constant loaded. */
12298 if (ix86_expand_int_movcc (operands) == 0)
12299 return 0; /* FAIL */
12301 /* Mask in the interesting variable. */
12302 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12303 OPTAB_WIDEN);
12304 if (!rtx_equal_p (out, orig_out))
12305 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12307 return 1; /* DONE */
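  /* Sketch, for illustration only (v stands for the variable arm, cond for
     the comparison in operands[1]): when the other arm is 0 or -1 the
     conditional move is replaced by a mask plus one logical op, e.g.

       dest = cond ? v : 0      ->   dest = (cond ? -1 : 0) & v;
       dest = cond ? v : -1     ->   dest = (cond ? 0 : -1) | v;

     the recursive call above loads the 0/-1 mask and the expand_binop
     call mixes the variable back in.  */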
12311 * For comparison with above,
12313 * movl cf,dest
12314 * movl ct,tmp
12315 * cmpl op1,op2
12316 * cmovcc tmp,dest
12318 * Size 15.
12321 if (! nonimmediate_operand (operands[2], mode))
12322 operands[2] = force_reg (mode, operands[2]);
12323 if (! nonimmediate_operand (operands[3], mode))
12324 operands[3] = force_reg (mode, operands[3]);
12326 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12328 rtx tmp = gen_reg_rtx (mode);
12329 emit_move_insn (tmp, operands[3]);
12330 operands[3] = tmp;
12332 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12334 rtx tmp = gen_reg_rtx (mode);
12335 emit_move_insn (tmp, operands[2]);
12336 operands[2] = tmp;
12339 if (! register_operand (operands[2], VOIDmode)
12340 && (mode == QImode
12341 || ! register_operand (operands[3], VOIDmode)))
12342 operands[2] = force_reg (mode, operands[2]);
12344 if (mode == QImode
12345 && ! register_operand (operands[3], VOIDmode))
12346 operands[3] = force_reg (mode, operands[3]);
12348 emit_insn (compare_seq);
12349 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12350 gen_rtx_IF_THEN_ELSE (mode,
12351 compare_op, operands[2],
12352 operands[3])));
12353 if (bypass_test)
12354 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12355 gen_rtx_IF_THEN_ELSE (mode,
12356 bypass_test,
12357 copy_rtx (operands[3]),
12358 copy_rtx (operands[0]))));
12359 if (second_test)
12360 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12361 gen_rtx_IF_THEN_ELSE (mode,
12362 second_test,
12363 copy_rtx (operands[2]),
12364 copy_rtx (operands[0]))));
12366 return 1; /* DONE */
12369 /* Swap, force into registers, or otherwise massage the two operands
12370 to an sse comparison with a mask result. Thus we differ a bit from
12371 ix86_prepare_fp_compare_args which expects to produce a flags result.
12373 The DEST operand exists to help determine whether to commute commutative
12374 operators. The POP0/POP1 operands are updated in place. The new
12375 comparison code is returned, or UNKNOWN if not implementable. */
12377 static enum rtx_code
12378 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12379 rtx *pop0, rtx *pop1)
12381 rtx tmp;
12383 switch (code)
12385 case LTGT:
12386 case UNEQ:
12387 /* We have no LTGT as an operator. We could implement it with
12388 NE & ORDERED, but this requires an extra temporary. It's
12389 not clear that it's worth it. */
12390 return UNKNOWN;
12392 case LT:
12393 case LE:
12394 case UNGT:
12395 case UNGE:
12396 /* These are supported directly. */
12397 break;
12399 case EQ:
12400 case NE:
12401 case UNORDERED:
12402 case ORDERED:
12403 /* For commutative operators, try to canonicalize the destination
12404 operand to be first in the comparison - this helps reload to
12405 avoid extra moves. */
12406 if (!dest || !rtx_equal_p (dest, *pop1))
12407 break;
12408 /* FALLTHRU */
12410 case GE:
12411 case GT:
12412 case UNLE:
12413 case UNLT:
12414 /* These are not supported directly. Swap the comparison operands
12415 to transform into something that is supported. */
12416 tmp = *pop0;
12417 *pop0 = *pop1;
12418 *pop1 = tmp;
12419 code = swap_condition (code);
12420 break;
12422 default:
12423 gcc_unreachable ();
12426 return code;
12429 /* Detect conditional moves that exactly match min/max operational
12430 semantics. Note that this is IEEE safe, as long as we don't
12431 interchange the operands.
12433 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12434 and TRUE if the operation is successful and instructions are emitted. */
12436 static bool
12437 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12438 rtx cmp_op1, rtx if_true, rtx if_false)
12440 enum machine_mode mode;
12441 bool is_min;
12442 rtx tmp;
12444 if (code == LT)
12446 else if (code == UNGE)
12448 tmp = if_true;
12449 if_true = if_false;
12450 if_false = tmp;
12452 else
12453 return false;
12455 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12456 is_min = true;
12457 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12458 is_min = false;
12459 else
12460 return false;
12462 mode = GET_MODE (dest);
12464 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12465 but MODE may be a vector mode and thus not appropriate. */
12466 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12468 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12469 rtvec v;
12471 if_true = force_reg (mode, if_true);
12472 v = gen_rtvec (2, if_true, if_false);
12473 tmp = gen_rtx_UNSPEC (mode, v, u);
12475 else
12477 code = is_min ? SMIN : SMAX;
12478 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12481 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12482 return true;
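/* For illustration only - the shapes recognized above are, at the source
   level (a and b being the compare operands):

     dest = a < b ? a : b;     min
     dest = a < b ? b : a;     max

   plus the UNGE forms with the arms swapped.  The operand order is kept
   as written because the SSE min/max instructions are not symmetric in
   the presence of NaNs and signed zeros, which is why this is IEEE-safe
   only without interchanging the operands.  */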
12485 /* Expand an sse vector comparison. Return the register with the result. */
12487 static rtx
12488 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12489 rtx op_true, rtx op_false)
12491 enum machine_mode mode = GET_MODE (dest);
12492 rtx x;
12494 cmp_op0 = force_reg (mode, cmp_op0);
12495 if (!nonimmediate_operand (cmp_op1, mode))
12496 cmp_op1 = force_reg (mode, cmp_op1);
12498 if (optimize
12499 || reg_overlap_mentioned_p (dest, op_true)
12500 || reg_overlap_mentioned_p (dest, op_false))
12501 dest = gen_reg_rtx (mode);
12503 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12504 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12506 return dest;
12509 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12510 operations. This is used for both scalar and vector conditional moves. */
12512 static void
12513 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12515 enum machine_mode mode = GET_MODE (dest);
12516 rtx t2, t3, x;
12518 if (op_false == CONST0_RTX (mode))
12520 op_true = force_reg (mode, op_true);
12521 x = gen_rtx_AND (mode, cmp, op_true);
12522 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12524 else if (op_true == CONST0_RTX (mode))
12526 op_false = force_reg (mode, op_false);
12527 x = gen_rtx_NOT (mode, cmp);
12528 x = gen_rtx_AND (mode, x, op_false);
12529 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12531 else
12533 op_true = force_reg (mode, op_true);
12534 op_false = force_reg (mode, op_false);
12536 t2 = gen_reg_rtx (mode);
12537 if (optimize)
12538 t3 = gen_reg_rtx (mode);
12539 else
12540 t3 = dest;
12542 x = gen_rtx_AND (mode, op_true, cmp);
12543 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12545 x = gen_rtx_NOT (mode, cmp);
12546 x = gen_rtx_AND (mode, x, op_false);
12547 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12549 x = gen_rtx_IOR (mode, t3, t2);
12550 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
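/* Sketch of the blend above, for illustration only: CMP is an element-wise
   mask of all-one or all-zero bits, and the general case computes

     dest = (cmp & op_true) | (~cmp & op_false);

   the two early cases simply drop the half that would AND with a zero
   vector.  */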
12554 /* Expand a floating-point conditional move. Return true if successful. */
12557 ix86_expand_fp_movcc (rtx operands[])
12559 enum machine_mode mode = GET_MODE (operands[0]);
12560 enum rtx_code code = GET_CODE (operands[1]);
12561 rtx tmp, compare_op, second_test, bypass_test;
12563 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12565 enum machine_mode cmode;
12567 /* Since we've no cmove for sse registers, don't force bad register
12568 allocation just to gain access to it. Deny movcc when the
12569 comparison mode doesn't match the move mode. */
12570 cmode = GET_MODE (ix86_compare_op0);
12571 if (cmode == VOIDmode)
12572 cmode = GET_MODE (ix86_compare_op1);
12573 if (cmode != mode)
12574 return 0;
12576 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12577 &ix86_compare_op0,
12578 &ix86_compare_op1);
12579 if (code == UNKNOWN)
12580 return 0;
12582 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12583 ix86_compare_op1, operands[2],
12584 operands[3]))
12585 return 1;
12587 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12588 ix86_compare_op1, operands[2], operands[3]);
12589 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12590 return 1;
12593 /* The floating point conditional move instructions don't directly
12594 support conditions resulting from a signed integer comparison. */
12596 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12601 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12603 gcc_assert (!second_test && !bypass_test);
12604 tmp = gen_reg_rtx (QImode);
12605 ix86_expand_setcc (code, tmp);
12606 code = NE;
12607 ix86_compare_op0 = tmp;
12608 ix86_compare_op1 = const0_rtx;
12609 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12611 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12613 tmp = gen_reg_rtx (mode);
12614 emit_move_insn (tmp, operands[3]);
12615 operands[3] = tmp;
12617 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12619 tmp = gen_reg_rtx (mode);
12620 emit_move_insn (tmp, operands[2]);
12621 operands[2] = tmp;
12624 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12625 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12626 operands[2], operands[3])));
12627 if (bypass_test)
12628 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12629 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12630 operands[3], operands[0])));
12631 if (second_test)
12632 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12633 gen_rtx_IF_THEN_ELSE (mode, second_test,
12634 operands[2], operands[0])));
12636 return 1;
12639 /* Expand a floating-point vector conditional move; a vcond operation
12640 rather than a movcc operation. */
12642 bool
12643 ix86_expand_fp_vcond (rtx operands[])
12645 enum rtx_code code = GET_CODE (operands[3]);
12646 rtx cmp;
12648 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12649 &operands[4], &operands[5]);
12650 if (code == UNKNOWN)
12651 return false;
12653 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12654 operands[5], operands[1], operands[2]))
12655 return true;
12657 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12658 operands[1], operands[2]);
12659 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12660 return true;
12663 /* Expand a signed integral vector conditional move. */
12665 bool
12666 ix86_expand_int_vcond (rtx operands[])
12668 enum machine_mode mode = GET_MODE (operands[0]);
12669 enum rtx_code code = GET_CODE (operands[3]);
12670 bool negate = false;
12671 rtx x, cop0, cop1;
12673 cop0 = operands[4];
12674 cop1 = operands[5];
12676 /* Canonicalize the comparison to EQ, GT, GTU. */
12677 switch (code)
12679 case EQ:
12680 case GT:
12681 case GTU:
12682 break;
12684 case NE:
12685 case LE:
12686 case LEU:
12687 code = reverse_condition (code);
12688 negate = true;
12689 break;
12691 case GE:
12692 case GEU:
12693 code = reverse_condition (code);
12694 negate = true;
12695 /* FALLTHRU */
12697 case LT:
12698 case LTU:
12699 code = swap_condition (code);
12700 x = cop0, cop0 = cop1, cop1 = x;
12701 break;
12703 default:
12704 gcc_unreachable ();
12707 /* Unsigned parallel compare is not supported by the hardware. Play some
12708 tricks to turn this into a signed comparison against 0. */
12709 if (code == GTU)
12711 cop0 = force_reg (mode, cop0);
12713 switch (mode)
12715 case V4SImode:
12717 rtx t1, t2, mask;
12719 /* Perform a parallel modulo subtraction. */
12720 t1 = gen_reg_rtx (mode);
12721 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12723 /* Extract the original sign bit of op0. */
12724 mask = GEN_INT (-0x80000000);
12725 mask = gen_rtx_CONST_VECTOR (mode,
12726 gen_rtvec (4, mask, mask, mask, mask));
12727 mask = force_reg (mode, mask);
12728 t2 = gen_reg_rtx (mode);
12729 emit_insn (gen_andv4si3 (t2, cop0, mask));
12731 /* XOR it back into the result of the subtraction. This results
12732 in the sign bit set iff we saw unsigned underflow. */
12733 x = gen_reg_rtx (mode);
12734 emit_insn (gen_xorv4si3 (x, t1, t2));
12736 code = GT;
12738 break;
12740 case V16QImode:
12741 case V8HImode:
12742 /* Perform a parallel unsigned saturating subtraction. */
12743 x = gen_reg_rtx (mode);
12744 emit_insn (gen_rtx_SET (VOIDmode, x,
12745 gen_rtx_US_MINUS (mode, cop0, cop1)));
12747 code = EQ;
12748 negate = !negate;
12749 break;
12751 default:
12752 gcc_unreachable ();
12755 cop0 = x;
12756 cop1 = CONST0_RTX (mode);
12759 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12760 operands[1+negate], operands[2-negate]);
12762 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12763 operands[2-negate]);
12764 return true;
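/* Illustration only - the V16QImode/V8HImode branch of the GTU trick above
   relies on unsigned saturating subtraction:

     a >u b   if and only if   (a -satu b) != 0

   which is expressed as an EQ-against-zero compare with the arms of the
   conditional move swapped via the NEGATE flag.  */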
12767 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12768 true if we should do zero extension, else sign extension. HIGH_P is
12769 true if we want the N/2 high elements, else the low elements. */
12771 void
12772 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12774 enum machine_mode imode = GET_MODE (operands[1]);
12775 rtx (*unpack)(rtx, rtx, rtx);
12776 rtx se, dest;
12778 switch (imode)
12780 case V16QImode:
12781 if (high_p)
12782 unpack = gen_vec_interleave_highv16qi;
12783 else
12784 unpack = gen_vec_interleave_lowv16qi;
12785 break;
12786 case V8HImode:
12787 if (high_p)
12788 unpack = gen_vec_interleave_highv8hi;
12789 else
12790 unpack = gen_vec_interleave_lowv8hi;
12791 break;
12792 case V4SImode:
12793 if (high_p)
12794 unpack = gen_vec_interleave_highv4si;
12795 else
12796 unpack = gen_vec_interleave_lowv4si;
12797 break;
12798 default:
12799 gcc_unreachable ();
12802 dest = gen_lowpart (imode, operands[0]);
12804 if (unsigned_p)
12805 se = force_reg (imode, CONST0_RTX (imode));
12806 else
12807 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12808 operands[1], pc_rtx, pc_rtx);
12810 emit_insn (unpack (dest, operands[1], se));
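/* For illustration only (pseudo-C over the vector elements): the widening
   works by interleaving each element x of operands[1] with a "sign word"

     se = unsigned_p ? 0 : (x < 0 ? -1 : 0);

   built either as a zero vector or by the greater-than compare against
   zero above, so the chosen interleave supplies the upper half of every
   widened element.  */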
12813 /* Expand conditional increment or decrement using adc/sbb instructions.
12814 The default case using setcc followed by the conditional move can be
12815 done by generic code. */
12817 ix86_expand_int_addcc (rtx operands[])
12819 enum rtx_code code = GET_CODE (operands[1]);
12820 rtx compare_op;
12821 rtx val = const0_rtx;
12822 bool fpcmp = false;
12823 enum machine_mode mode = GET_MODE (operands[0]);
12825 if (operands[3] != const1_rtx
12826 && operands[3] != constm1_rtx)
12827 return 0;
12828 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12829 ix86_compare_op1, &compare_op))
12830 return 0;
12831 code = GET_CODE (compare_op);
12833 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12834 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12836 fpcmp = true;
12837 code = ix86_fp_compare_code_to_integer (code);
12840 if (code != LTU)
12842 val = constm1_rtx;
12843 if (fpcmp)
12844 PUT_CODE (compare_op,
12845 reverse_condition_maybe_unordered
12846 (GET_CODE (compare_op)));
12847 else
12848 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12850 PUT_MODE (compare_op, mode);
12852 /* Construct either adc or sbb insn. */
12853 if ((code == LTU) == (operands[3] == constm1_rtx))
12855 switch (GET_MODE (operands[0]))
12857 case QImode:
12858 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12859 break;
12860 case HImode:
12861 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12862 break;
12863 case SImode:
12864 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12865 break;
12866 case DImode:
12867 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12868 break;
12869 default:
12870 gcc_unreachable ();
12873 else
12875 switch (GET_MODE (operands[0]))
12877 case QImode:
12878 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12879 break;
12880 case HImode:
12881 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12882 break;
12883 case SImode:
12884 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12885 break;
12886 case DImode:
12887 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12888 break;
12889 default:
12890 gcc_unreachable ();
12893 return 1; /* DONE */
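  /* Sketch only, not compiler output: the adc/sbb insns built above
     correspond to source patterns such as

       x = y + (a < b);          conditional increment
       x = y - (a < b);          conditional decrement

     where the unsigned compare sets the carry flag and the +/-1 third
     operand is folded into the carry input of a single add-with-carry
     or subtract-with-borrow.  */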
12897 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12898 works for floating point parameters and non-offsettable memories.
12899 For pushes, it returns just stack offsets; the values will be saved
12900 in the right order. At most three parts are generated. */
12902 static int
12903 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12905 int size;
12907 if (!TARGET_64BIT)
12908 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12909 else
12910 size = (GET_MODE_SIZE (mode) + 4) / 8;
12912 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12913 gcc_assert (size >= 2 && size <= 3);
12915 /* Optimize constant pool reference to immediates. This is used by fp
12916 moves, that force all constants to memory to allow combining. */
12917 if (MEM_P (operand) && MEM_READONLY_P (operand))
12919 rtx tmp = maybe_get_pool_constant (operand);
12920 if (tmp)
12921 operand = tmp;
12924 if (MEM_P (operand) && !offsettable_memref_p (operand))
12926 /* The only non-offsettable memories we handle are pushes. */
12927 int ok = push_operand (operand, VOIDmode);
12929 gcc_assert (ok);
12931 operand = copy_rtx (operand);
12932 PUT_MODE (operand, Pmode);
12933 parts[0] = parts[1] = parts[2] = operand;
12934 return size;
12937 if (GET_CODE (operand) == CONST_VECTOR)
12939 enum machine_mode imode = int_mode_for_mode (mode);
12940 /* Caution: if we looked through a constant pool memory above,
12941 the operand may actually have a different mode now. That's
12942 ok, since we want to pun this all the way back to an integer. */
12943 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12944 gcc_assert (operand != NULL);
12945 mode = imode;
12948 if (!TARGET_64BIT)
12950 if (mode == DImode)
12951 split_di (&operand, 1, &parts[0], &parts[1]);
12952 else
12954 if (REG_P (operand))
12956 gcc_assert (reload_completed);
12957 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12958 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12959 if (size == 3)
12960 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12962 else if (offsettable_memref_p (operand))
12964 operand = adjust_address (operand, SImode, 0);
12965 parts[0] = operand;
12966 parts[1] = adjust_address (operand, SImode, 4);
12967 if (size == 3)
12968 parts[2] = adjust_address (operand, SImode, 8);
12970 else if (GET_CODE (operand) == CONST_DOUBLE)
12972 REAL_VALUE_TYPE r;
12973 long l[4];
12975 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12976 switch (mode)
12978 case XFmode:
12979 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12980 parts[2] = gen_int_mode (l[2], SImode);
12981 break;
12982 case DFmode:
12983 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12984 break;
12985 default:
12986 gcc_unreachable ();
12988 parts[1] = gen_int_mode (l[1], SImode);
12989 parts[0] = gen_int_mode (l[0], SImode);
12991 else
12992 gcc_unreachable ();
12995 else
12997 if (mode == TImode)
12998 split_ti (&operand, 1, &parts[0], &parts[1]);
12999 if (mode == XFmode || mode == TFmode)
13001 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13002 if (REG_P (operand))
13004 gcc_assert (reload_completed);
13005 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13006 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13008 else if (offsettable_memref_p (operand))
13010 operand = adjust_address (operand, DImode, 0);
13011 parts[0] = operand;
13012 parts[1] = adjust_address (operand, upper_mode, 8);
13014 else if (GET_CODE (operand) == CONST_DOUBLE)
13016 REAL_VALUE_TYPE r;
13017 long l[4];
13019 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13020 real_to_target (l, &r, mode);
13022 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13023 if (HOST_BITS_PER_WIDE_INT >= 64)
13024 parts[0]
13025 = gen_int_mode
13026 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13027 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13028 DImode);
13029 else
13030 parts[0] = immed_double_const (l[0], l[1], DImode);
13032 if (upper_mode == SImode)
13033 parts[1] = gen_int_mode (l[2], SImode);
13034 else if (HOST_BITS_PER_WIDE_INT >= 64)
13035 parts[1]
13036 = gen_int_mode
13037 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13038 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13039 DImode);
13040 else
13041 parts[1] = immed_double_const (l[2], l[3], DImode);
13043 else
13044 gcc_unreachable ();
13048 return size;
13051 /* Emit insns to perform a move or push of DI, DF, and XF values.
13052 All required insns are emitted directly by this function.
13053 Operands 2-4 contain the input values
13054 in the correct order; operands 5-7 contain the output values. */
13056 void
13057 ix86_split_long_move (rtx operands[])
13059 rtx part[2][3];
13060 int nparts;
13061 int push = 0;
13062 int collisions = 0;
13063 enum machine_mode mode = GET_MODE (operands[0]);
13065 /* The DFmode expanders may ask us to move a double.
13066 For a 64bit target this is a single move. By hiding the fact
13067 here we simplify the i386.md splitters. */
13068 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13070 /* Optimize constant pool reference to immediates. This is used by
13071 fp moves, that force all constants to memory to allow combining. */
13073 if (MEM_P (operands[1])
13074 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13075 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13076 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13077 if (push_operand (operands[0], VOIDmode))
13079 operands[0] = copy_rtx (operands[0]);
13080 PUT_MODE (operands[0], Pmode);
13082 else
13083 operands[0] = gen_lowpart (DImode, operands[0]);
13084 operands[1] = gen_lowpart (DImode, operands[1]);
13085 emit_move_insn (operands[0], operands[1]);
13086 return;
13089 /* The only non-offsettable memory we handle is push. */
13090 if (push_operand (operands[0], VOIDmode))
13091 push = 1;
13092 else
13093 gcc_assert (!MEM_P (operands[0])
13094 || offsettable_memref_p (operands[0]));
13096 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13097 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13099 /* When emitting a push, take care of source operands on the stack. */
13100 if (push && MEM_P (operands[1])
13101 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13103 if (nparts == 3)
13104 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13105 XEXP (part[1][2], 0));
13106 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13107 XEXP (part[1][1], 0));
13110 /* We need to do the copy in the right order in case an address register
13111 of the source overlaps the destination. */
13112 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13114 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13115 collisions++;
13116 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13117 collisions++;
13118 if (nparts == 3
13119 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13120 collisions++;
13122 /* Collision in the middle part can be handled by reordering. */
13123 if (collisions == 1 && nparts == 3
13124 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13126 rtx tmp;
13127 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13128 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13131 /* If there are more collisions, we can't handle it by reordering.
13132 Do an lea to the last part and use only one colliding move. */
13133 else if (collisions > 1)
13135 rtx base;
13137 collisions = 1;
13139 base = part[0][nparts - 1];
13141 /* Handle the case when the last part isn't valid for lea.
13142 Happens in 64-bit mode storing the 12-byte XFmode. */
13143 if (GET_MODE (base) != Pmode)
13144 base = gen_rtx_REG (Pmode, REGNO (base));
13146 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13147 part[1][0] = replace_equiv_address (part[1][0], base);
13148 part[1][1] = replace_equiv_address (part[1][1],
13149 plus_constant (base, UNITS_PER_WORD));
13150 if (nparts == 3)
13151 part[1][2] = replace_equiv_address (part[1][2],
13152 plus_constant (base, 8));
13156 if (push)
13158 if (!TARGET_64BIT)
13160 if (nparts == 3)
13162 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13163 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13164 emit_move_insn (part[0][2], part[1][2]);
13167 else
13169 /* In 64bit mode we don't have a 32bit push available. In case this is a
13170 register, it is OK - we will just use the larger counterpart. We also
13171 retype memory - this comes from an attempt to avoid the REX prefix on
13172 moving the second half of a TFmode value. */
13173 if (GET_MODE (part[1][1]) == SImode)
13175 switch (GET_CODE (part[1][1]))
13177 case MEM:
13178 part[1][1] = adjust_address (part[1][1], DImode, 0);
13179 break;
13181 case REG:
13182 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13183 break;
13185 default:
13186 gcc_unreachable ();
13189 if (GET_MODE (part[1][0]) == SImode)
13190 part[1][0] = part[1][1];
13193 emit_move_insn (part[0][1], part[1][1]);
13194 emit_move_insn (part[0][0], part[1][0]);
13195 return;
13198 /* Choose correct order to not overwrite the source before it is copied. */
13199 if ((REG_P (part[0][0])
13200 && REG_P (part[1][1])
13201 && (REGNO (part[0][0]) == REGNO (part[1][1])
13202 || (nparts == 3
13203 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13204 || (collisions > 0
13205 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13207 if (nparts == 3)
13209 operands[2] = part[0][2];
13210 operands[3] = part[0][1];
13211 operands[4] = part[0][0];
13212 operands[5] = part[1][2];
13213 operands[6] = part[1][1];
13214 operands[7] = part[1][0];
13216 else
13218 operands[2] = part[0][1];
13219 operands[3] = part[0][0];
13220 operands[5] = part[1][1];
13221 operands[6] = part[1][0];
13224 else
13226 if (nparts == 3)
13228 operands[2] = part[0][0];
13229 operands[3] = part[0][1];
13230 operands[4] = part[0][2];
13231 operands[5] = part[1][0];
13232 operands[6] = part[1][1];
13233 operands[7] = part[1][2];
13235 else
13237 operands[2] = part[0][0];
13238 operands[3] = part[0][1];
13239 operands[5] = part[1][0];
13240 operands[6] = part[1][1];
13244 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13245 if (optimize_size)
13247 if (CONST_INT_P (operands[5])
13248 && operands[5] != const0_rtx
13249 && REG_P (operands[2]))
13251 if (CONST_INT_P (operands[6])
13252 && INTVAL (operands[6]) == INTVAL (operands[5]))
13253 operands[6] = operands[2];
13255 if (nparts == 3
13256 && CONST_INT_P (operands[7])
13257 && INTVAL (operands[7]) == INTVAL (operands[5]))
13258 operands[7] = operands[2];
13261 if (nparts == 3
13262 && CONST_INT_P (operands[6])
13263 && operands[6] != const0_rtx
13264 && REG_P (operands[3])
13265 && CONST_INT_P (operands[7])
13266 && INTVAL (operands[7]) == INTVAL (operands[6]))
13267 operands[7] = operands[3];
13270 emit_move_insn (operands[2], operands[5]);
13271 emit_move_insn (operands[3], operands[6]);
13272 if (nparts == 3)
13273 emit_move_insn (operands[4], operands[7]);
13275 return;
13278 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13279 left shift by a constant, either using a single shift or
13280 a sequence of add instructions. */
13282 static void
13283 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13285 if (count == 1)
13287 emit_insn ((mode == DImode
13288 ? gen_addsi3
13289 : gen_adddi3) (operand, operand, operand));
13291 else if (!optimize_size
13292 && count * ix86_cost->add <= ix86_cost->shift_const)
13294 int i;
13295 for (i=0; i<count; i++)
13297 emit_insn ((mode == DImode
13298 ? gen_addsi3
13299 : gen_adddi3) (operand, operand, operand));
13302 else
13303 emit_insn ((mode == DImode
13304 ? gen_ashlsi3
13305 : gen_ashldi3) (operand, operand, GEN_INT (count)));
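/* Illustration only: this exploits the identity

     x << 1  ==  x + x

   so a shift by a small constant COUNT is emitted as COUNT additions
   whenever COUNT * (cost of add) does not exceed the cost of one
   shift-by-constant and we are not optimizing for size.  */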
13308 void
13309 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13311 rtx low[2], high[2];
13312 int count;
13313 const int single_width = mode == DImode ? 32 : 64;
13315 if (CONST_INT_P (operands[2]))
13317 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13318 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13320 if (count >= single_width)
13322 emit_move_insn (high[0], low[1]);
13323 emit_move_insn (low[0], const0_rtx);
13325 if (count > single_width)
13326 ix86_expand_ashl_const (high[0], count - single_width, mode);
13328 else
13330 if (!rtx_equal_p (operands[0], operands[1]))
13331 emit_move_insn (operands[0], operands[1]);
13332 emit_insn ((mode == DImode
13333 ? gen_x86_shld_1
13334 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13335 ix86_expand_ashl_const (low[0], count, mode);
13337 return;
13340 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13342 if (operands[1] == const1_rtx)
13344 /* Assuming we've chosen QImode-capable registers, then 1 << N
13345 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13346 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13348 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13350 ix86_expand_clear (low[0]);
13351 ix86_expand_clear (high[0]);
13352 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13354 d = gen_lowpart (QImode, low[0]);
13355 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13356 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13357 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13359 d = gen_lowpart (QImode, high[0]);
13360 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13361 s = gen_rtx_NE (QImode, flags, const0_rtx);
13362 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13365 /* Otherwise, we can get the same results by manually performing
13366 a bit extract operation on bit 5/6, and then performing the two
13367 shifts. The two methods of getting 0/1 into low/high are exactly
13368 the same size. Avoiding the shift in the bit extract case helps
13369 pentium4 a bit; no one else seems to care much either way. */
13370 else
13372 rtx x;
13374 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13375 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13376 else
13377 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13378 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13380 emit_insn ((mode == DImode
13381 ? gen_lshrsi3
13382 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13383 emit_insn ((mode == DImode
13384 ? gen_andsi3
13385 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13386 emit_move_insn (low[0], high[0]);
13387 emit_insn ((mode == DImode
13388 ? gen_xorsi3
13389 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13392 emit_insn ((mode == DImode
13393 ? gen_ashlsi3
13394 : gen_ashldi3) (low[0], low[0], operands[2]));
13395 emit_insn ((mode == DImode
13396 ? gen_ashlsi3
13397 : gen_ashldi3) (high[0], high[0], operands[2]));
13398 return;
13401 if (operands[1] == constm1_rtx)
13403 /* For -1 << N, we can avoid the shld instruction, because we
13404 know that we're shifting 0...31/63 ones into a -1. */
13405 emit_move_insn (low[0], constm1_rtx);
13406 if (optimize_size)
13407 emit_move_insn (high[0], low[0]);
13408 else
13409 emit_move_insn (high[0], constm1_rtx);
13411 else
13413 if (!rtx_equal_p (operands[0], operands[1]))
13414 emit_move_insn (operands[0], operands[1]);
13416 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13417 emit_insn ((mode == DImode
13418 ? gen_x86_shld_1
13419 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13422 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13424 if (TARGET_CMOVE && scratch)
13426 ix86_expand_clear (scratch);
13427 emit_insn ((mode == DImode
13428 ? gen_x86_shift_adj_1
13429 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13431 else
13432 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
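/* For illustration only - a C-level sketch of the variable-count path above,
   with BITS standing for the width of one half (32 or 64) and 0 < c < BITS:

     hi = (hi << c) | (lo >> (BITS - c));    the shld step
     lo =  lo << c;

   and since the hardware masks the count, a separate adjustment step
   handles c >= BITS by moving lo into hi and clearing lo (via cmove when
   available, otherwise a branch).  */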
13435 void
13436 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13438 rtx low[2], high[2];
13439 int count;
13440 const int single_width = mode == DImode ? 32 : 64;
13442 if (CONST_INT_P (operands[2]))
13444 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13445 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13447 if (count == single_width * 2 - 1)
13449 emit_move_insn (high[0], high[1]);
13450 emit_insn ((mode == DImode
13451 ? gen_ashrsi3
13452 : gen_ashrdi3) (high[0], high[0],
13453 GEN_INT (single_width - 1)));
13454 emit_move_insn (low[0], high[0]);
13457 else if (count >= single_width)
13459 emit_move_insn (low[0], high[1]);
13460 emit_move_insn (high[0], low[0]);
13461 emit_insn ((mode == DImode
13462 ? gen_ashrsi3
13463 : gen_ashrdi3) (high[0], high[0],
13464 GEN_INT (single_width - 1)));
13465 if (count > single_width)
13466 emit_insn ((mode == DImode
13467 ? gen_ashrsi3
13468 : gen_ashrdi3) (low[0], low[0],
13469 GEN_INT (count - single_width)));
13471 else
13473 if (!rtx_equal_p (operands[0], operands[1]))
13474 emit_move_insn (operands[0], operands[1]);
13475 emit_insn ((mode == DImode
13476 ? gen_x86_shrd_1
13477 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13478 emit_insn ((mode == DImode
13479 ? gen_ashrsi3
13480 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13483 else
13485 if (!rtx_equal_p (operands[0], operands[1]))
13486 emit_move_insn (operands[0], operands[1]);
13488 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13490 emit_insn ((mode == DImode
13491 ? gen_x86_shrd_1
13492 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13493 emit_insn ((mode == DImode
13494 ? gen_ashrsi3
13495 : gen_ashrdi3) (high[0], high[0], operands[2]));
13497 if (TARGET_CMOVE && scratch)
13499 emit_move_insn (scratch, high[0]);
13500 emit_insn ((mode == DImode
13501 ? gen_ashrsi3
13502 : gen_ashrdi3) (scratch, scratch,
13503 GEN_INT (single_width - 1)));
13504 emit_insn ((mode == DImode
13505 ? gen_x86_shift_adj_1
13506 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13507 scratch));
13509 else
13510 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
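/* Illustration only - the constant-count arithmetic right shift above
   reduces to (BITS is the width of one half; >> on hi is arithmetic):

     c == 2*BITS - 1 :  lo = hi = hi >> (BITS - 1);
     c >= BITS       :  lo = hi >> (c - BITS);  hi = hi >> (BITS - 1);
     0 < c < BITS    :  lo = shrd (lo, hi, c);  hi = hi >> c;

   where the shrd step computes lo = (lo >> c) | (hi << (BITS - c)).  */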
13514 void
13515 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13517 rtx low[2], high[2];
13518 int count;
13519 const int single_width = mode == DImode ? 32 : 64;
13521 if (CONST_INT_P (operands[2]))
13523 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13524 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13526 if (count >= single_width)
13528 emit_move_insn (low[0], high[1]);
13529 ix86_expand_clear (high[0]);
13531 if (count > single_width)
13532 emit_insn ((mode == DImode
13533 ? gen_lshrsi3
13534 : gen_lshrdi3) (low[0], low[0],
13535 GEN_INT (count - single_width)));
13537 else
13539 if (!rtx_equal_p (operands[0], operands[1]))
13540 emit_move_insn (operands[0], operands[1]);
13541 emit_insn ((mode == DImode
13542 ? gen_x86_shrd_1
13543 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13544 emit_insn ((mode == DImode
13545 ? gen_lshrsi3
13546 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13549 else
13551 if (!rtx_equal_p (operands[0], operands[1]))
13552 emit_move_insn (operands[0], operands[1]);
13554 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13556 emit_insn ((mode == DImode
13557 ? gen_x86_shrd_1
13558 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13559 emit_insn ((mode == DImode
13560 ? gen_lshrsi3
13561 : gen_lshrdi3) (high[0], high[0], operands[2]));
13563 /* Heh. By reversing the arguments, we can reuse this pattern. */
13564 if (TARGET_CMOVE && scratch)
13566 ix86_expand_clear (scratch);
13567 emit_insn ((mode == DImode
13568 ? gen_x86_shift_adj_1
13569 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13570 scratch));
13572 else
13573 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13577 /* Predict just emitted jump instruction to be taken with probability PROB. */
13578 static void
13579 predict_jump (int prob)
13581 rtx insn = get_last_insn ();
13582 gcc_assert (JUMP_P (insn));
13583 REG_NOTES (insn)
13584 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13585 GEN_INT (prob),
13586 REG_NOTES (insn));
13589 /* Helper function for the string operations below. Test VARIABLE whether
13590 it is aligned to VALUE bytes. If true, jump to the label. */
13591 static rtx
13592 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13594 rtx label = gen_label_rtx ();
13595 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13596 if (GET_MODE (variable) == DImode)
13597 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13598 else
13599 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13600 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13601 1, label);
13602 if (epilogue)
13603 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13604 else
13605 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13606 return label;
13609 /* Adjust COUNTREG by VALUE. */
13610 static void
13611 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13613 if (GET_MODE (countreg) == DImode)
13614 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13615 else
13616 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13619 /* Zero-extend a possibly SImode EXP to a Pmode register. */
13621 ix86_zero_extend_to_Pmode (rtx exp)
13623 rtx r;
13624 if (GET_MODE (exp) == VOIDmode)
13625 return force_reg (Pmode, exp);
13626 if (GET_MODE (exp) == Pmode)
13627 return copy_to_mode_reg (Pmode, exp);
13628 r = gen_reg_rtx (Pmode);
13629 emit_insn (gen_zero_extendsidi2 (r, exp));
13630 return r;
13633 /* Divide COUNTREG by SCALE. */
13634 static rtx
13635 scale_counter (rtx countreg, int scale)
13637 rtx sc;
13638 rtx piece_size_mask;
13640 if (scale == 1)
13641 return countreg;
13642 if (CONST_INT_P (countreg))
13643 return GEN_INT (INTVAL (countreg) / scale);
13644 gcc_assert (REG_P (countreg));
13646 piece_size_mask = GEN_INT (scale - 1);
13647 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13648 GEN_INT (exact_log2 (scale)),
13649 NULL, 1, OPTAB_DIRECT);
13650 return sc;
13653 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13654 DImode for constant loop counts. */
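/* For instance, a CONST_INT count of 0x100000000 on a 64-bit target yields
   DImode, while smaller constant counts yield SImode; a count rtx that
   carries its own mode is returned unchanged.  */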
13656 static enum machine_mode
13657 counter_mode (rtx count_exp)
13659 if (GET_MODE (count_exp) != VOIDmode)
13660 return GET_MODE (count_exp);
13661 if (GET_CODE (count_exp) != CONST_INT)
13662 return Pmode;
13663 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13664 return DImode;
13665 return SImode;
13668 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
13669 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
13670 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13671 equivalent loop to set memory to VALUE (assumed to be in MODE).
13673 The size is rounded down to a whole number of chunks moved at once.
13674 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
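/* As an illustration (not literal emitted RTL), with chunk equal to
   GET_MODE_SIZE (MODE) * UNROLL the generated code corresponds roughly to:

       size = count & ~(chunk - 1);
       if (size == 0) goto out;         -- guard emitted only when chunk == 1
       iter = 0;
     top:
       *(dest + iter) = *(src + iter);  -- or "= value" in the memset case
       iter += chunk;
       if (iter < size) goto top;
       dest += iter;  src += iter;      -- src advanced only when SRCPTR is non-NULL
     out:;  */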
13677 static void
13678 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13679 rtx destptr, rtx srcptr, rtx value,
13680 rtx count, enum machine_mode mode, int unroll,
13681 int expected_size)
13683 rtx out_label, top_label, iter, tmp;
13684 enum machine_mode iter_mode = counter_mode (count);
13685 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13686 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13687 rtx size;
13688 rtx x_addr;
13689 rtx y_addr;
13690 int i;
13692 top_label = gen_label_rtx ();
13693 out_label = gen_label_rtx ();
13694 iter = gen_reg_rtx (iter_mode);
13696 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13697 NULL, 1, OPTAB_DIRECT);
13698 /* Those two should combine. */
13699 if (piece_size == const1_rtx)
13701 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13702 true, out_label);
13703 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13705 emit_move_insn (iter, const0_rtx);
13707 emit_label (top_label);
13709 tmp = convert_modes (Pmode, iter_mode, iter, true);
13710 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13711 destmem = change_address (destmem, mode, x_addr);
13713 if (srcmem)
13715 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13716 srcmem = change_address (srcmem, mode, y_addr);
13718 /* When unrolling for chips that reorder memory reads and writes,
13719 we can save registers by using a single temporary.
13720 Also, using 4 temporaries is overkill in 32-bit mode. */
13721 if (!TARGET_64BIT && 0)
13723 for (i = 0; i < unroll; i++)
13725 if (i)
13727 destmem =
13728 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13729 srcmem =
13730 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13732 emit_move_insn (destmem, srcmem);
13735 else
13737 rtx tmpreg[4];
13738 gcc_assert (unroll <= 4);
13739 for (i = 0; i < unroll; i++)
13741 tmpreg[i] = gen_reg_rtx (mode);
13742 if (i)
13744 srcmem =
13745 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13747 emit_move_insn (tmpreg[i], srcmem);
13749 for (i = 0; i < unroll; i++)
13751 if (i)
13753 destmem =
13754 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13756 emit_move_insn (destmem, tmpreg[i]);
13760 else
13761 for (i = 0; i < unroll; i++)
13763 if (i)
13764 destmem =
13765 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13766 emit_move_insn (destmem, value);
13769 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13770 true, OPTAB_LIB_WIDEN);
13771 if (tmp != iter)
13772 emit_move_insn (iter, tmp);
13774 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13775 true, top_label);
13776 if (expected_size != -1)
13778 expected_size /= GET_MODE_SIZE (mode) * unroll;
13779 if (expected_size == 0)
13780 predict_jump (0);
13781 else if (expected_size > REG_BR_PROB_BASE)
13782 predict_jump (REG_BR_PROB_BASE - 1);
13783 else
13784 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13786 else
13787 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13788 iter = ix86_zero_extend_to_Pmode (iter);
13789 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13790 true, OPTAB_LIB_WIDEN);
13791 if (tmp != destptr)
13792 emit_move_insn (destptr, tmp);
13793 if (srcptr)
13795 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13796 true, OPTAB_LIB_WIDEN);
13797 if (tmp != srcptr)
13798 emit_move_insn (srcptr, tmp);
13800 emit_label (out_label);
13803 /* Output a "rep; mov" instruction.
13804 Arguments have the same meaning as in the previous function. */
13805 static void
13806 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13807 rtx destptr, rtx srcptr,
13808 rtx count,
13809 enum machine_mode mode)
13811 rtx destexp;
13812 rtx srcexp;
13813 rtx countreg;
13815 /* When the size is known to be a multiple of 4, it is shorter to use rep movsl than rep movsb. */
13816 if (mode == QImode && CONST_INT_P (count)
13817 && !(INTVAL (count) & 3))
13818 mode = SImode;
13820 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13821 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13822 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13823 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13824 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13825 if (mode != QImode)
13827 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13828 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13829 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13830 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13831 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13832 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13834 else
13836 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13837 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13839 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13840 destexp, srcexp));
13843 /* Output a "rep; stos" instruction.
13844 Arguments have the same meaning as in the previous function. */
13845 static void
13846 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13847 rtx count,
13848 enum machine_mode mode)
13850 rtx destexp;
13851 rtx countreg;
13853 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13854 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13855 value = force_reg (mode, gen_lowpart (mode, value));
13856 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13857 if (mode != QImode)
13859 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13860 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13861 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13863 else
13864 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13865 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13868 static void
13869 emit_strmov (rtx destmem, rtx srcmem,
13870 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13872 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13873 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13874 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13877 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13878 static void
13879 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13880 rtx destptr, rtx srcptr, rtx count, int max_size)
13882 rtx src, dest;
13883 if (CONST_INT_P (count))
13885 HOST_WIDE_INT countval = INTVAL (count);
13886 int offset = 0;
13888 if ((countval & 0x10) && max_size > 16)
13890 if (TARGET_64BIT)
13892 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13893 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13895 else
13896 gcc_unreachable ();
13897 offset += 16;
13899 if ((countval & 0x08) && max_size > 8)
13901 if (TARGET_64BIT)
13902 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13903 else
13905 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13906 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13908 offset += 8;
13910 if ((countval & 0x04) && max_size > 4)
13912 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13913 offset += 4;
13915 if ((countval & 0x02) && max_size > 2)
13917 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13918 offset += 2;
13920 if ((countval & 0x01) && max_size > 1)
13922 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13923 offset += 1;
13925 return;
13927 if (max_size > 8)
13929 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13930 count, 1, OPTAB_DIRECT);
13931 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13932 count, QImode, 1, 4);
13933 return;
13936 /* When single stringop instructions are available, we can cheaply advance the
13937 dest and src pointers. Otherwise we save code size by maintaining an offset
13938 (zero is readily available from the preceding rep operation) and using x86 addressing modes. */
13940 if (TARGET_SINGLE_STRINGOP)
13942 if (max_size > 4)
13944 rtx label = ix86_expand_aligntest (count, 4, true);
13945 src = change_address (srcmem, SImode, srcptr);
13946 dest = change_address (destmem, SImode, destptr);
13947 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13948 emit_label (label);
13949 LABEL_NUSES (label) = 1;
13951 if (max_size > 2)
13953 rtx label = ix86_expand_aligntest (count, 2, true);
13954 src = change_address (srcmem, HImode, srcptr);
13955 dest = change_address (destmem, HImode, destptr);
13956 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13957 emit_label (label);
13958 LABEL_NUSES (label) = 1;
13960 if (max_size > 1)
13962 rtx label = ix86_expand_aligntest (count, 1, true);
13963 src = change_address (srcmem, QImode, srcptr);
13964 dest = change_address (destmem, QImode, destptr);
13965 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13966 emit_label (label);
13967 LABEL_NUSES (label) = 1;
13970 else
13972 rtx offset = force_reg (Pmode, const0_rtx);
13973 rtx tmp;
13975 if (max_size > 4)
13977 rtx label = ix86_expand_aligntest (count, 4, true);
13978 src = change_address (srcmem, SImode, srcptr);
13979 dest = change_address (destmem, SImode, destptr);
13980 emit_move_insn (dest, src);
13981 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13982 true, OPTAB_LIB_WIDEN);
13983 if (tmp != offset)
13984 emit_move_insn (offset, tmp);
13985 emit_label (label);
13986 LABEL_NUSES (label) = 1;
13988 if (max_size > 2)
13990 rtx label = ix86_expand_aligntest (count, 2, true);
13991 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13992 src = change_address (srcmem, HImode, tmp);
13993 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13994 dest = change_address (destmem, HImode, tmp);
13995 emit_move_insn (dest, src);
13996 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13997 true, OPTAB_LIB_WIDEN);
13998 if (tmp != offset)
13999 emit_move_insn (offset, tmp);
14000 emit_label (label);
14001 LABEL_NUSES (label) = 1;
14003 if (max_size > 1)
14005 rtx label = ix86_expand_aligntest (count, 1, true);
14006 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14007 src = change_address (srcmem, QImode, tmp);
14008 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14009 dest = change_address (destmem, QImode, tmp);
14010 emit_move_insn (dest, src);
14011 emit_label (label);
14012 LABEL_NUSES (label) = 1;
14017 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14018 static void
14019 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14020 rtx count, int max_size)
14022 count =
14023 expand_simple_binop (counter_mode (count), AND, count,
14024 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14025 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14026 gen_lowpart (QImode, value), count, QImode,
14027 1, max_size / 2);
14030 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14031 static void
14032 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14034 rtx dest;
14036 if (CONST_INT_P (count))
14038 HOST_WIDE_INT countval = INTVAL (count);
14039 int offset = 0;
14041 if ((countval & 0x10) && max_size > 16)
14043 if (TARGET_64BIT)
14045 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14046 emit_insn (gen_strset (destptr, dest, value));
14047 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14048 emit_insn (gen_strset (destptr, dest, value));
14050 else
14051 gcc_unreachable ();
14052 offset += 16;
14054 if ((countval & 0x08) && max_size > 8)
14056 if (TARGET_64BIT)
14058 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14059 emit_insn (gen_strset (destptr, dest, value));
14061 else
14063 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14064 emit_insn (gen_strset (destptr, dest, value));
14065 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14066 emit_insn (gen_strset (destptr, dest, value));
14068 offset += 8;
14070 if ((countval & 0x04) && max_size > 4)
14072 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14073 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14074 offset += 4;
14076 if ((countval & 0x02) && max_size > 2)
14078 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14079 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14080 offset += 2;
14082 if ((countval & 0x01) && max_size > 1)
14084 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14085 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14086 offset += 1;
14088 return;
14090 if (max_size > 32)
14092 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14093 return;
14095 if (max_size > 16)
14097 rtx label = ix86_expand_aligntest (count, 16, true);
14098 if (TARGET_64BIT)
14100 dest = change_address (destmem, DImode, destptr);
14101 emit_insn (gen_strset (destptr, dest, value));
14102 emit_insn (gen_strset (destptr, dest, value));
14104 else
14106 dest = change_address (destmem, SImode, destptr);
14107 emit_insn (gen_strset (destptr, dest, value));
14108 emit_insn (gen_strset (destptr, dest, value));
14109 emit_insn (gen_strset (destptr, dest, value));
14110 emit_insn (gen_strset (destptr, dest, value));
14112 emit_label (label);
14113 LABEL_NUSES (label) = 1;
14115 if (max_size > 8)
14117 rtx label = ix86_expand_aligntest (count, 8, true);
14118 if (TARGET_64BIT)
14120 dest = change_address (destmem, DImode, destptr);
14121 emit_insn (gen_strset (destptr, dest, value));
14123 else
14125 dest = change_address (destmem, SImode, destptr);
14126 emit_insn (gen_strset (destptr, dest, value));
14127 emit_insn (gen_strset (destptr, dest, value));
14129 emit_label (label);
14130 LABEL_NUSES (label) = 1;
14132 if (max_size > 4)
14134 rtx label = ix86_expand_aligntest (count, 4, true);
14135 dest = change_address (destmem, SImode, destptr);
14136 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14137 emit_label (label);
14138 LABEL_NUSES (label) = 1;
14140 if (max_size > 2)
14142 rtx label = ix86_expand_aligntest (count, 2, true);
14143 dest = change_address (destmem, HImode, destptr);
14144 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14145 emit_label (label);
14146 LABEL_NUSES (label) = 1;
14148 if (max_size > 1)
14150 rtx label = ix86_expand_aligntest (count, 1, true);
14151 dest = change_address (destmem, QImode, destptr);
14152 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14153 emit_label (label);
14154 LABEL_NUSES (label) = 1;
14158 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN, to
14159 DESIRED_ALIGNMENT. */
14160 static void
14161 expand_movmem_prologue (rtx destmem, rtx srcmem,
14162 rtx destptr, rtx srcptr, rtx count,
14163 int align, int desired_alignment)
14165 if (align <= 1 && desired_alignment > 1)
14167 rtx label = ix86_expand_aligntest (destptr, 1, false);
14168 srcmem = change_address (srcmem, QImode, srcptr);
14169 destmem = change_address (destmem, QImode, destptr);
14170 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14171 ix86_adjust_counter (count, 1);
14172 emit_label (label);
14173 LABEL_NUSES (label) = 1;
14175 if (align <= 2 && desired_alignment > 2)
14177 rtx label = ix86_expand_aligntest (destptr, 2, false);
14178 srcmem = change_address (srcmem, HImode, srcptr);
14179 destmem = change_address (destmem, HImode, destptr);
14180 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14181 ix86_adjust_counter (count, 2);
14182 emit_label (label);
14183 LABEL_NUSES (label) = 1;
14185 if (align <= 4 && desired_alignment > 4)
14187 rtx label = ix86_expand_aligntest (destptr, 4, false);
14188 srcmem = change_address (srcmem, SImode, srcptr);
14189 destmem = change_address (destmem, SImode, destptr);
14190 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14191 ix86_adjust_counter (count, 4);
14192 emit_label (label);
14193 LABEL_NUSES (label) = 1;
14195 gcc_assert (desired_alignment <= 8);
14198 /* Set enough bytes of DEST to align DEST, known to be aligned by ALIGN, to
14199 DESIRED_ALIGNMENT. */
14200 static void
14201 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14202 int align, int desired_alignment)
14204 if (align <= 1 && desired_alignment > 1)
14206 rtx label = ix86_expand_aligntest (destptr, 1, false);
14207 destmem = change_address (destmem, QImode, destptr);
14208 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14209 ix86_adjust_counter (count, 1);
14210 emit_label (label);
14211 LABEL_NUSES (label) = 1;
14213 if (align <= 2 && desired_alignment > 2)
14215 rtx label = ix86_expand_aligntest (destptr, 2, false);
14216 destmem = change_address (destmem, HImode, destptr);
14217 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14218 ix86_adjust_counter (count, 2);
14219 emit_label (label);
14220 LABEL_NUSES (label) = 1;
14222 if (align <= 4 && desired_alignment > 4)
14224 rtx label = ix86_expand_aligntest (destptr, 4, false);
14225 destmem = change_address (destmem, SImode, destptr);
14226 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14227 ix86_adjust_counter (count, 4);
14228 emit_label (label);
14229 LABEL_NUSES (label) = 1;
14231 gcc_assert (desired_alignment <= 8);
14234 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14235 static enum stringop_alg
14236 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14237 int *dynamic_check)
14239 const struct stringop_algs * algs;
14241 *dynamic_check = -1;
14242 if (memset)
14243 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14244 else
14245 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14246 if (stringop_alg != no_stringop)
14247 return stringop_alg;
14248 /* rep; movq or rep; movl is the smallest variant. */
14249 else if (optimize_size)
14251 if (!count || (count & 3))
14252 return rep_prefix_1_byte;
14253 else
14254 return rep_prefix_4_byte;
14256 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
14258 else if (expected_size != -1 && expected_size < 4)
14259 return loop_1_byte;
14260 else if (expected_size != -1)
14262 unsigned int i;
14263 enum stringop_alg alg = libcall;
14264 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14266 gcc_assert (algs->size[i].max);
14267 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14269 if (algs->size[i].alg != libcall)
14270 alg = algs->size[i].alg;
14271 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14272 last non-libcall inline algorithm. */
14273 if (TARGET_INLINE_ALL_STRINGOPS)
14275 /* When the current size is best copied by a libcall,
14276 but we are still forced to inline, run the heuristic below
14277 that picks code for medium-sized blocks. */
14278 if (alg != libcall)
14279 return alg;
14280 break;
14282 else
14283 return algs->size[i].alg;
14286 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14288 /* When asked to inline the call anyway, try to pick a meaningful choice.
14289 We look for the maximal size of block that is faster to copy by hand and
14290 take blocks of at most that size, guessing that the average size will
14291 be roughly half of the block.
14293 If this turns out to be bad, we might simply specify the preferred
14294 choice in ix86_costs. */
14295 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14296 && algs->unknown_size == libcall)
14298 int max = -1;
14299 enum stringop_alg alg;
14300 int i;
14302 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14303 if (algs->size[i].alg != libcall && algs->size[i].alg)
14304 max = algs->size[i].max;
14305 if (max == -1)
14306 max = 4096;
14307 alg = decide_alg (count, max / 2, memset, dynamic_check);
14308 gcc_assert (*dynamic_check == -1);
14309 gcc_assert (alg != libcall);
14310 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14311 *dynamic_check = max;
14312 return alg;
14314 return algs->unknown_size;
14317 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14318 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14319 static int
14320 decide_alignment (int align,
14321 enum stringop_alg alg,
14322 int expected_size)
14324 int desired_align = 0;
14325 switch (alg)
14327 case no_stringop:
14328 gcc_unreachable ();
14329 case loop:
14330 case unrolled_loop:
14331 desired_align = GET_MODE_SIZE (Pmode);
14332 break;
14333 case rep_prefix_8_byte:
14334 desired_align = 8;
14335 break;
14336 case rep_prefix_4_byte:
14337 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14338 copying a whole cache line at once. */
14339 if (TARGET_PENTIUMPRO)
14340 desired_align = 8;
14341 else
14342 desired_align = 4;
14343 break;
14344 case rep_prefix_1_byte:
14345 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14346 copying a whole cache line at once. */
14347 if (TARGET_PENTIUMPRO)
14348 desired_align = 8;
14349 else
14350 desired_align = 1;
14351 break;
14352 case loop_1_byte:
14353 desired_align = 1;
14354 break;
14355 case libcall:
14356 return 0;
14359 if (optimize_size)
14360 desired_align = 1;
14361 if (desired_align < align)
14362 desired_align = align;
14363 if (expected_size != -1 && expected_size < 4)
14364 desired_align = align;
14365 return desired_align;
14368 /* Return the smallest power of 2 greater than VAL. */
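/* For example, smallest_pow2_greater_than (5) == 8 and
   smallest_pow2_greater_than (8) == 16; the result is strictly greater than
   VAL, never equal to it.  */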
14369 static int
14370 smallest_pow2_greater_than (int val)
14372 int ret = 1;
14373 while (ret <= val)
14374 ret <<= 1;
14375 return ret;
14378 /* Expand string move (memcpy) operation. Use i386 string operations when
14379 profitable. ix86_expand_setmem contains similar code. The code depends upon
14380 architecture, block size and alignment, but always has the same
14381 overall structure:
14383 1) Prologue guard: Conditional that jumps to the epilogue for small
14384 blocks that can be handled by the epilogue alone. This is faster but
14385 also needed for correctness, since the prologue assumes the block is larger
14386 than the desired alignment.
14388 The optional dynamic check for size and the libcall for large
14389 blocks are emitted here too, with -minline-stringops-dynamically.
14391 2) Prologue: copy the first few bytes in order to get the destination aligned
14392 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14393 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14394 We emit either a jump tree for power-of-two-sized blocks, or a byte loop.
14396 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14397 with the specified algorithm.
14399 4) Epilogue: code copying the tail of the block that is too small to be
14400 handled by the main body (or up to the size guarded by the prologue guard). */
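/* Schematically (an illustrative sketch, not the exact emitted sequence),
   for a non-constant COUNT the expansion looks like:

       if (count < epilogue_size_needed) goto epilogue;            -- step 1
       if (count >= dynamic_check) { memcpy libcall; goto done; }  -- optional
       copy 1/2/4 bytes until dest reaches DESIRED_ALIGN           -- step 2
       main copy loop or rep-prefixed move on SIZE_NEEDED chunks   -- step 3
     epilogue:
       copy the remaining count & (epilogue_size_needed - 1) bytes -- step 4
     done:;  */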
14403 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14404 rtx expected_align_exp, rtx expected_size_exp)
14406 rtx destreg;
14407 rtx srcreg;
14408 rtx label = NULL;
14409 rtx tmp;
14410 rtx jump_around_label = NULL;
14411 HOST_WIDE_INT align = 1;
14412 unsigned HOST_WIDE_INT count = 0;
14413 HOST_WIDE_INT expected_size = -1;
14414 int size_needed = 0, epilogue_size_needed;
14415 int desired_align = 0;
14416 enum stringop_alg alg;
14417 int dynamic_check;
14419 if (CONST_INT_P (align_exp))
14420 align = INTVAL (align_exp);
14421 /* i386 can do misaligned access at a reasonably increased cost. */
14422 if (CONST_INT_P (expected_align_exp)
14423 && INTVAL (expected_align_exp) > align)
14424 align = INTVAL (expected_align_exp);
14425 if (CONST_INT_P (count_exp))
14426 count = expected_size = INTVAL (count_exp);
14427 if (CONST_INT_P (expected_size_exp) && count == 0)
14428 expected_size = INTVAL (expected_size_exp);
14430 /* Step 0: Decide on preferred algorithm, desired alignment and
14431 size of chunks to be copied by main loop. */
14433 alg = decide_alg (count, expected_size, false, &dynamic_check);
14434 desired_align = decide_alignment (align, alg, expected_size);
14436 if (!TARGET_ALIGN_STRINGOPS)
14437 align = desired_align;
14439 if (alg == libcall)
14440 return 0;
14441 gcc_assert (alg != no_stringop);
14442 if (!count)
14443 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14444 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14445 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14446 switch (alg)
14448 case libcall:
14449 case no_stringop:
14450 gcc_unreachable ();
14451 case loop:
14452 size_needed = GET_MODE_SIZE (Pmode);
14453 break;
14454 case unrolled_loop:
14455 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14456 break;
14457 case rep_prefix_8_byte:
14458 size_needed = 8;
14459 break;
14460 case rep_prefix_4_byte:
14461 size_needed = 4;
14462 break;
14463 case rep_prefix_1_byte:
14464 case loop_1_byte:
14465 size_needed = 1;
14466 break;
14469 epilogue_size_needed = size_needed;
14471 /* Step 1: Prologue guard. */
14473 /* Alignment code needs count to be in register. */
14474 if (CONST_INT_P (count_exp) && desired_align > align)
14476 enum machine_mode mode = SImode;
14477 if (TARGET_64BIT && (count & ~0xffffffff))
14478 mode = DImode;
14479 count_exp = force_reg (mode, count_exp);
14481 gcc_assert (desired_align >= 1 && align >= 1);
14483 /* Ensure that alignment prologue won't copy past end of block. */
14484 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14486 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14487 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14488 Make sure it is power of 2. */
14489 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14491 label = gen_label_rtx ();
14492 emit_cmp_and_jump_insns (count_exp,
14493 GEN_INT (epilogue_size_needed),
14494 LTU, 0, counter_mode (count_exp), 1, label);
14495 if (GET_CODE (count_exp) == CONST_INT)
14497 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14498 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14499 else
14500 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14502 /* Emit code to decide on runtime whether library call or inline should be
14503 used. */
14504 if (dynamic_check != -1)
14506 rtx hot_label = gen_label_rtx ();
14507 jump_around_label = gen_label_rtx ();
14508 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14509 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14510 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14511 emit_block_move_via_libcall (dst, src, count_exp, false);
14512 emit_jump (jump_around_label);
14513 emit_label (hot_label);
14516 /* Step 2: Alignment prologue. */
14518 if (desired_align > align)
14520 /* Except for the first move in the epilogue, we no longer know
14521 the constant offset in the aliasing info. It doesn't seem worth
14522 the pain to maintain it for the first move, so throw away
14523 the info early. */
14524 src = change_address (src, BLKmode, srcreg);
14525 dst = change_address (dst, BLKmode, destreg);
14526 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14527 desired_align);
14529 if (label && size_needed == 1)
14531 emit_label (label);
14532 LABEL_NUSES (label) = 1;
14533 label = NULL;
14536 /* Step 3: Main loop. */
14538 switch (alg)
14540 case libcall:
14541 case no_stringop:
14542 gcc_unreachable ();
14543 case loop_1_byte:
14544 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14545 count_exp, QImode, 1, expected_size);
14546 break;
14547 case loop:
14548 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14549 count_exp, Pmode, 1, expected_size);
14550 break;
14551 case unrolled_loop:
14552 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14553 registers for 4 temporaries anyway. */
14554 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14555 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14556 expected_size);
14557 break;
14558 case rep_prefix_8_byte:
14559 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14560 DImode);
14561 break;
14562 case rep_prefix_4_byte:
14563 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14564 SImode);
14565 break;
14566 case rep_prefix_1_byte:
14567 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14568 QImode);
14569 break;
14571 /* Properly adjust the offsets of the src and dest memory for aliasing. */
14572 if (CONST_INT_P (count_exp))
14574 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14575 (count / size_needed) * size_needed);
14576 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14577 (count / size_needed) * size_needed);
14579 else
14581 src = change_address (src, BLKmode, srcreg);
14582 dst = change_address (dst, BLKmode, destreg);
14585 /* Step 4: Epilogue to copy the remaining bytes. */
14587 if (label)
14589 /* When the main loop is done, COUNT_EXP might hold the original count,
14590 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14591 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14592 bytes. Compensate if needed. */
14594 if (size_needed < epilogue_size_needed)
14596 tmp =
14597 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14598 GEN_INT (size_needed - 1), count_exp, 1,
14599 OPTAB_DIRECT);
14600 if (tmp != count_exp)
14601 emit_move_insn (count_exp, tmp);
14603 emit_label (label);
14604 LABEL_NUSES (label) = 1;
14607 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14608 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14609 epilogue_size_needed);
14610 if (jump_around_label)
14611 emit_label (jump_around_label);
14612 return 1;
14615 /* Helper function for memset. For a QImode value 0xXY produce
14616 0xXYXYXYXY of the width specified by MODE. This is essentially
14617 a * 0x10101010, but we can do slightly better than
14618 synth_mult by unwinding the sequence by hand on CPUs with
14619 slow multiply. */
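/* For example (values chosen for illustration), promoting a QImode 0x4d to
   SImode on the shift/or path proceeds as:
       reg  = 0x0000004d
       reg |= reg << 8;    -- now 0x00004d4d (done by movsi_insv_1 when there
                              is no partial register stall)
       reg |= reg << 16;   -- now 0x4d4d4d4d
   For DImode one more "reg |= reg << 32" step follows.  */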
14620 static rtx
14621 promote_duplicated_reg (enum machine_mode mode, rtx val)
14623 enum machine_mode valmode = GET_MODE (val);
14624 rtx tmp;
14625 int nops = mode == DImode ? 3 : 2;
14627 gcc_assert (mode == SImode || mode == DImode);
14628 if (val == const0_rtx)
14629 return copy_to_mode_reg (mode, const0_rtx);
14630 if (CONST_INT_P (val))
14632 HOST_WIDE_INT v = INTVAL (val) & 255;
14634 v |= v << 8;
14635 v |= v << 16;
14636 if (mode == DImode)
14637 v |= (v << 16) << 16;
14638 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14641 if (valmode == VOIDmode)
14642 valmode = QImode;
14643 if (valmode != QImode)
14644 val = gen_lowpart (QImode, val);
14645 if (mode == QImode)
14646 return val;
14647 if (!TARGET_PARTIAL_REG_STALL)
14648 nops--;
14649 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14650 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14651 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14652 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14654 rtx reg = convert_modes (mode, QImode, val, true);
14655 tmp = promote_duplicated_reg (mode, const1_rtx);
14656 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14657 OPTAB_DIRECT);
14659 else
14661 rtx reg = convert_modes (mode, QImode, val, true);
14663 if (!TARGET_PARTIAL_REG_STALL)
14664 if (mode == SImode)
14665 emit_insn (gen_movsi_insv_1 (reg, reg));
14666 else
14667 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14668 else
14670 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14671 NULL, 1, OPTAB_DIRECT);
14672 reg =
14673 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14675 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14676 NULL, 1, OPTAB_DIRECT);
14677 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14678 if (mode == SImode)
14679 return reg;
14680 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14681 NULL, 1, OPTAB_DIRECT);
14682 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14683 return reg;
14687 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
14688 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
14689 getting the alignment from ALIGN to DESIRED_ALIGN. */
14690 static rtx
14691 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14693 rtx promoted_val;
14695 if (TARGET_64BIT
14696 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14697 promoted_val = promote_duplicated_reg (DImode, val);
14698 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14699 promoted_val = promote_duplicated_reg (SImode, val);
14700 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14701 promoted_val = promote_duplicated_reg (HImode, val);
14702 else
14703 promoted_val = val;
14705 return promoted_val;
14708 /* Expand string set operation (memset, bzero). Use i386 string operations
14709 when profitable. See the ix86_expand_movmem comment for an explanation of the
14710 individual steps performed. */
14712 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14713 rtx expected_align_exp, rtx expected_size_exp)
14715 rtx destreg;
14716 rtx label = NULL;
14717 rtx tmp;
14718 rtx jump_around_label = NULL;
14719 HOST_WIDE_INT align = 1;
14720 unsigned HOST_WIDE_INT count = 0;
14721 HOST_WIDE_INT expected_size = -1;
14722 int size_needed = 0, epilogue_size_needed;
14723 int desired_align = 0;
14724 enum stringop_alg alg;
14725 rtx promoted_val = NULL;
14726 bool force_loopy_epilogue = false;
14727 int dynamic_check;
14729 if (CONST_INT_P (align_exp))
14730 align = INTVAL (align_exp);
14731 /* i386 can do misaligned access at a reasonably increased cost. */
14732 if (CONST_INT_P (expected_align_exp)
14733 && INTVAL (expected_align_exp) > align)
14734 align = INTVAL (expected_align_exp);
14735 if (CONST_INT_P (count_exp))
14736 count = expected_size = INTVAL (count_exp);
14737 if (CONST_INT_P (expected_size_exp) && count == 0)
14738 expected_size = INTVAL (expected_size_exp);
14740 /* Step 0: Decide on preferred algorithm, desired alignment and
14741 size of chunks to be copied by main loop. */
14743 alg = decide_alg (count, expected_size, true, &dynamic_check);
14744 desired_align = decide_alignment (align, alg, expected_size);
14746 if (!TARGET_ALIGN_STRINGOPS)
14747 align = desired_align;
14749 if (alg == libcall)
14750 return 0;
14751 gcc_assert (alg != no_stringop);
14752 if (!count)
14753 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14754 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14755 switch (alg)
14757 case libcall:
14758 case no_stringop:
14759 gcc_unreachable ();
14760 case loop:
14761 size_needed = GET_MODE_SIZE (Pmode);
14762 break;
14763 case unrolled_loop:
14764 size_needed = GET_MODE_SIZE (Pmode) * 4;
14765 break;
14766 case rep_prefix_8_byte:
14767 size_needed = 8;
14768 break;
14769 case rep_prefix_4_byte:
14770 size_needed = 4;
14771 break;
14772 case rep_prefix_1_byte:
14773 case loop_1_byte:
14774 size_needed = 1;
14775 break;
14777 epilogue_size_needed = size_needed;
14779 /* Step 1: Prologue guard. */
14781 /* Alignment code needs count to be in register. */
14782 if (CONST_INT_P (count_exp) && desired_align > align)
14784 enum machine_mode mode = SImode;
14785 if (TARGET_64BIT && (count & ~0xffffffff))
14786 mode = DImode;
14787 count_exp = force_reg (mode, count_exp);
14789 /* Do the cheap promotion to allow better CSE across the
14790 main loop and epilogue (i.e. one load of the big constant in
14791 front of all the code). */
14792 if (CONST_INT_P (val_exp))
14793 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14794 desired_align, align);
14795 /* Ensure that alignment prologue won't copy past end of block. */
14796 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14798 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14799 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14800 Make sure it is power of 2. */
14801 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14803 /* To improve performance of small blocks, we jump around the VAL
14804 promoting code. This means that if the promoted VAL is not constant,
14805 we might not use it in the epilogue and have to use the byte
14806 loop variant. */
14807 if (epilogue_size_needed > 2 && !promoted_val)
14808 force_loopy_epilogue = true;
14809 label = gen_label_rtx ();
14810 emit_cmp_and_jump_insns (count_exp,
14811 GEN_INT (epilogue_size_needed),
14812 LTU, 0, counter_mode (count_exp), 1, label);
14813 if (GET_CODE (count_exp) == CONST_INT)
14815 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14816 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14817 else
14818 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14820 if (dynamic_check != -1)
14822 rtx hot_label = gen_label_rtx ();
14823 jump_around_label = gen_label_rtx ();
14824 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14825 LEU, 0, counter_mode (count_exp), 1, hot_label);
14826 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14827 set_storage_via_libcall (dst, count_exp, val_exp, false);
14828 emit_jump (jump_around_label);
14829 emit_label (hot_label);
14832 /* Step 2: Alignment prologue. */
14834 /* Do the expensive promotion once we have branched off the small blocks. */
14835 if (!promoted_val)
14836 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14837 desired_align, align);
14838 gcc_assert (desired_align >= 1 && align >= 1);
14840 if (desired_align > align)
14842 /* Except for the first move in the epilogue, we no longer know
14843 the constant offset in the aliasing info. It doesn't seem worth
14844 the pain to maintain it for the first move, so throw away
14845 the info early. */
14846 dst = change_address (dst, BLKmode, destreg);
14847 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14848 desired_align);
14850 if (label && size_needed == 1)
14852 emit_label (label);
14853 LABEL_NUSES (label) = 1;
14854 label = NULL;
14857 /* Step 3: Main loop. */
14859 switch (alg)
14861 case libcall:
14862 case no_stringop:
14863 gcc_unreachable ();
14864 case loop_1_byte:
14865 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14866 count_exp, QImode, 1, expected_size);
14867 break;
14868 case loop:
14869 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14870 count_exp, Pmode, 1, expected_size);
14871 break;
14872 case unrolled_loop:
14873 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14874 count_exp, Pmode, 4, expected_size);
14875 break;
14876 case rep_prefix_8_byte:
14877 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14878 DImode);
14879 break;
14880 case rep_prefix_4_byte:
14881 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14882 SImode);
14883 break;
14884 case rep_prefix_1_byte:
14885 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14886 QImode);
14887 break;
14889 /* Properly adjust the offset of the dest memory for aliasing. */
14890 if (CONST_INT_P (count_exp))
14891 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14892 (count / size_needed) * size_needed);
14893 else
14894 dst = change_address (dst, BLKmode, destreg);
14896 /* Step 4: Epilogue to copy the remaining bytes. */
14898 if (label)
14900 /* When the main loop is done, COUNT_EXP might hold the original count,
14901 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14902 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14903 bytes. Compensate if needed. */
14905 if (size_needed < desired_align - align)
14907 tmp =
14908 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14909 GEN_INT (size_needed - 1), count_exp, 1,
14910 OPTAB_DIRECT);
14911 size_needed = desired_align - align + 1;
14912 if (tmp != count_exp)
14913 emit_move_insn (count_exp, tmp);
14915 emit_label (label);
14916 LABEL_NUSES (label) = 1;
14918 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14920 if (force_loopy_epilogue)
14921 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14922 size_needed);
14923 else
14924 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14925 size_needed);
14927 if (jump_around_label)
14928 emit_label (jump_around_label);
14929 return 1;
14932 /* Expand the appropriate insns for doing strlen if not just doing
14933 repnz; scasb
14935 out = result, initialized with the start address
14936 align_rtx = alignment of the address.
14937 scratch = scratch register, initialized with the start address when
14938 not aligned, otherwise undefined
14940 This is just the body. It needs the initializations mentioned above and
14941 some address computation at the end. These things are done in i386.md. */
14943 static void
14944 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14946 int align;
14947 rtx tmp;
14948 rtx align_2_label = NULL_RTX;
14949 rtx align_3_label = NULL_RTX;
14950 rtx align_4_label = gen_label_rtx ();
14951 rtx end_0_label = gen_label_rtx ();
14952 rtx mem;
14953 rtx tmpreg = gen_reg_rtx (SImode);
14954 rtx scratch = gen_reg_rtx (SImode);
14955 rtx cmp;
14957 align = 0;
14958 if (CONST_INT_P (align_rtx))
14959 align = INTVAL (align_rtx);
14961 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14963 /* Is there a known alignment and is it less than 4? */
14964 if (align < 4)
14966 rtx scratch1 = gen_reg_rtx (Pmode);
14967 emit_move_insn (scratch1, out);
14968 /* Is there a known alignment and is it not 2? */
14969 if (align != 2)
14971 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14972 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14974 /* Leave just the 3 lower bits. */
14975 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14976 NULL_RTX, 0, OPTAB_WIDEN);
14978 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14979 Pmode, 1, align_4_label);
14980 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14981 Pmode, 1, align_2_label);
14982 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14983 Pmode, 1, align_3_label);
14985 else
14987 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14988 check whether it is aligned to a 4-byte boundary. */
14990 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14991 NULL_RTX, 0, OPTAB_WIDEN);
14993 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14994 Pmode, 1, align_4_label);
14997 mem = change_address (src, QImode, out);
14999 /* Now compare the bytes. */
15001 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
15002 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15003 QImode, 1, end_0_label);
15005 /* Increment the address. */
15006 if (TARGET_64BIT)
15007 emit_insn (gen_adddi3 (out, out, const1_rtx));
15008 else
15009 emit_insn (gen_addsi3 (out, out, const1_rtx));
15011 /* Not needed with an alignment of 2 */
15012 if (align != 2)
15014 emit_label (align_2_label);
15016 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15017 end_0_label);
15019 if (TARGET_64BIT)
15020 emit_insn (gen_adddi3 (out, out, const1_rtx));
15021 else
15022 emit_insn (gen_addsi3 (out, out, const1_rtx));
15024 emit_label (align_3_label);
15027 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15028 end_0_label);
15030 if (TARGET_64BIT)
15031 emit_insn (gen_adddi3 (out, out, const1_rtx));
15032 else
15033 emit_insn (gen_addsi3 (out, out, const1_rtx));
15036 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
15037 align this loop; it only bloats the code and does not help to
15038 speed it up. */
15039 emit_label (align_4_label);
15041 mem = change_address (src, SImode, out);
15042 emit_move_insn (scratch, mem);
15043 if (TARGET_64BIT)
15044 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15045 else
15046 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15048 /* This formula yields a nonzero result iff one of the bytes is zero.
15049 This saves three branches inside the loop and many cycles. */
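/* Concretely the computed value is ((x - 0x01010101) & ~x) & 0x80808080,
   which is nonzero exactly when some byte of x is zero. Worked example
   (x chosen for illustration), x = 0x41410041:
       x - 0x01010101  = 0x403fff40
       ~x              = 0xbebeffbe
       bitwise AND     = 0x003eff00
       & 0x80808080    = 0x00008000   -- nonzero, flagging the zero byte.  */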
15051 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15052 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15053 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15054 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15055 gen_int_mode (0x80808080, SImode)));
15056 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15057 align_4_label);
15059 if (TARGET_CMOVE)
15061 rtx reg = gen_reg_rtx (SImode);
15062 rtx reg2 = gen_reg_rtx (Pmode);
15063 emit_move_insn (reg, tmpreg);
15064 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15066 /* If zero is not in the first two bytes, move two bytes forward. */
15067 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15068 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15069 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15070 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15071 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15072 reg,
15073 tmpreg)));
15074 /* Emit the lea manually to avoid clobbering the flags. */
15075 emit_insn (gen_rtx_SET (SImode, reg2,
15076 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15078 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15079 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15080 emit_insn (gen_rtx_SET (VOIDmode, out,
15081 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15082 reg2,
15083 out)));
15086 else
15088 rtx end_2_label = gen_label_rtx ();
15089 /* Is zero in the first two bytes? */
15091 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15092 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15093 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15094 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15095 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15096 pc_rtx);
15097 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15098 JUMP_LABEL (tmp) = end_2_label;
15100 /* Not in the first two. Move two bytes forward. */
15101 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15102 if (TARGET_64BIT)
15103 emit_insn (gen_adddi3 (out, out, const2_rtx));
15104 else
15105 emit_insn (gen_addsi3 (out, out, const2_rtx));
15107 emit_label (end_2_label);
15111 /* Avoid branch in fixing the byte. */
15112 tmpreg = gen_lowpart (QImode, tmpreg);
15113 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15114 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15115 if (TARGET_64BIT)
15116 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15117 else
15118 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15120 emit_label (end_0_label);
15123 /* Expand strlen. */
15126 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15128 rtx addr, scratch1, scratch2, scratch3, scratch4;
15130 /* The generic case of the strlen expander is long. Avoid expanding it
15131 unless TARGET_INLINE_ALL_STRINGOPS. */
15133 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15134 && !TARGET_INLINE_ALL_STRINGOPS
15135 && !optimize_size
15136 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15137 return 0;
15139 addr = force_reg (Pmode, XEXP (src, 0));
15140 scratch1 = gen_reg_rtx (Pmode);
15142 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15143 && !optimize_size)
15145 /* Well, it seems that some optimizer does not combine a call like
15146 foo(strlen(bar), strlen(bar));
15147 when the move and the subtraction are done here. It does calculate
15148 the length just once when these instructions are done inside
15149 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
15150 often used and I use one fewer register for the lifetime of
15151 output_strlen_unroll(), this is better. */
15153 emit_move_insn (out, addr);
15155 ix86_expand_strlensi_unroll_1 (out, src, align);
15157 /* strlensi_unroll_1 returns the address of the zero at the end of
15158 the string, like memchr(), so compute the length by subtracting
15159 the start address. */
15160 if (TARGET_64BIT)
15161 emit_insn (gen_subdi3 (out, out, addr));
15162 else
15163 emit_insn (gen_subsi3 (out, out, addr));
15165 else
15167 rtx unspec;
15168 scratch2 = gen_reg_rtx (Pmode);
15169 scratch3 = gen_reg_rtx (Pmode);
15170 scratch4 = force_reg (Pmode, constm1_rtx);
15172 emit_move_insn (scratch3, addr);
15173 eoschar = force_reg (QImode, eoschar);
15175 src = replace_equiv_address_nv (src, scratch3);
15177 /* If .md starts supporting :P, this can be done in .md. */
15178 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15179 scratch4), UNSPEC_SCAS);
15180 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15181 if (TARGET_64BIT)
15183 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15184 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15186 else
15188 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15189 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15192 return 1;
15195 /* For a given symbol (function), construct code to compute the address of its
15196 PLT entry in the large x86-64 PIC model. */
15198 construct_plt_address (rtx symbol)
15200 rtx tmp = gen_reg_rtx (Pmode);
15201 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15203 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15204 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15206 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15207 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15208 return tmp;
15211 void
15212 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15213 rtx callarg2 ATTRIBUTE_UNUSED,
15214 rtx pop, int sibcall)
15216 rtx use = NULL, call;
15218 if (pop == const0_rtx)
15219 pop = NULL;
15220 gcc_assert (!TARGET_64BIT || !pop);
15222 if (TARGET_MACHO && !TARGET_64BIT)
15224 #if TARGET_MACHO
15225 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15226 fnaddr = machopic_indirect_call_target (fnaddr);
15227 #endif
15229 else
15231 /* Static functions and indirect calls don't need the pic register. */
15232 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15233 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15234 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15235 use_reg (&use, pic_offset_table_rtx);
15238 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15240 rtx al = gen_rtx_REG (QImode, 0);
15241 emit_move_insn (al, callarg2);
15242 use_reg (&use, al);
15245 if (ix86_cmodel == CM_LARGE_PIC
15246 && GET_CODE (fnaddr) == MEM
15247 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15248 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15249 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15250 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15252 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15253 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15255 if (sibcall && TARGET_64BIT
15256 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15258 rtx addr;
15259 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15260 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15261 emit_move_insn (fnaddr, addr);
15262 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15265 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15266 if (retval)
15267 call = gen_rtx_SET (VOIDmode, retval, call);
15268 if (pop)
15270 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15271 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15272 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15275 call = emit_call_insn (call);
15276 if (use)
15277 CALL_INSN_FUNCTION_USAGE (call) = use;
15281 /* Clear stack slot assignments remembered from previous functions.
15282 This is called from INIT_EXPANDERS once before RTL is emitted for each
15283 function. */
15285 static struct machine_function *
15286 ix86_init_machine_status (void)
15288 struct machine_function *f;
15290 f = ggc_alloc_cleared (sizeof (struct machine_function));
15291 f->use_fast_prologue_epilogue_nregs = -1;
15292 f->tls_descriptor_call_expanded_p = 0;
15294 return f;
15297 /* Return a MEM corresponding to a stack slot with mode MODE.
15298 Allocate a new slot if necessary.
15300 The RTL for a function can have several slots available: N is
15301 which slot to use. */
15304 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15306 struct stack_local_entry *s;
15308 gcc_assert (n < MAX_386_STACK_LOCALS);
15310 for (s = ix86_stack_locals; s; s = s->next)
15311 if (s->mode == mode && s->n == n)
15312 return copy_rtx (s->rtl);
15314 s = (struct stack_local_entry *)
15315 ggc_alloc (sizeof (struct stack_local_entry));
15316 s->n = n;
15317 s->mode = mode;
15318 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15320 s->next = ix86_stack_locals;
15321 ix86_stack_locals = s;
15322 return s->rtl;
15325 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15327 static GTY(()) rtx ix86_tls_symbol;
15329 ix86_tls_get_addr (void)
15332 if (!ix86_tls_symbol)
15334 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15335 (TARGET_ANY_GNU_TLS
15336 && !TARGET_64BIT)
15337 ? "___tls_get_addr"
15338 : "__tls_get_addr");
15341 return ix86_tls_symbol;
15344 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15346 static GTY(()) rtx ix86_tls_module_base_symbol;
15348 ix86_tls_module_base (void)
15351 if (!ix86_tls_module_base_symbol)
15353 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15354 "_TLS_MODULE_BASE_");
15355 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15356 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15359 return ix86_tls_module_base_symbol;
15362 /* Calculate the length of the memory address in the instruction
15363 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15366 memory_address_length (rtx addr)
15368 struct ix86_address parts;
15369 rtx base, index, disp;
15370 int len;
15371 int ok;
15373 if (GET_CODE (addr) == PRE_DEC
15374 || GET_CODE (addr) == POST_INC
15375 || GET_CODE (addr) == PRE_MODIFY
15376 || GET_CODE (addr) == POST_MODIFY)
15377 return 0;
15379 ok = ix86_decompose_address (addr, &parts);
15380 gcc_assert (ok);
15382 if (parts.base && GET_CODE (parts.base) == SUBREG)
15383 parts.base = SUBREG_REG (parts.base);
15384 if (parts.index && GET_CODE (parts.index) == SUBREG)
15385 parts.index = SUBREG_REG (parts.index);
15387 base = parts.base;
15388 index = parts.index;
15389 disp = parts.disp;
15390 len = 0;
15392 /* Rule of thumb:
15393 - esp as the base always wants an index,
15394 - ebp as the base always wants a displacement. */
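/* Illustrative examples of the value returned (SIB and displacement bytes
   only; the one-byte modrm, opcode and prefixes are excluded):
       (%eax)          -> 0
       (%esp)          -> 1   -- SIB byte
       8(%ebp)         -> 1   -- disp8
       foo(,%eax,4)    -> 5   -- SIB byte + disp32
       0x12345678      -> 4   -- disp32  */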
15396 /* Register Indirect. */
15397 if (base && !index && !disp)
15399 /* esp (for its index) and ebp (for its displacement) need
15400 the two-byte modrm form. */
15401 if (addr == stack_pointer_rtx
15402 || addr == arg_pointer_rtx
15403 || addr == frame_pointer_rtx
15404 || addr == hard_frame_pointer_rtx)
15405 len = 1;
15408 /* Direct Addressing. */
15409 else if (disp && !base && !index)
15410 len = 4;
15412 else
15414 /* Find the length of the displacement constant. */
15415 if (disp)
15417 if (base && satisfies_constraint_K (disp))
15418 len = 1;
15419 else
15420 len = 4;
15422 /* ebp always wants a displacement. */
15423 else if (base == hard_frame_pointer_rtx)
15424 len = 1;
15426 /* An index requires the two-byte modrm form.... */
15427 if (index
15428 /* ...like esp, which always wants an index. */
15429 || base == stack_pointer_rtx
15430 || base == arg_pointer_rtx
15431 || base == frame_pointer_rtx)
15432 len += 1;
15435 return len;
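/* Worked examples (added for clarity, not in the original source), counting
   only the bytes beyond the modrm/opcode/prefix as described above:
     (%eax)           -> 0  (plain register indirect)
     (%esp)           -> 1  (needs a SIB byte)
     8(%ebp)          -> 1  (disp8; ebp always takes a displacement)
     0x12345678       -> 4  (direct addressing, disp32)
     4(%eax,%ebx,2)   -> 2  (disp8 plus a SIB byte for the index)
     0x1234(%eax)     -> 4  (disp32, outside the signed 8-bit range)  */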
15438 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15439 is set, expect that the insn has an 8-bit immediate alternative. */
15441 ix86_attr_length_immediate_default (rtx insn, int shortform)
15443 int len = 0;
15444 int i;
15445 extract_insn_cached (insn);
15446 for (i = recog_data.n_operands - 1; i >= 0; --i)
15447 if (CONSTANT_P (recog_data.operand[i]))
15449 gcc_assert (!len);
15450 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15451 len = 1;
15452 else
15454 switch (get_attr_mode (insn))
15456 case MODE_QI:
15457 len+=1;
15458 break;
15459 case MODE_HI:
15460 len+=2;
15461 break;
15462 case MODE_SI:
15463 len+=4;
15464 break;
15465 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15466 case MODE_DI:
15467 len+=4;
15468 break;
15469 default:
15470 fatal_insn ("unknown insn mode", insn);
15474 return len;
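/* Example (added for clarity, not in the original source): with SHORTFORM set,
   "addl $8, %eax" can use the sign-extended 8-bit immediate form, so its
   immediate contributes 1 byte; "addl $1000, %eax" does not satisfy
   constraint K (signed 8-bit) and contributes 4 bytes in MODE_SI.  */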
15476 /* Compute default value for "length_address" attribute. */
15478 ix86_attr_length_address_default (rtx insn)
15480 int i;
15482 if (get_attr_type (insn) == TYPE_LEA)
15484 rtx set = PATTERN (insn);
15486 if (GET_CODE (set) == PARALLEL)
15487 set = XVECEXP (set, 0, 0);
15489 gcc_assert (GET_CODE (set) == SET);
15491 return memory_address_length (SET_SRC (set));
15494 extract_insn_cached (insn);
15495 for (i = recog_data.n_operands - 1; i >= 0; --i)
15496 if (MEM_P (recog_data.operand[i]))
15498 return memory_address_length (XEXP (recog_data.operand[i], 0));
15499 break;
15501 return 0;
15504 /* Return the maximum number of instructions a cpu can issue. */
15506 static int
15507 ix86_issue_rate (void)
15509 switch (ix86_tune)
15511 case PROCESSOR_PENTIUM:
15512 case PROCESSOR_K6:
15513 return 2;
15515 case PROCESSOR_PENTIUMPRO:
15516 case PROCESSOR_PENTIUM4:
15517 case PROCESSOR_ATHLON:
15518 case PROCESSOR_K8:
15519 case PROCESSOR_AMDFAM10:
15520 case PROCESSOR_NOCONA:
15521 case PROCESSOR_GENERIC32:
15522 case PROCESSOR_GENERIC64:
15523 return 3;
15525 case PROCESSOR_CORE2:
15526 return 4;
15528 default:
15529 return 1;
15533 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15534 by DEP_INSN and nothing else set by DEP_INSN. */
15536 static int
15537 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15539 rtx set, set2;
15541 /* Simplify the test for uninteresting insns. */
15542 if (insn_type != TYPE_SETCC
15543 && insn_type != TYPE_ICMOV
15544 && insn_type != TYPE_FCMOV
15545 && insn_type != TYPE_IBR)
15546 return 0;
15548 if ((set = single_set (dep_insn)) != 0)
15550 set = SET_DEST (set);
15551 set2 = NULL_RTX;
15553 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15554 && XVECLEN (PATTERN (dep_insn), 0) == 2
15555 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15556 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15558 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15559 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15561 else
15562 return 0;
15564 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15565 return 0;
15567 /* This test is true if the dependent insn reads the flags but
15568 not any other potentially set register. */
15569 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15570 return 0;
15572 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15573 return 0;
15575 return 1;
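/* Example (added for clarity, not in the original source): on Pentium a
   "cmp %eax, %ebx" (DEP_INSN, sets the flags) followed by a conditional jump
   or setcc that reads only the flags makes this return true; ix86_adjust_cost
   below then drops the cost to 0 so the compare and the branch can pair.  */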
15578 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15579 address with operands set by DEP_INSN. */
15581 static int
15582 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15584 rtx addr;
15586 if (insn_type == TYPE_LEA
15587 && TARGET_PENTIUM)
15589 addr = PATTERN (insn);
15591 if (GET_CODE (addr) == PARALLEL)
15592 addr = XVECEXP (addr, 0, 0);
15594 gcc_assert (GET_CODE (addr) == SET);
15596 addr = SET_SRC (addr);
15598 else
15600 int i;
15601 extract_insn_cached (insn);
15602 for (i = recog_data.n_operands - 1; i >= 0; --i)
15603 if (MEM_P (recog_data.operand[i]))
15605 addr = XEXP (recog_data.operand[i], 0);
15606 goto found;
15608 return 0;
15609 found:;
15612 return modified_in_p (addr, dep_insn);
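/* Example (added for clarity, not in the original source): if DEP_INSN is
   "addl $4, %esi" and INSN is "movl (%esi), %eax", the load address is
   modified by DEP_INSN, so this returns true; on Pentium the caller then adds
   one cycle for the address generation interlock.  */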
15615 static int
15616 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15618 enum attr_type insn_type, dep_insn_type;
15619 enum attr_memory memory;
15620 rtx set, set2;
15621 int dep_insn_code_number;
15623 /* Anti and output dependencies have zero cost on all CPUs. */
15624 if (REG_NOTE_KIND (link) != 0)
15625 return 0;
15627 dep_insn_code_number = recog_memoized (dep_insn);
15629 /* If we can't recognize the insns, we can't really do anything. */
15630 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15631 return cost;
15633 insn_type = get_attr_type (insn);
15634 dep_insn_type = get_attr_type (dep_insn);
15636 switch (ix86_tune)
15638 case PROCESSOR_PENTIUM:
15639 /* Address Generation Interlock adds a cycle of latency. */
15640 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15641 cost += 1;
15643 /* ??? Compares pair with jump/setcc. */
15644 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15645 cost = 0;
15647 /* Floating point stores require value to be ready one cycle earlier. */
15648 if (insn_type == TYPE_FMOV
15649 && get_attr_memory (insn) == MEMORY_STORE
15650 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15651 cost += 1;
15652 break;
15654 case PROCESSOR_PENTIUMPRO:
15655 memory = get_attr_memory (insn);
15657 /* INT->FP conversion is expensive. */
15658 if (get_attr_fp_int_src (dep_insn))
15659 cost += 5;
15661 /* There is one cycle extra latency between an FP op and a store. */
15662 if (insn_type == TYPE_FMOV
15663 && (set = single_set (dep_insn)) != NULL_RTX
15664 && (set2 = single_set (insn)) != NULL_RTX
15665 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15666 && MEM_P (SET_DEST (set2)))
15667 cost += 1;
15669 /* Show the ability of the reorder buffer to hide the latency of a load
15670 by executing it in parallel with the previous instruction when the
15671 previous instruction is not needed to compute the address. */
15672 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15673 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15675 /* Claim moves to take one cycle, as the core can issue one load
15676 at a time and the next load can start a cycle later. */
15677 if (dep_insn_type == TYPE_IMOV
15678 || dep_insn_type == TYPE_FMOV)
15679 cost = 1;
15680 else if (cost > 1)
15681 cost--;
15683 break;
15685 case PROCESSOR_K6:
15686 memory = get_attr_memory (insn);
15688 /* The esp dependency is resolved before the instruction is really
15689 finished. */
15690 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15691 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15692 return 1;
15694 /* INT->FP conversion is expensive. */
15695 if (get_attr_fp_int_src (dep_insn))
15696 cost += 5;
15698 /* Show the ability of the reorder buffer to hide the latency of a load
15699 by executing it in parallel with the previous instruction when the
15700 previous instruction is not needed to compute the address. */
15701 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15702 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15704 /* Claim moves to take one cycle, as the core can issue one load
15705 at a time and the next load can start a cycle later. */
15706 if (dep_insn_type == TYPE_IMOV
15707 || dep_insn_type == TYPE_FMOV)
15708 cost = 1;
15709 else if (cost > 2)
15710 cost -= 2;
15711 else
15712 cost = 1;
15714 break;
15716 case PROCESSOR_ATHLON:
15717 case PROCESSOR_K8:
15718 case PROCESSOR_AMDFAM10:
15719 case PROCESSOR_GENERIC32:
15720 case PROCESSOR_GENERIC64:
15721 memory = get_attr_memory (insn);
15723 /* Show the ability of the reorder buffer to hide the latency of a load
15724 by executing it in parallel with the previous instruction when the
15725 previous instruction is not needed to compute the address. */
15726 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15727 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15729 enum attr_unit unit = get_attr_unit (insn);
15730 int loadcost = 3;
15732 /* Because of the difference between the length of integer and
15733 floating unit pipeline preparation stages, the memory operands
15734 for floating point are cheaper.
15736 ??? For Athlon the difference is most probably 2. */
15737 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15738 loadcost = 3;
15739 else
15740 loadcost = TARGET_ATHLON ? 2 : 0;
15742 if (cost >= loadcost)
15743 cost -= loadcost;
15744 else
15745 cost = 0;
15748 default:
15749 break;
15752 return cost;
15755 /* How many alternative schedules to try. This should be as wide as the
15756 scheduling freedom in the DFA, but no wider. Making this value too
15757 large results in extra work for the scheduler. */
15759 static int
15760 ia32_multipass_dfa_lookahead (void)
15762 if (ix86_tune == PROCESSOR_PENTIUM)
15763 return 2;
15765 if (ix86_tune == PROCESSOR_PENTIUMPRO
15766 || ix86_tune == PROCESSOR_K6)
15767 return 1;
15769 else
15770 return 0;
15774 /* Compute the alignment given to a constant that is being placed in memory.
15775 EXP is the constant and ALIGN is the alignment that the object would
15776 ordinarily have.
15777 The value of this function is used instead of that alignment to align
15778 the object. */
15781 ix86_constant_alignment (tree exp, int align)
15783 if (TREE_CODE (exp) == REAL_CST)
15785 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15786 return 64;
15787 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15788 return 128;
15790 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15791 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15792 return BITS_PER_WORD;
15794 return align;
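/* Example (added for clarity, not in the original source): a double (DFmode)
   literal placed in memory is given at least 64-bit alignment, and a string
   constant of 31 or more bytes is given word alignment unless we are
   optimizing for size.  */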
15797 /* Compute the alignment for a static variable.
15798 TYPE is the data type, and ALIGN is the alignment that
15799 the object would ordinarily have. The value of this function is used
15800 instead of that alignment to align the object. */
15803 ix86_data_alignment (tree type, int align)
15805 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15807 if (AGGREGATE_TYPE_P (type)
15808 && TYPE_SIZE (type)
15809 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15810 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15811 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15812 && align < max_align)
15813 align = max_align;
15815 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15816 to a 16-byte boundary. */
15817 if (TARGET_64BIT)
15819 if (AGGREGATE_TYPE_P (type)
15820 && TYPE_SIZE (type)
15821 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15822 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15823 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15824 return 128;
15827 if (TREE_CODE (type) == ARRAY_TYPE)
15829 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15830 return 64;
15831 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15832 return 128;
15834 else if (TREE_CODE (type) == COMPLEX_TYPE)
15837 if (TYPE_MODE (type) == DCmode && align < 64)
15838 return 64;
15839 if (TYPE_MODE (type) == XCmode && align < 128)
15840 return 128;
15842 else if ((TREE_CODE (type) == RECORD_TYPE
15843 || TREE_CODE (type) == UNION_TYPE
15844 || TREE_CODE (type) == QUAL_UNION_TYPE)
15845 && TYPE_FIELDS (type))
15847 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15848 return 64;
15849 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15850 return 128;
15852 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15853 || TREE_CODE (type) == INTEGER_TYPE)
15855 if (TYPE_MODE (type) == DFmode && align < 64)
15856 return 64;
15857 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15858 return 128;
15861 return align;
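/* Example (added for clarity, not in the original source): on x86-64 a static
   "double a[2]" (16 bytes) is raised to 128-bit alignment per the ABI rule
   above, while a lone static "double" keeps the 64-bit alignment from the
   mode-based checks.  Aggregates of 256 bits or more can be raised as far as
   max_align when not optimizing for size.  */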
15864 /* Compute the alignment for a local variable.
15865 TYPE is the data type, and ALIGN is the alignment that
15866 the object would ordinarily have. The value of this macro is used
15867 instead of that alignment to align the object. */
15870 ix86_local_alignment (tree type, int align)
15872 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15873 to a 16-byte boundary. */
15874 if (TARGET_64BIT)
15876 if (AGGREGATE_TYPE_P (type)
15877 && TYPE_SIZE (type)
15878 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15879 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15880 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15881 return 128;
15883 if (TREE_CODE (type) == ARRAY_TYPE)
15885 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15886 return 64;
15887 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15888 return 128;
15890 else if (TREE_CODE (type) == COMPLEX_TYPE)
15892 if (TYPE_MODE (type) == DCmode && align < 64)
15893 return 64;
15894 if (TYPE_MODE (type) == XCmode && align < 128)
15895 return 128;
15897 else if ((TREE_CODE (type) == RECORD_TYPE
15898 || TREE_CODE (type) == UNION_TYPE
15899 || TREE_CODE (type) == QUAL_UNION_TYPE)
15900 && TYPE_FIELDS (type))
15902 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15903 return 64;
15904 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15905 return 128;
15907 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15908 || TREE_CODE (type) == INTEGER_TYPE)
15911 if (TYPE_MODE (type) == DFmode && align < 64)
15912 return 64;
15913 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15914 return 128;
15916 return align;
15919 /* Emit RTL insns to initialize the variable parts of a trampoline.
15920 FNADDR is an RTX for the address of the function's pure code.
15921 CXT is an RTX for the static chain value for the function. */
15922 void
15923 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15925 if (!TARGET_64BIT)
15927 /* Compute offset from the end of the jmp to the target function. */
15928 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15929 plus_constant (tramp, 10),
15930 NULL_RTX, 1, OPTAB_DIRECT);
15931 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15932 gen_int_mode (0xb9, QImode));
15933 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15934 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15935 gen_int_mode (0xe9, QImode));
15936 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15938 else
15940 int offset = 0;
15941 /* Try to load the address using the shorter movl instead of movabs.
15942 We may want to support movq for kernel mode, but the kernel does not use
15943 trampolines at the moment. */
15944 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15946 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15947 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15948 gen_int_mode (0xbb41, HImode));
15949 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15950 gen_lowpart (SImode, fnaddr));
15951 offset += 6;
15953 else
15955 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15956 gen_int_mode (0xbb49, HImode));
15957 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15958 fnaddr);
15959 offset += 10;
15961 /* Load static chain using movabs to r10. */
15962 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15963 gen_int_mode (0xba49, HImode));
15964 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15965 cxt);
15966 offset += 10;
15967 /* Jump to r11. */
15968 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15969 gen_int_mode (0xff49, HImode));
15970 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15971 gen_int_mode (0xe3, QImode));
15972 offset += 3;
15973 gcc_assert (offset <= TRAMPOLINE_SIZE);
15976 #ifdef ENABLE_EXECUTE_STACK
15977 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15978 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15979 #endif
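/* Byte layout sketch of the trampolines emitted above (added for clarity,
   not in the original source):

   32-bit (10 bytes):
     b9 <cxt:4>          movl   $cxt, %ecx
     e9 <disp:4>         jmp    fnaddr        (disp computed above)

   64-bit, long form (23 bytes):
     49 bb <fnaddr:8>    movabs $fnaddr, %r11
     49 ba <cxt:8>       movabs $cxt, %r10
     49 ff e3            jmp    *%r11

   When fnaddr fits in an unsigned 32-bit immediate, the first instruction is
   shortened to "41 bb <fnaddr:4>" (movl $fnaddr, %r11d).  */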
15982 /* Codes for all the SSE/MMX builtins. */
15983 enum ix86_builtins
15985 IX86_BUILTIN_ADDPS,
15986 IX86_BUILTIN_ADDSS,
15987 IX86_BUILTIN_DIVPS,
15988 IX86_BUILTIN_DIVSS,
15989 IX86_BUILTIN_MULPS,
15990 IX86_BUILTIN_MULSS,
15991 IX86_BUILTIN_SUBPS,
15992 IX86_BUILTIN_SUBSS,
15994 IX86_BUILTIN_CMPEQPS,
15995 IX86_BUILTIN_CMPLTPS,
15996 IX86_BUILTIN_CMPLEPS,
15997 IX86_BUILTIN_CMPGTPS,
15998 IX86_BUILTIN_CMPGEPS,
15999 IX86_BUILTIN_CMPNEQPS,
16000 IX86_BUILTIN_CMPNLTPS,
16001 IX86_BUILTIN_CMPNLEPS,
16002 IX86_BUILTIN_CMPNGTPS,
16003 IX86_BUILTIN_CMPNGEPS,
16004 IX86_BUILTIN_CMPORDPS,
16005 IX86_BUILTIN_CMPUNORDPS,
16006 IX86_BUILTIN_CMPEQSS,
16007 IX86_BUILTIN_CMPLTSS,
16008 IX86_BUILTIN_CMPLESS,
16009 IX86_BUILTIN_CMPNEQSS,
16010 IX86_BUILTIN_CMPNLTSS,
16011 IX86_BUILTIN_CMPNLESS,
16012 IX86_BUILTIN_CMPNGTSS,
16013 IX86_BUILTIN_CMPNGESS,
16014 IX86_BUILTIN_CMPORDSS,
16015 IX86_BUILTIN_CMPUNORDSS,
16017 IX86_BUILTIN_COMIEQSS,
16018 IX86_BUILTIN_COMILTSS,
16019 IX86_BUILTIN_COMILESS,
16020 IX86_BUILTIN_COMIGTSS,
16021 IX86_BUILTIN_COMIGESS,
16022 IX86_BUILTIN_COMINEQSS,
16023 IX86_BUILTIN_UCOMIEQSS,
16024 IX86_BUILTIN_UCOMILTSS,
16025 IX86_BUILTIN_UCOMILESS,
16026 IX86_BUILTIN_UCOMIGTSS,
16027 IX86_BUILTIN_UCOMIGESS,
16028 IX86_BUILTIN_UCOMINEQSS,
16030 IX86_BUILTIN_CVTPI2PS,
16031 IX86_BUILTIN_CVTPS2PI,
16032 IX86_BUILTIN_CVTSI2SS,
16033 IX86_BUILTIN_CVTSI642SS,
16034 IX86_BUILTIN_CVTSS2SI,
16035 IX86_BUILTIN_CVTSS2SI64,
16036 IX86_BUILTIN_CVTTPS2PI,
16037 IX86_BUILTIN_CVTTSS2SI,
16038 IX86_BUILTIN_CVTTSS2SI64,
16040 IX86_BUILTIN_MAXPS,
16041 IX86_BUILTIN_MAXSS,
16042 IX86_BUILTIN_MINPS,
16043 IX86_BUILTIN_MINSS,
16045 IX86_BUILTIN_LOADUPS,
16046 IX86_BUILTIN_STOREUPS,
16047 IX86_BUILTIN_MOVSS,
16049 IX86_BUILTIN_MOVHLPS,
16050 IX86_BUILTIN_MOVLHPS,
16051 IX86_BUILTIN_LOADHPS,
16052 IX86_BUILTIN_LOADLPS,
16053 IX86_BUILTIN_STOREHPS,
16054 IX86_BUILTIN_STORELPS,
16056 IX86_BUILTIN_MASKMOVQ,
16057 IX86_BUILTIN_MOVMSKPS,
16058 IX86_BUILTIN_PMOVMSKB,
16060 IX86_BUILTIN_MOVNTPS,
16061 IX86_BUILTIN_MOVNTQ,
16063 IX86_BUILTIN_LOADDQU,
16064 IX86_BUILTIN_STOREDQU,
16066 IX86_BUILTIN_PACKSSWB,
16067 IX86_BUILTIN_PACKSSDW,
16068 IX86_BUILTIN_PACKUSWB,
16070 IX86_BUILTIN_PADDB,
16071 IX86_BUILTIN_PADDW,
16072 IX86_BUILTIN_PADDD,
16073 IX86_BUILTIN_PADDQ,
16074 IX86_BUILTIN_PADDSB,
16075 IX86_BUILTIN_PADDSW,
16076 IX86_BUILTIN_PADDUSB,
16077 IX86_BUILTIN_PADDUSW,
16078 IX86_BUILTIN_PSUBB,
16079 IX86_BUILTIN_PSUBW,
16080 IX86_BUILTIN_PSUBD,
16081 IX86_BUILTIN_PSUBQ,
16082 IX86_BUILTIN_PSUBSB,
16083 IX86_BUILTIN_PSUBSW,
16084 IX86_BUILTIN_PSUBUSB,
16085 IX86_BUILTIN_PSUBUSW,
16087 IX86_BUILTIN_PAND,
16088 IX86_BUILTIN_PANDN,
16089 IX86_BUILTIN_POR,
16090 IX86_BUILTIN_PXOR,
16092 IX86_BUILTIN_PAVGB,
16093 IX86_BUILTIN_PAVGW,
16095 IX86_BUILTIN_PCMPEQB,
16096 IX86_BUILTIN_PCMPEQW,
16097 IX86_BUILTIN_PCMPEQD,
16098 IX86_BUILTIN_PCMPGTB,
16099 IX86_BUILTIN_PCMPGTW,
16100 IX86_BUILTIN_PCMPGTD,
16102 IX86_BUILTIN_PMADDWD,
16104 IX86_BUILTIN_PMAXSW,
16105 IX86_BUILTIN_PMAXUB,
16106 IX86_BUILTIN_PMINSW,
16107 IX86_BUILTIN_PMINUB,
16109 IX86_BUILTIN_PMULHUW,
16110 IX86_BUILTIN_PMULHW,
16111 IX86_BUILTIN_PMULLW,
16113 IX86_BUILTIN_PSADBW,
16114 IX86_BUILTIN_PSHUFW,
16116 IX86_BUILTIN_PSLLW,
16117 IX86_BUILTIN_PSLLD,
16118 IX86_BUILTIN_PSLLQ,
16119 IX86_BUILTIN_PSRAW,
16120 IX86_BUILTIN_PSRAD,
16121 IX86_BUILTIN_PSRLW,
16122 IX86_BUILTIN_PSRLD,
16123 IX86_BUILTIN_PSRLQ,
16124 IX86_BUILTIN_PSLLWI,
16125 IX86_BUILTIN_PSLLDI,
16126 IX86_BUILTIN_PSLLQI,
16127 IX86_BUILTIN_PSRAWI,
16128 IX86_BUILTIN_PSRADI,
16129 IX86_BUILTIN_PSRLWI,
16130 IX86_BUILTIN_PSRLDI,
16131 IX86_BUILTIN_PSRLQI,
16133 IX86_BUILTIN_PUNPCKHBW,
16134 IX86_BUILTIN_PUNPCKHWD,
16135 IX86_BUILTIN_PUNPCKHDQ,
16136 IX86_BUILTIN_PUNPCKLBW,
16137 IX86_BUILTIN_PUNPCKLWD,
16138 IX86_BUILTIN_PUNPCKLDQ,
16140 IX86_BUILTIN_SHUFPS,
16142 IX86_BUILTIN_RCPPS,
16143 IX86_BUILTIN_RCPSS,
16144 IX86_BUILTIN_RSQRTPS,
16145 IX86_BUILTIN_RSQRTSS,
16146 IX86_BUILTIN_SQRTPS,
16147 IX86_BUILTIN_SQRTSS,
16149 IX86_BUILTIN_UNPCKHPS,
16150 IX86_BUILTIN_UNPCKLPS,
16152 IX86_BUILTIN_ANDPS,
16153 IX86_BUILTIN_ANDNPS,
16154 IX86_BUILTIN_ORPS,
16155 IX86_BUILTIN_XORPS,
16157 IX86_BUILTIN_EMMS,
16158 IX86_BUILTIN_LDMXCSR,
16159 IX86_BUILTIN_STMXCSR,
16160 IX86_BUILTIN_SFENCE,
16162 /* 3DNow! Original */
16163 IX86_BUILTIN_FEMMS,
16164 IX86_BUILTIN_PAVGUSB,
16165 IX86_BUILTIN_PF2ID,
16166 IX86_BUILTIN_PFACC,
16167 IX86_BUILTIN_PFADD,
16168 IX86_BUILTIN_PFCMPEQ,
16169 IX86_BUILTIN_PFCMPGE,
16170 IX86_BUILTIN_PFCMPGT,
16171 IX86_BUILTIN_PFMAX,
16172 IX86_BUILTIN_PFMIN,
16173 IX86_BUILTIN_PFMUL,
16174 IX86_BUILTIN_PFRCP,
16175 IX86_BUILTIN_PFRCPIT1,
16176 IX86_BUILTIN_PFRCPIT2,
16177 IX86_BUILTIN_PFRSQIT1,
16178 IX86_BUILTIN_PFRSQRT,
16179 IX86_BUILTIN_PFSUB,
16180 IX86_BUILTIN_PFSUBR,
16181 IX86_BUILTIN_PI2FD,
16182 IX86_BUILTIN_PMULHRW,
16184 /* 3DNow! Athlon Extensions */
16185 IX86_BUILTIN_PF2IW,
16186 IX86_BUILTIN_PFNACC,
16187 IX86_BUILTIN_PFPNACC,
16188 IX86_BUILTIN_PI2FW,
16189 IX86_BUILTIN_PSWAPDSI,
16190 IX86_BUILTIN_PSWAPDSF,
16192 /* SSE2 */
16193 IX86_BUILTIN_ADDPD,
16194 IX86_BUILTIN_ADDSD,
16195 IX86_BUILTIN_DIVPD,
16196 IX86_BUILTIN_DIVSD,
16197 IX86_BUILTIN_MULPD,
16198 IX86_BUILTIN_MULSD,
16199 IX86_BUILTIN_SUBPD,
16200 IX86_BUILTIN_SUBSD,
16202 IX86_BUILTIN_CMPEQPD,
16203 IX86_BUILTIN_CMPLTPD,
16204 IX86_BUILTIN_CMPLEPD,
16205 IX86_BUILTIN_CMPGTPD,
16206 IX86_BUILTIN_CMPGEPD,
16207 IX86_BUILTIN_CMPNEQPD,
16208 IX86_BUILTIN_CMPNLTPD,
16209 IX86_BUILTIN_CMPNLEPD,
16210 IX86_BUILTIN_CMPNGTPD,
16211 IX86_BUILTIN_CMPNGEPD,
16212 IX86_BUILTIN_CMPORDPD,
16213 IX86_BUILTIN_CMPUNORDPD,
16214 IX86_BUILTIN_CMPEQSD,
16215 IX86_BUILTIN_CMPLTSD,
16216 IX86_BUILTIN_CMPLESD,
16217 IX86_BUILTIN_CMPNEQSD,
16218 IX86_BUILTIN_CMPNLTSD,
16219 IX86_BUILTIN_CMPNLESD,
16220 IX86_BUILTIN_CMPORDSD,
16221 IX86_BUILTIN_CMPUNORDSD,
16223 IX86_BUILTIN_COMIEQSD,
16224 IX86_BUILTIN_COMILTSD,
16225 IX86_BUILTIN_COMILESD,
16226 IX86_BUILTIN_COMIGTSD,
16227 IX86_BUILTIN_COMIGESD,
16228 IX86_BUILTIN_COMINEQSD,
16229 IX86_BUILTIN_UCOMIEQSD,
16230 IX86_BUILTIN_UCOMILTSD,
16231 IX86_BUILTIN_UCOMILESD,
16232 IX86_BUILTIN_UCOMIGTSD,
16233 IX86_BUILTIN_UCOMIGESD,
16234 IX86_BUILTIN_UCOMINEQSD,
16236 IX86_BUILTIN_MAXPD,
16237 IX86_BUILTIN_MAXSD,
16238 IX86_BUILTIN_MINPD,
16239 IX86_BUILTIN_MINSD,
16241 IX86_BUILTIN_ANDPD,
16242 IX86_BUILTIN_ANDNPD,
16243 IX86_BUILTIN_ORPD,
16244 IX86_BUILTIN_XORPD,
16246 IX86_BUILTIN_SQRTPD,
16247 IX86_BUILTIN_SQRTSD,
16249 IX86_BUILTIN_UNPCKHPD,
16250 IX86_BUILTIN_UNPCKLPD,
16252 IX86_BUILTIN_SHUFPD,
16254 IX86_BUILTIN_LOADUPD,
16255 IX86_BUILTIN_STOREUPD,
16256 IX86_BUILTIN_MOVSD,
16258 IX86_BUILTIN_LOADHPD,
16259 IX86_BUILTIN_LOADLPD,
16261 IX86_BUILTIN_CVTDQ2PD,
16262 IX86_BUILTIN_CVTDQ2PS,
16264 IX86_BUILTIN_CVTPD2DQ,
16265 IX86_BUILTIN_CVTPD2PI,
16266 IX86_BUILTIN_CVTPD2PS,
16267 IX86_BUILTIN_CVTTPD2DQ,
16268 IX86_BUILTIN_CVTTPD2PI,
16270 IX86_BUILTIN_CVTPI2PD,
16271 IX86_BUILTIN_CVTSI2SD,
16272 IX86_BUILTIN_CVTSI642SD,
16274 IX86_BUILTIN_CVTSD2SI,
16275 IX86_BUILTIN_CVTSD2SI64,
16276 IX86_BUILTIN_CVTSD2SS,
16277 IX86_BUILTIN_CVTSS2SD,
16278 IX86_BUILTIN_CVTTSD2SI,
16279 IX86_BUILTIN_CVTTSD2SI64,
16281 IX86_BUILTIN_CVTPS2DQ,
16282 IX86_BUILTIN_CVTPS2PD,
16283 IX86_BUILTIN_CVTTPS2DQ,
16285 IX86_BUILTIN_MOVNTI,
16286 IX86_BUILTIN_MOVNTPD,
16287 IX86_BUILTIN_MOVNTDQ,
16289 /* SSE2 MMX */
16290 IX86_BUILTIN_MASKMOVDQU,
16291 IX86_BUILTIN_MOVMSKPD,
16292 IX86_BUILTIN_PMOVMSKB128,
16294 IX86_BUILTIN_PACKSSWB128,
16295 IX86_BUILTIN_PACKSSDW128,
16296 IX86_BUILTIN_PACKUSWB128,
16298 IX86_BUILTIN_PADDB128,
16299 IX86_BUILTIN_PADDW128,
16300 IX86_BUILTIN_PADDD128,
16301 IX86_BUILTIN_PADDQ128,
16302 IX86_BUILTIN_PADDSB128,
16303 IX86_BUILTIN_PADDSW128,
16304 IX86_BUILTIN_PADDUSB128,
16305 IX86_BUILTIN_PADDUSW128,
16306 IX86_BUILTIN_PSUBB128,
16307 IX86_BUILTIN_PSUBW128,
16308 IX86_BUILTIN_PSUBD128,
16309 IX86_BUILTIN_PSUBQ128,
16310 IX86_BUILTIN_PSUBSB128,
16311 IX86_BUILTIN_PSUBSW128,
16312 IX86_BUILTIN_PSUBUSB128,
16313 IX86_BUILTIN_PSUBUSW128,
16315 IX86_BUILTIN_PAND128,
16316 IX86_BUILTIN_PANDN128,
16317 IX86_BUILTIN_POR128,
16318 IX86_BUILTIN_PXOR128,
16320 IX86_BUILTIN_PAVGB128,
16321 IX86_BUILTIN_PAVGW128,
16323 IX86_BUILTIN_PCMPEQB128,
16324 IX86_BUILTIN_PCMPEQW128,
16325 IX86_BUILTIN_PCMPEQD128,
16326 IX86_BUILTIN_PCMPGTB128,
16327 IX86_BUILTIN_PCMPGTW128,
16328 IX86_BUILTIN_PCMPGTD128,
16330 IX86_BUILTIN_PMADDWD128,
16332 IX86_BUILTIN_PMAXSW128,
16333 IX86_BUILTIN_PMAXUB128,
16334 IX86_BUILTIN_PMINSW128,
16335 IX86_BUILTIN_PMINUB128,
16337 IX86_BUILTIN_PMULUDQ,
16338 IX86_BUILTIN_PMULUDQ128,
16339 IX86_BUILTIN_PMULHUW128,
16340 IX86_BUILTIN_PMULHW128,
16341 IX86_BUILTIN_PMULLW128,
16343 IX86_BUILTIN_PSADBW128,
16344 IX86_BUILTIN_PSHUFHW,
16345 IX86_BUILTIN_PSHUFLW,
16346 IX86_BUILTIN_PSHUFD,
16348 IX86_BUILTIN_PSLLDQI128,
16349 IX86_BUILTIN_PSLLWI128,
16350 IX86_BUILTIN_PSLLDI128,
16351 IX86_BUILTIN_PSLLQI128,
16352 IX86_BUILTIN_PSRAWI128,
16353 IX86_BUILTIN_PSRADI128,
16354 IX86_BUILTIN_PSRLDQI128,
16355 IX86_BUILTIN_PSRLWI128,
16356 IX86_BUILTIN_PSRLDI128,
16357 IX86_BUILTIN_PSRLQI128,
16359 IX86_BUILTIN_PSLLDQ128,
16360 IX86_BUILTIN_PSLLW128,
16361 IX86_BUILTIN_PSLLD128,
16362 IX86_BUILTIN_PSLLQ128,
16363 IX86_BUILTIN_PSRAW128,
16364 IX86_BUILTIN_PSRAD128,
16365 IX86_BUILTIN_PSRLW128,
16366 IX86_BUILTIN_PSRLD128,
16367 IX86_BUILTIN_PSRLQ128,
16369 IX86_BUILTIN_PUNPCKHBW128,
16370 IX86_BUILTIN_PUNPCKHWD128,
16371 IX86_BUILTIN_PUNPCKHDQ128,
16372 IX86_BUILTIN_PUNPCKHQDQ128,
16373 IX86_BUILTIN_PUNPCKLBW128,
16374 IX86_BUILTIN_PUNPCKLWD128,
16375 IX86_BUILTIN_PUNPCKLDQ128,
16376 IX86_BUILTIN_PUNPCKLQDQ128,
16378 IX86_BUILTIN_CLFLUSH,
16379 IX86_BUILTIN_MFENCE,
16380 IX86_BUILTIN_LFENCE,
16382 /* Prescott New Instructions. */
16383 IX86_BUILTIN_ADDSUBPS,
16384 IX86_BUILTIN_HADDPS,
16385 IX86_BUILTIN_HSUBPS,
16386 IX86_BUILTIN_MOVSHDUP,
16387 IX86_BUILTIN_MOVSLDUP,
16388 IX86_BUILTIN_ADDSUBPD,
16389 IX86_BUILTIN_HADDPD,
16390 IX86_BUILTIN_HSUBPD,
16391 IX86_BUILTIN_LDDQU,
16393 IX86_BUILTIN_MONITOR,
16394 IX86_BUILTIN_MWAIT,
16396 /* SSSE3. */
16397 IX86_BUILTIN_PHADDW,
16398 IX86_BUILTIN_PHADDD,
16399 IX86_BUILTIN_PHADDSW,
16400 IX86_BUILTIN_PHSUBW,
16401 IX86_BUILTIN_PHSUBD,
16402 IX86_BUILTIN_PHSUBSW,
16403 IX86_BUILTIN_PMADDUBSW,
16404 IX86_BUILTIN_PMULHRSW,
16405 IX86_BUILTIN_PSHUFB,
16406 IX86_BUILTIN_PSIGNB,
16407 IX86_BUILTIN_PSIGNW,
16408 IX86_BUILTIN_PSIGND,
16409 IX86_BUILTIN_PALIGNR,
16410 IX86_BUILTIN_PABSB,
16411 IX86_BUILTIN_PABSW,
16412 IX86_BUILTIN_PABSD,
16414 IX86_BUILTIN_PHADDW128,
16415 IX86_BUILTIN_PHADDD128,
16416 IX86_BUILTIN_PHADDSW128,
16417 IX86_BUILTIN_PHSUBW128,
16418 IX86_BUILTIN_PHSUBD128,
16419 IX86_BUILTIN_PHSUBSW128,
16420 IX86_BUILTIN_PMADDUBSW128,
16421 IX86_BUILTIN_PMULHRSW128,
16422 IX86_BUILTIN_PSHUFB128,
16423 IX86_BUILTIN_PSIGNB128,
16424 IX86_BUILTIN_PSIGNW128,
16425 IX86_BUILTIN_PSIGND128,
16426 IX86_BUILTIN_PALIGNR128,
16427 IX86_BUILTIN_PABSB128,
16428 IX86_BUILTIN_PABSW128,
16429 IX86_BUILTIN_PABSD128,
16431 /* AMDFAM10 - SSE4A New Instructions. */
16432 IX86_BUILTIN_MOVNTSD,
16433 IX86_BUILTIN_MOVNTSS,
16434 IX86_BUILTIN_EXTRQI,
16435 IX86_BUILTIN_EXTRQ,
16436 IX86_BUILTIN_INSERTQI,
16437 IX86_BUILTIN_INSERTQ,
16439 /* SSE4.1. */
16440 IX86_BUILTIN_BLENDPD,
16441 IX86_BUILTIN_BLENDPS,
16442 IX86_BUILTIN_BLENDVPD,
16443 IX86_BUILTIN_BLENDVPS,
16444 IX86_BUILTIN_PBLENDVB128,
16445 IX86_BUILTIN_PBLENDW128,
16447 IX86_BUILTIN_DPPD,
16448 IX86_BUILTIN_DPPS,
16450 IX86_BUILTIN_INSERTPS128,
16452 IX86_BUILTIN_MOVNTDQA,
16453 IX86_BUILTIN_MPSADBW128,
16454 IX86_BUILTIN_PACKUSDW128,
16455 IX86_BUILTIN_PCMPEQQ,
16456 IX86_BUILTIN_PHMINPOSUW128,
16458 IX86_BUILTIN_PMAXSB128,
16459 IX86_BUILTIN_PMAXSD128,
16460 IX86_BUILTIN_PMAXUD128,
16461 IX86_BUILTIN_PMAXUW128,
16463 IX86_BUILTIN_PMINSB128,
16464 IX86_BUILTIN_PMINSD128,
16465 IX86_BUILTIN_PMINUD128,
16466 IX86_BUILTIN_PMINUW128,
16468 IX86_BUILTIN_PMOVSXBW128,
16469 IX86_BUILTIN_PMOVSXBD128,
16470 IX86_BUILTIN_PMOVSXBQ128,
16471 IX86_BUILTIN_PMOVSXWD128,
16472 IX86_BUILTIN_PMOVSXWQ128,
16473 IX86_BUILTIN_PMOVSXDQ128,
16475 IX86_BUILTIN_PMOVZXBW128,
16476 IX86_BUILTIN_PMOVZXBD128,
16477 IX86_BUILTIN_PMOVZXBQ128,
16478 IX86_BUILTIN_PMOVZXWD128,
16479 IX86_BUILTIN_PMOVZXWQ128,
16480 IX86_BUILTIN_PMOVZXDQ128,
16482 IX86_BUILTIN_PMULDQ128,
16483 IX86_BUILTIN_PMULLD128,
16485 IX86_BUILTIN_ROUNDPD,
16486 IX86_BUILTIN_ROUNDPS,
16487 IX86_BUILTIN_ROUNDSD,
16488 IX86_BUILTIN_ROUNDSS,
16490 IX86_BUILTIN_PTESTZ,
16491 IX86_BUILTIN_PTESTC,
16492 IX86_BUILTIN_PTESTNZC,
16494 IX86_BUILTIN_VEC_INIT_V2SI,
16495 IX86_BUILTIN_VEC_INIT_V4HI,
16496 IX86_BUILTIN_VEC_INIT_V8QI,
16497 IX86_BUILTIN_VEC_EXT_V2DF,
16498 IX86_BUILTIN_VEC_EXT_V2DI,
16499 IX86_BUILTIN_VEC_EXT_V4SF,
16500 IX86_BUILTIN_VEC_EXT_V4SI,
16501 IX86_BUILTIN_VEC_EXT_V8HI,
16502 IX86_BUILTIN_VEC_EXT_V2SI,
16503 IX86_BUILTIN_VEC_EXT_V4HI,
16504 IX86_BUILTIN_VEC_EXT_V16QI,
16505 IX86_BUILTIN_VEC_SET_V2DI,
16506 IX86_BUILTIN_VEC_SET_V4SF,
16507 IX86_BUILTIN_VEC_SET_V4SI,
16508 IX86_BUILTIN_VEC_SET_V8HI,
16509 IX86_BUILTIN_VEC_SET_V4HI,
16510 IX86_BUILTIN_VEC_SET_V16QI,
16512 IX86_BUILTIN_MAX
16515 /* Table for the ix86 builtin decls. */
16516 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16518 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16519 * if the target_flags include one of MASK. Stores the function decl
16520 * in the ix86_builtins array.
16521 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16523 static inline tree
16524 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16526 tree decl = NULL_TREE;
16528 if (mask & target_flags
16529 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16531 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16532 NULL, NULL_TREE);
16533 ix86_builtins[(int) code] = decl;
16536 return decl;
16539 /* Like def_builtin, but also marks the function decl "const". */
16541 static inline tree
16542 def_builtin_const (int mask, const char *name, tree type,
16543 enum ix86_builtins code)
16545 tree decl = def_builtin (mask, name, type, code);
16546 if (decl)
16547 TREE_READONLY (decl) = 1;
16548 return decl;
16551 /* Bits for builtin_description.flag. */
16553 /* Set when we don't support the comparison natively, and should
16554 swap_comparison in order to support it. */
16555 #define BUILTIN_DESC_SWAP_OPERANDS 1
16557 struct builtin_description
16559 const unsigned int mask;
16560 const enum insn_code icode;
16561 const char *const name;
16562 const enum ix86_builtins code;
16563 const enum rtx_code comparison;
16564 const unsigned int flag;
16567 static const struct builtin_description bdesc_comi[] =
16569 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16570 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16571 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16572 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16573 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16574 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16575 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16576 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16577 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16578 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16579 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16580 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16581 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16582 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16583 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16584 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16585 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16586 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16587 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16588 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16589 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16590 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16591 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16592 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16595 static const struct builtin_description bdesc_ptest[] =
16597 /* SSE4.1 */
16598 { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16599 { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
16600 { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
16603 /* SSE builtins with 3 arguments where the last argument must be an 8-bit
16604 constant or xmm0. */
16605 static const struct builtin_description bdesc_sse_3arg[] =
16607 /* SSE4.1 */
16608 { MASK_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, 0, 0 },
16609 { MASK_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, 0, 0 },
16610 { MASK_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, 0, 0 },
16611 { MASK_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, 0, 0 },
16612 { MASK_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, 0, 0 },
16613 { MASK_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, 0, 0 },
16614 { MASK_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, 0, 0 },
16615 { MASK_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, 0, 0 },
16616 { MASK_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, 0, 0 },
16617 { MASK_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, 0, 0 },
16618 { MASK_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, 0, 0 },
16619 { MASK_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, 0, 0 },
16622 static const struct builtin_description bdesc_2arg[] =
16624 /* SSE */
16625 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16626 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16627 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16628 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16629 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16630 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16631 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16632 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16634 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16635 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16636 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16637 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16638 BUILTIN_DESC_SWAP_OPERANDS },
16639 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16640 BUILTIN_DESC_SWAP_OPERANDS },
16641 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16642 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16643 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16644 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16645 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16646 BUILTIN_DESC_SWAP_OPERANDS },
16647 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16648 BUILTIN_DESC_SWAP_OPERANDS },
16649 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16650 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16651 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16652 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16653 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16654 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16655 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16656 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16657 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16658 BUILTIN_DESC_SWAP_OPERANDS },
16659 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16660 BUILTIN_DESC_SWAP_OPERANDS },
16661 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16663 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16664 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16665 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16666 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16668 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16669 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16670 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16671 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16673 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16674 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16675 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16676 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16677 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16679 /* MMX */
16680 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16681 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16682 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16683 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16684 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16685 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16686 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16687 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16689 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16690 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16691 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16692 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16693 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16694 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16695 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16696 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16698 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16699 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16700 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16702 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16703 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16704 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16705 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16707 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16708 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16710 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16711 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16712 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16713 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16714 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16715 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16717 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16718 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16719 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16720 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16722 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16723 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16724 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16725 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16726 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16727 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16729 /* Special. */
16730 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16731 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16732 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16734 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16735 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16736 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16738 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16739 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16740 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16741 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16742 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16743 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16745 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16746 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16747 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16748 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16749 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16750 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16752 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16753 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16754 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16755 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16757 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16758 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16760 /* SSE2 */
16761 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16762 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16763 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16764 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16765 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16766 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16767 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16768 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16770 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16771 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16772 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16773 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16774 BUILTIN_DESC_SWAP_OPERANDS },
16775 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16776 BUILTIN_DESC_SWAP_OPERANDS },
16777 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16778 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16779 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16780 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16781 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16782 BUILTIN_DESC_SWAP_OPERANDS },
16783 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16784 BUILTIN_DESC_SWAP_OPERANDS },
16785 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16786 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16787 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16788 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16789 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16790 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16791 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16792 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16793 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16795 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16796 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16797 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16798 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16800 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16801 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16802 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16803 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16805 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16806 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16807 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16809 /* SSE2 MMX */
16810 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16811 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16812 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16813 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16814 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16815 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16816 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16817 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16819 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16820 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16821 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16822 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16823 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16824 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16825 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16826 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16828 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16829 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16831 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16832 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16833 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16834 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16836 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16837 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16839 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16840 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16841 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16842 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16843 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16844 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16846 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16847 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16848 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16849 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16851 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16852 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16853 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16854 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16855 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16856 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16857 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16858 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16860 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16861 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16862 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16864 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16865 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16867 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16868 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16870 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16871 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16872 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16874 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16875 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16876 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16878 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16879 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16881 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16883 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16884 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16885 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16886 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16888 /* SSE3 MMX */
16889 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16890 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16891 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16892 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16893 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16894 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16896 /* SSSE3 */
16897 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16898 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16899 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16900 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16901 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16902 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16903 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16904 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16905 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16906 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16907 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16908 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16909 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16910 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16911 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16912 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16913 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16914 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16915 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16916 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16917 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16918 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16919 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16920 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 },
16922 /* SSE4.1 */
16923 { MASK_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, 0, 0 },
16924 { MASK_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, 0, 0 },
16925 { MASK_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, 0, 0 },
16926 { MASK_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, 0, 0 },
16927 { MASK_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, 0, 0 },
16928 { MASK_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, 0, 0 },
16929 { MASK_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, 0, 0 },
16930 { MASK_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, 0, 0 },
16931 { MASK_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, 0, 0 },
16932 { MASK_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, 0, 0 },
16933 { MASK_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, 0, 0 },
16934 { MASK_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, 0, 0 },
16937 static const struct builtin_description bdesc_1arg[] =
16939 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16940 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16942 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16943 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16944 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16946 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16947 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16948 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16949 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16950 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16951 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16953 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16954 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16956 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16958 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16959 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16961 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16962 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16963 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16964 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16965 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16967 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16969 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16970 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16971 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16972 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16974 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16975 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16976 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16978 /* SSE3 */
16979 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16980 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16982 /* SSSE3 */
16983 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16984 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16985 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16986 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16987 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16988 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16990 /* SSE4.1 */
16991 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, 0, 0 },
16992 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, 0, 0 },
16993 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, 0, 0 },
16994 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, 0, 0 },
16995 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, 0, 0 },
16996 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, 0, 0 },
16997 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, 0, 0 },
16998 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, 0, 0 },
16999 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, 0, 0 },
17000 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, 0, 0 },
17001 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, 0, 0 },
17002 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, 0, 0 },
17003 { MASK_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, 0, 0 },
17005 /* Fake 1-arg builtins that actually take a small constant (at most
17006 8 bits) as the second argument; a usage sketch follows the entries. */
17007 { MASK_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, 0, 0 },
17008 { MASK_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, 0, 0 },
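/* A minimal usage sketch for the two rounding entries above: because the
   name field is 0 they are registered separately (see the def_builtin_const
   calls for __builtin_ia32_roundps/roundpd further down), and
   ix86_expand_unop_builtin fetches the rounding mode as an extra immediate.
   The vector typedef below is an assumption of the example, not something
   defined in this file.

       typedef float v4sf_ex __attribute__ ((__vector_size__ (16)));

       v4sf_ex
       truncate_elements (v4sf_ex x)
       {
         return __builtin_ia32_roundps (x, 0x03);   -- imm 0x03: round toward zero
       }
*/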
17011 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17012 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
17013 builtins are defined. */
17014 static void
17015 ix86_init_mmx_sse_builtins (void)
17017 const struct builtin_description * d;
17018 size_t i;
17020 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17021 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17022 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17023 tree V2DI_type_node
17024 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17025 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17026 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17027 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17028 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17029 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17030 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17032 tree pchar_type_node = build_pointer_type (char_type_node);
17033 tree pcchar_type_node = build_pointer_type (
17034 build_type_variant (char_type_node, 1, 0));
17035 tree pfloat_type_node = build_pointer_type (float_type_node);
17036 tree pcfloat_type_node = build_pointer_type (
17037 build_type_variant (float_type_node, 1, 0));
17038 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17039 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17040 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17042 /* Comparisons. */
17043 tree int_ftype_v4sf_v4sf
17044 = build_function_type_list (integer_type_node,
17045 V4SF_type_node, V4SF_type_node, NULL_TREE);
17046 tree v4si_ftype_v4sf_v4sf
17047 = build_function_type_list (V4SI_type_node,
17048 V4SF_type_node, V4SF_type_node, NULL_TREE);
17049 /* MMX/SSE/integer conversions. */
17050 tree int_ftype_v4sf
17051 = build_function_type_list (integer_type_node,
17052 V4SF_type_node, NULL_TREE);
17053 tree int64_ftype_v4sf
17054 = build_function_type_list (long_long_integer_type_node,
17055 V4SF_type_node, NULL_TREE);
17056 tree int_ftype_v8qi
17057 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17058 tree v4sf_ftype_v4sf_int
17059 = build_function_type_list (V4SF_type_node,
17060 V4SF_type_node, integer_type_node, NULL_TREE);
17061 tree v4sf_ftype_v4sf_int64
17062 = build_function_type_list (V4SF_type_node,
17063 V4SF_type_node, long_long_integer_type_node,
17064 NULL_TREE);
17065 tree v4sf_ftype_v4sf_v2si
17066 = build_function_type_list (V4SF_type_node,
17067 V4SF_type_node, V2SI_type_node, NULL_TREE);
17069 /* Miscellaneous. */
17070 tree v8qi_ftype_v4hi_v4hi
17071 = build_function_type_list (V8QI_type_node,
17072 V4HI_type_node, V4HI_type_node, NULL_TREE);
17073 tree v4hi_ftype_v2si_v2si
17074 = build_function_type_list (V4HI_type_node,
17075 V2SI_type_node, V2SI_type_node, NULL_TREE);
17076 tree v4sf_ftype_v4sf_v4sf_int
17077 = build_function_type_list (V4SF_type_node,
17078 V4SF_type_node, V4SF_type_node,
17079 integer_type_node, NULL_TREE);
17080 tree v2si_ftype_v4hi_v4hi
17081 = build_function_type_list (V2SI_type_node,
17082 V4HI_type_node, V4HI_type_node, NULL_TREE);
17083 tree v4hi_ftype_v4hi_int
17084 = build_function_type_list (V4HI_type_node,
17085 V4HI_type_node, integer_type_node, NULL_TREE);
17086 tree v4hi_ftype_v4hi_di
17087 = build_function_type_list (V4HI_type_node,
17088 V4HI_type_node, long_long_unsigned_type_node,
17089 NULL_TREE);
17090 tree v2si_ftype_v2si_di
17091 = build_function_type_list (V2SI_type_node,
17092 V2SI_type_node, long_long_unsigned_type_node,
17093 NULL_TREE);
17094 tree void_ftype_void
17095 = build_function_type (void_type_node, void_list_node);
17096 tree void_ftype_unsigned
17097 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17098 tree void_ftype_unsigned_unsigned
17099 = build_function_type_list (void_type_node, unsigned_type_node,
17100 unsigned_type_node, NULL_TREE);
17101 tree void_ftype_pcvoid_unsigned_unsigned
17102 = build_function_type_list (void_type_node, const_ptr_type_node,
17103 unsigned_type_node, unsigned_type_node,
17104 NULL_TREE);
17105 tree unsigned_ftype_void
17106 = build_function_type (unsigned_type_node, void_list_node);
17107 tree v2si_ftype_v4sf
17108 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17109 /* Loads/stores. */
17110 tree void_ftype_v8qi_v8qi_pchar
17111 = build_function_type_list (void_type_node,
17112 V8QI_type_node, V8QI_type_node,
17113 pchar_type_node, NULL_TREE);
17114 tree v4sf_ftype_pcfloat
17115 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17116 /* @@@ the type is bogus */
17117 tree v4sf_ftype_v4sf_pv2si
17118 = build_function_type_list (V4SF_type_node,
17119 V4SF_type_node, pv2si_type_node, NULL_TREE);
17120 tree void_ftype_pv2si_v4sf
17121 = build_function_type_list (void_type_node,
17122 pv2si_type_node, V4SF_type_node, NULL_TREE);
17123 tree void_ftype_pfloat_v4sf
17124 = build_function_type_list (void_type_node,
17125 pfloat_type_node, V4SF_type_node, NULL_TREE);
17126 tree void_ftype_pdi_di
17127 = build_function_type_list (void_type_node,
17128 pdi_type_node, long_long_unsigned_type_node,
17129 NULL_TREE);
17130 tree void_ftype_pv2di_v2di
17131 = build_function_type_list (void_type_node,
17132 pv2di_type_node, V2DI_type_node, NULL_TREE);
17133 /* Normal vector unops. */
17134 tree v4sf_ftype_v4sf
17135 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17136 tree v16qi_ftype_v16qi
17137 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17138 tree v8hi_ftype_v8hi
17139 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17140 tree v4si_ftype_v4si
17141 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17142 tree v8qi_ftype_v8qi
17143 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17144 tree v4hi_ftype_v4hi
17145 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17147 /* Normal vector binops. */
17148 tree v4sf_ftype_v4sf_v4sf
17149 = build_function_type_list (V4SF_type_node,
17150 V4SF_type_node, V4SF_type_node, NULL_TREE);
17151 tree v8qi_ftype_v8qi_v8qi
17152 = build_function_type_list (V8QI_type_node,
17153 V8QI_type_node, V8QI_type_node, NULL_TREE);
17154 tree v4hi_ftype_v4hi_v4hi
17155 = build_function_type_list (V4HI_type_node,
17156 V4HI_type_node, V4HI_type_node, NULL_TREE);
17157 tree v2si_ftype_v2si_v2si
17158 = build_function_type_list (V2SI_type_node,
17159 V2SI_type_node, V2SI_type_node, NULL_TREE);
17160 tree di_ftype_di_di
17161 = build_function_type_list (long_long_unsigned_type_node,
17162 long_long_unsigned_type_node,
17163 long_long_unsigned_type_node, NULL_TREE);
17165 tree di_ftype_di_di_int
17166 = build_function_type_list (long_long_unsigned_type_node,
17167 long_long_unsigned_type_node,
17168 long_long_unsigned_type_node,
17169 integer_type_node, NULL_TREE);
17171 tree v2si_ftype_v2sf
17172 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17173 tree v2sf_ftype_v2si
17174 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17175 tree v2si_ftype_v2si
17176 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17177 tree v2sf_ftype_v2sf
17178 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17179 tree v2sf_ftype_v2sf_v2sf
17180 = build_function_type_list (V2SF_type_node,
17181 V2SF_type_node, V2SF_type_node, NULL_TREE);
17182 tree v2si_ftype_v2sf_v2sf
17183 = build_function_type_list (V2SI_type_node,
17184 V2SF_type_node, V2SF_type_node, NULL_TREE);
17185 tree pint_type_node = build_pointer_type (integer_type_node);
17186 tree pdouble_type_node = build_pointer_type (double_type_node);
17187 tree pcdouble_type_node = build_pointer_type (
17188 build_type_variant (double_type_node, 1, 0));
17189 tree int_ftype_v2df_v2df
17190 = build_function_type_list (integer_type_node,
17191 V2DF_type_node, V2DF_type_node, NULL_TREE);
17193 tree void_ftype_pcvoid
17194 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17195 tree v4sf_ftype_v4si
17196 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17197 tree v4si_ftype_v4sf
17198 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17199 tree v2df_ftype_v4si
17200 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17201 tree v4si_ftype_v2df
17202 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17203 tree v2si_ftype_v2df
17204 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17205 tree v4sf_ftype_v2df
17206 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17207 tree v2df_ftype_v2si
17208 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17209 tree v2df_ftype_v4sf
17210 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17211 tree int_ftype_v2df
17212 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17213 tree int64_ftype_v2df
17214 = build_function_type_list (long_long_integer_type_node,
17215 V2DF_type_node, NULL_TREE);
17216 tree v2df_ftype_v2df_int
17217 = build_function_type_list (V2DF_type_node,
17218 V2DF_type_node, integer_type_node, NULL_TREE);
17219 tree v2df_ftype_v2df_int64
17220 = build_function_type_list (V2DF_type_node,
17221 V2DF_type_node, long_long_integer_type_node,
17222 NULL_TREE);
17223 tree v4sf_ftype_v4sf_v2df
17224 = build_function_type_list (V4SF_type_node,
17225 V4SF_type_node, V2DF_type_node, NULL_TREE);
17226 tree v2df_ftype_v2df_v4sf
17227 = build_function_type_list (V2DF_type_node,
17228 V2DF_type_node, V4SF_type_node, NULL_TREE);
17229 tree v2df_ftype_v2df_v2df_int
17230 = build_function_type_list (V2DF_type_node,
17231 V2DF_type_node, V2DF_type_node,
17232 integer_type_node,
17233 NULL_TREE);
17234 tree v2df_ftype_v2df_pcdouble
17235 = build_function_type_list (V2DF_type_node,
17236 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17237 tree void_ftype_pdouble_v2df
17238 = build_function_type_list (void_type_node,
17239 pdouble_type_node, V2DF_type_node, NULL_TREE);
17240 tree void_ftype_pint_int
17241 = build_function_type_list (void_type_node,
17242 pint_type_node, integer_type_node, NULL_TREE);
17243 tree void_ftype_v16qi_v16qi_pchar
17244 = build_function_type_list (void_type_node,
17245 V16QI_type_node, V16QI_type_node,
17246 pchar_type_node, NULL_TREE);
17247 tree v2df_ftype_pcdouble
17248 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17249 tree v2df_ftype_v2df_v2df
17250 = build_function_type_list (V2DF_type_node,
17251 V2DF_type_node, V2DF_type_node, NULL_TREE);
17252 tree v16qi_ftype_v16qi_v16qi
17253 = build_function_type_list (V16QI_type_node,
17254 V16QI_type_node, V16QI_type_node, NULL_TREE);
17255 tree v8hi_ftype_v8hi_v8hi
17256 = build_function_type_list (V8HI_type_node,
17257 V8HI_type_node, V8HI_type_node, NULL_TREE);
17258 tree v4si_ftype_v4si_v4si
17259 = build_function_type_list (V4SI_type_node,
17260 V4SI_type_node, V4SI_type_node, NULL_TREE);
17261 tree v2di_ftype_v2di_v2di
17262 = build_function_type_list (V2DI_type_node,
17263 V2DI_type_node, V2DI_type_node, NULL_TREE);
17264 tree v2di_ftype_v2df_v2df
17265 = build_function_type_list (V2DI_type_node,
17266 V2DF_type_node, V2DF_type_node, NULL_TREE);
17267 tree v2df_ftype_v2df
17268 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17269 tree v2di_ftype_v2di_int
17270 = build_function_type_list (V2DI_type_node,
17271 V2DI_type_node, integer_type_node, NULL_TREE);
17272 tree v2di_ftype_v2di_v2di_int
17273 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17274 V2DI_type_node, integer_type_node, NULL_TREE);
17275 tree v4si_ftype_v4si_int
17276 = build_function_type_list (V4SI_type_node,
17277 V4SI_type_node, integer_type_node, NULL_TREE);
17278 tree v8hi_ftype_v8hi_int
17279 = build_function_type_list (V8HI_type_node,
17280 V8HI_type_node, integer_type_node, NULL_TREE);
17281 tree v4si_ftype_v8hi_v8hi
17282 = build_function_type_list (V4SI_type_node,
17283 V8HI_type_node, V8HI_type_node, NULL_TREE);
17284 tree di_ftype_v8qi_v8qi
17285 = build_function_type_list (long_long_unsigned_type_node,
17286 V8QI_type_node, V8QI_type_node, NULL_TREE);
17287 tree di_ftype_v2si_v2si
17288 = build_function_type_list (long_long_unsigned_type_node,
17289 V2SI_type_node, V2SI_type_node, NULL_TREE);
17290 tree v2di_ftype_v16qi_v16qi
17291 = build_function_type_list (V2DI_type_node,
17292 V16QI_type_node, V16QI_type_node, NULL_TREE);
17293 tree v2di_ftype_v4si_v4si
17294 = build_function_type_list (V2DI_type_node,
17295 V4SI_type_node, V4SI_type_node, NULL_TREE);
17296 tree int_ftype_v16qi
17297 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17298 tree v16qi_ftype_pcchar
17299 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17300 tree void_ftype_pchar_v16qi
17301 = build_function_type_list (void_type_node,
17302 pchar_type_node, V16QI_type_node, NULL_TREE);
17304 tree v2di_ftype_v2di_unsigned_unsigned
17305 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17306 unsigned_type_node, unsigned_type_node,
17307 NULL_TREE);
17308 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17309 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17310 unsigned_type_node, unsigned_type_node,
17311 NULL_TREE);
17312 tree v2di_ftype_v2di_v16qi
17313 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17314 NULL_TREE);
17315 tree v2df_ftype_v2df_v2df_v2df
17316 = build_function_type_list (V2DF_type_node,
17317 V2DF_type_node, V2DF_type_node,
17318 V2DF_type_node, NULL_TREE);
17319 tree v4sf_ftype_v4sf_v4sf_v4sf
17320 = build_function_type_list (V4SF_type_node,
17321 V4SF_type_node, V4SF_type_node,
17322 V4SF_type_node, NULL_TREE);
17323 tree v8hi_ftype_v16qi
17324 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17325 NULL_TREE);
17326 tree v4si_ftype_v16qi
17327 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17328 NULL_TREE);
17329 tree v2di_ftype_v16qi
17330 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17331 NULL_TREE);
17332 tree v4si_ftype_v8hi
17333 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17334 NULL_TREE);
17335 tree v2di_ftype_v8hi
17336 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17337 NULL_TREE);
17338 tree v2di_ftype_v4si
17339 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17340 NULL_TREE);
17341 tree v2di_ftype_pv2di
17342 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17343 NULL_TREE);
17344 tree v16qi_ftype_v16qi_v16qi_int
17345 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17346 V16QI_type_node, integer_type_node,
17347 NULL_TREE);
17348 tree v16qi_ftype_v16qi_v16qi_v16qi
17349 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17350 V16QI_type_node, V16QI_type_node,
17351 NULL_TREE);
17352 tree v8hi_ftype_v8hi_v8hi_int
17353 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17354 V8HI_type_node, integer_type_node,
17355 NULL_TREE);
17356 tree v4si_ftype_v4si_v4si_int
17357 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17358 V4SI_type_node, integer_type_node,
17359 NULL_TREE);
17360 tree int_ftype_v2di_v2di
17361 = build_function_type_list (integer_type_node,
17362 V2DI_type_node, V2DI_type_node,
17363 NULL_TREE);
17365 tree float80_type;
17366 tree float128_type;
17367 tree ftype;
17369 /* The __float80 type. */
17370 if (TYPE_MODE (long_double_type_node) == XFmode)
17371 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17372 "__float80");
17373 else
17375 /* The __float80 type. */
17376 float80_type = make_node (REAL_TYPE);
17377 TYPE_PRECISION (float80_type) = 80;
17378 layout_type (float80_type);
17379 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17382 if (TARGET_64BIT)
17384 float128_type = make_node (REAL_TYPE);
17385 TYPE_PRECISION (float128_type) = 128;
17386 layout_type (float128_type);
17387 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
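/* Rough sketch of how the types registered above appear to user code
   (the __float128 declaration assumes a 64-bit target, matching the
   TARGET_64BIT guard):

       __float80  ext  = 1.0;        -- 80-bit x87 extended precision, XFmode
       __float128 quad = 1.0;        -- 128-bit float, 64-bit targets only
*/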
17390 /* Add all SSE builtins that are more or less simple operations on
17391 three operands. */
17392 for (i = 0, d = bdesc_sse_3arg;
17393 i < ARRAY_SIZE (bdesc_sse_3arg);
17394 i++, d++)
17396 /* Use one of the operands; the target can have a different mode for
17397 mask-generating compares. */
17398 enum machine_mode mode;
17399 tree type;
17401 if (d->name == 0)
17402 continue;
17403 mode = insn_data[d->icode].operand[1].mode;
17405 switch (mode)
17407 case V16QImode:
17408 type = v16qi_ftype_v16qi_v16qi_int;
17409 break;
17410 case V8HImode:
17411 type = v8hi_ftype_v8hi_v8hi_int;
17412 break;
17413 case V4SImode:
17414 type = v4si_ftype_v4si_v4si_int;
17415 break;
17416 case V2DImode:
17417 type = v2di_ftype_v2di_v2di_int;
17418 break;
17419 case V2DFmode:
17420 type = v2df_ftype_v2df_v2df_int;
17421 break;
17422 case V4SFmode:
17423 type = v4sf_ftype_v4sf_v4sf_int;
17424 break;
17425 default:
17426 gcc_unreachable ();
17429 /* Override for variable blends. */
17430 switch (d->icode)
17432 case CODE_FOR_sse4_1_blendvpd:
17433 type = v2df_ftype_v2df_v2df_v2df;
17434 break;
17435 case CODE_FOR_sse4_1_blendvps:
17436 type = v4sf_ftype_v4sf_v4sf_v4sf;
17437 break;
17438 case CODE_FOR_sse4_1_pblendvb:
17439 type = v16qi_ftype_v16qi_v16qi_v16qi;
17440 break;
17441 default:
17442 break;
17445 def_builtin (d->mask, d->name, type, d->code);
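/* Net effect of the loop above, sketched as hypothetical prototypes (the
   real names come from the bdesc_sse_3arg table): most three-operand
   builtins take two vectors plus an 8-bit immediate, while the blendv
   overrides take a third vector that ix86_expand_sse_4_operands_builtin
   later forces into xmm0.

       v2df blend_with_immediate   (v2df, v2df, int);    -- selector is a constant
       v2df blend_with_vector_mask (v2df, v2df, v2df);   -- mask operand lives in xmm0
*/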
17448 /* Add all builtins that are more or less simple operations on two
17449 operands. */
17450 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17452 /* Use one of the operands; the target can have a different mode for
17453 mask-generating compares. */
17454 enum machine_mode mode;
17455 tree type;
17457 if (d->name == 0)
17458 continue;
17459 mode = insn_data[d->icode].operand[1].mode;
17461 switch (mode)
17463 case V16QImode:
17464 type = v16qi_ftype_v16qi_v16qi;
17465 break;
17466 case V8HImode:
17467 type = v8hi_ftype_v8hi_v8hi;
17468 break;
17469 case V4SImode:
17470 type = v4si_ftype_v4si_v4si;
17471 break;
17472 case V2DImode:
17473 type = v2di_ftype_v2di_v2di;
17474 break;
17475 case V2DFmode:
17476 type = v2df_ftype_v2df_v2df;
17477 break;
17478 case V4SFmode:
17479 type = v4sf_ftype_v4sf_v4sf;
17480 break;
17481 case V8QImode:
17482 type = v8qi_ftype_v8qi_v8qi;
17483 break;
17484 case V4HImode:
17485 type = v4hi_ftype_v4hi_v4hi;
17486 break;
17487 case V2SImode:
17488 type = v2si_ftype_v2si_v2si;
17489 break;
17490 case DImode:
17491 type = di_ftype_di_di;
17492 break;
17494 default:
17495 gcc_unreachable ();
17498 /* Override for comparisons. */
17499 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17500 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17501 type = v4si_ftype_v4sf_v4sf;
17503 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17504 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17505 type = v2di_ftype_v2df_v2df;
17507 def_builtin (d->mask, d->name, type, d->code);
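/* The comparison override above means the mask-generating compares hand
   back an integer vector rather than a float vector; sketched as
   hypothetical prototype shapes:

       v4si sse_compare_mask  (v4sf, v4sf);    -- each lane all-ones or all-zeros
       v2di sse2_compare_mask (v2df, v2df);
*/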
17510 /* Add all builtins that are more or less simple operations on 1 operand. */
17511 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17513 enum machine_mode mode;
17514 tree type;
17516 if (d->name == 0)
17517 continue;
17518 mode = insn_data[d->icode].operand[1].mode;
17520 switch (mode)
17522 case V16QImode:
17523 type = v16qi_ftype_v16qi;
17524 break;
17525 case V8HImode:
17526 type = v8hi_ftype_v8hi;
17527 break;
17528 case V4SImode:
17529 type = v4si_ftype_v4si;
17530 break;
17531 case V2DFmode:
17532 type = v2df_ftype_v2df;
17533 break;
17534 case V4SFmode:
17535 type = v4sf_ftype_v4sf;
17536 break;
17537 case V8QImode:
17538 type = v8qi_ftype_v8qi;
17539 break;
17540 case V4HImode:
17541 type = v4hi_ftype_v4hi;
17542 break;
17543 case V2SImode:
17544 type = v2si_ftype_v2si;
17545 break;
17547 default:
17548 gcc_unreachable ();
17551 def_builtin (d->mask, d->name, type, d->code);
17554 /* Add the remaining MMX insns with somewhat more complicated types. */
17555 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17556 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17557 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17558 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17560 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17561 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17562 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17564 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17565 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17567 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17568 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
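/* Usage sketch for the MMX shift builtins registered above: the count is
   the DImode (long long) operand of v4hi_ftype_v4hi_di.  The vector typedef
   is an assumption of the example.

       typedef short v4hi_ex __attribute__ ((__vector_size__ (8)));

       v4hi_ex
       shift_left_words (v4hi_ex x, long long n)
       {
         return __builtin_ia32_psllw (x, n);
       }
*/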
17570 /* comi/ucomi insns. */
17571 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17572 if (d->mask == MASK_SSE2)
17573 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17574 else
17575 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17577 /* ptest insns. */
17578 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
17579 def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
17581 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17582 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17583 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17585 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17586 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17587 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17588 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17589 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17590 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17591 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17592 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17593 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17594 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17595 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
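/* Small sketch for the MXCSR builtins registered above: read the SSE
   control/status register, set the flush-to-zero bit (bit 15, an
   architectural constant not defined in this file), and write it back.

       unsigned int csr = __builtin_ia32_stmxcsr ();
       __builtin_ia32_ldmxcsr (csr | 0x8000);
*/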
17597 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17599 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17600 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17602 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17603 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17604 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17605 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17607 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17608 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17609 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17610 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17612 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17614 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17616 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17617 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17618 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17619 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17620 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17621 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17623 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17625 /* Original 3DNow! */
17626 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17627 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17628 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17629 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17630 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17631 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17632 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17633 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17634 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17635 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17636 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17637 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17638 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17639 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17640 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17641 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17642 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17643 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17644 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17645 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17647 /* 3DNow! extension as used in the Athlon CPU. */
17648 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17649 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17650 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17651 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17652 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17653 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17655 /* SSE2 */
17656 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17658 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17659 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
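/* Usage sketch for the unaligned SSE2 load/store builtins just above:
   copy two doubles between arbitrarily aligned locations.

       void
       copy_two_doubles (double *dst, const double *src)
       {
         __builtin_ia32_storeupd (dst, __builtin_ia32_loadupd (src));
       }
*/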
17661 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17662 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17664 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17665 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17666 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17667 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17668 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17670 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17671 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17672 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17673 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17675 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17676 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17678 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17680 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17681 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17683 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17684 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17685 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17686 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17687 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17689 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17691 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17692 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17693 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17694 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17696 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17697 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17698 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17700 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17701 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17702 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17703 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17705 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17706 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17707 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17709 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17710 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17712 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17713 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17715 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17716 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17717 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17718 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17719 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
17720 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
17721 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17723 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17724 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17725 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17726 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17727 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
17728 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
17729 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17731 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17732 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17733 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
17734 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
17736 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17738 /* Prescott New Instructions. */
17739 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17740 void_ftype_pcvoid_unsigned_unsigned,
17741 IX86_BUILTIN_MONITOR);
17742 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17743 void_ftype_unsigned_unsigned,
17744 IX86_BUILTIN_MWAIT);
17745 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17746 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
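/* Hedged sketch of the MONITOR/MWAIT pair registered above: arm the monitor
   on an address shared with another thread, then sleep until that cache
   line is written.  The zero extension/hint arguments are the plain
   defaults; the flag variable is an assumption of the example.

       static volatile int flag;

       __builtin_ia32_monitor ((const void *) &flag, 0, 0);
       if (!flag)
         __builtin_ia32_mwait (0, 0);
*/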
17748 /* SSSE3. */
17749 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17750 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17751 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17752 IX86_BUILTIN_PALIGNR);
17754 /* SSE4.1. */
17755 def_builtin (MASK_SSE4_1, "__builtin_ia32_movntdqa",
17756 v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
17757 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbw128",
17758 v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
17759 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbd128",
17760 v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
17761 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbq128",
17762 v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
17763 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwd128",
17764 v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
17765 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwq128",
17766 v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
17767 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxdq128",
17768 v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
17769 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbw128",
17770 v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
17771 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbd128",
17772 v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
17773 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbq128",
17774 v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
17775 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwd128",
17776 v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
17777 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwq128",
17778 v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
17779 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxdq128",
17780 v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
17781 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmuldq128",
17782 v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
17783 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundpd",
17784 v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
17785 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundps",
17786 v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
17787 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundsd",
17788 v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
17789 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundss",
17790 v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
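/* Usage sketch for the SSE4.1 widening moves registered above: sign-extend
   the low eight bytes of a 128-bit vector into sixteen-bit lanes.  The
   vector typedefs are assumptions of the example.

       typedef char  v16qi_ex __attribute__ ((__vector_size__ (16)));
       typedef short v8hi_ex  __attribute__ ((__vector_size__ (16)));

       v8hi_ex
       widen_low_bytes (v16qi_ex x)
       {
         return __builtin_ia32_pmovsxbw128 (x);
       }
*/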
17792 /* AMDFAM10 SSE4A new built-ins. */
17793 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17794 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17795 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17796 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17797 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17798 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17799 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17800 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17801 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17802 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17803 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17804 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17806 /* Access to the vec_init patterns. */
17807 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17808 integer_type_node, NULL_TREE);
17809 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17810 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17812 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17813 short_integer_type_node,
17814 short_integer_type_node,
17815 short_integer_type_node, NULL_TREE);
17816 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17817 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17819 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17820 char_type_node, char_type_node,
17821 char_type_node, char_type_node,
17822 char_type_node, char_type_node,
17823 char_type_node, NULL_TREE);
17824 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17825 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
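/* Usage sketch for the vec_init builtins above: build an MMX V2SI vector
   directly from scalars.  The typedef and cast are assumptions of the
   example.

       typedef int v2si_ex __attribute__ ((__vector_size__ (8)));

       v2si_ex v = (v2si_ex) __builtin_ia32_vec_init_v2si (1, 2);
*/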
17827 /* Access to the vec_extract patterns. */
17828 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17829 integer_type_node, NULL_TREE);
17830 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17831 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17833 ftype = build_function_type_list (long_long_integer_type_node,
17834 V2DI_type_node, integer_type_node,
17835 NULL_TREE);
17836 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17837 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17839 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17840 integer_type_node, NULL_TREE);
17841 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17842 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17844 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17845 integer_type_node, NULL_TREE);
17846 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17847 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17849 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17850 integer_type_node, NULL_TREE);
17851 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17852 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17854 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17855 integer_type_node, NULL_TREE);
17856 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17857 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17859 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17860 integer_type_node, NULL_TREE);
17861 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17862 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17864 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17865 integer_type_node, NULL_TREE);
17866 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v16qi",
17867 ftype, IX86_BUILTIN_VEC_EXT_V16QI);
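/* Usage sketch for the vec_extract builtins above: read one lane of a
   V4SF vector; the lane index is expected to be a compile-time constant.
   The typedef is an assumption of the example.

       typedef float v4sf_ex __attribute__ ((__vector_size__ (16)));

       float
       third_lane (v4sf_ex x)
       {
         return __builtin_ia32_vec_ext_v4sf (x, 2);
       }
*/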
17869 /* Access to the vec_set patterns. */
17870 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17871 intDI_type_node,
17872 integer_type_node, NULL_TREE);
17873 def_builtin (MASK_SSE4_1 | MASK_64BIT, "__builtin_ia32_vec_set_v2di",
17874 ftype, IX86_BUILTIN_VEC_SET_V2DI);
17876 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17877 float_type_node,
17878 integer_type_node, NULL_TREE);
17879 def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4sf",
17880 ftype, IX86_BUILTIN_VEC_SET_V4SF);
17882 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17883 intSI_type_node,
17884 integer_type_node, NULL_TREE);
17885 def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4si",
17886 ftype, IX86_BUILTIN_VEC_SET_V4SI);
17888 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17889 intHI_type_node,
17890 integer_type_node, NULL_TREE);
17891 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17892 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17894 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17895 intHI_type_node,
17896 integer_type_node, NULL_TREE);
17897 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17898 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17900 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17901 intQI_type_node,
17902 integer_type_node, NULL_TREE);
17903 def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v16qi",
17904 ftype, IX86_BUILTIN_VEC_SET_V16QI);
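/* Usage sketch for the vec_set builtins above: replace one lane of a V8HI
   vector; again the index is expected to be constant and the typedef is an
   assumption of the example.

       typedef short v8hi_ex __attribute__ ((__vector_size__ (16)));

       v8hi_ex
       set_lane_three (v8hi_ex x, short value)
       {
         return __builtin_ia32_vec_set_v8hi (x, value, 3);
       }
*/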
17907 static void
17908 ix86_init_builtins (void)
17910 if (TARGET_MMX)
17911 ix86_init_mmx_sse_builtins ();
17914 /* Errors in the source file can cause expand_expr to return const0_rtx
17915 where we expect a vector. To avoid crashing, use one of the vector
17916 clear instructions. */
17917 static rtx
17918 safe_vector_operand (rtx x, enum machine_mode mode)
17920 if (x == const0_rtx)
17921 x = CONST0_RTX (mode);
17922 return x;
17925 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
17926 4 operands. The third argument must be either a constant that fits
17927 in 8 bits or the xmm0 register. */
17929 static rtx
17930 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
17931 rtx target)
17933 rtx pat;
17934 tree arg0 = CALL_EXPR_ARG (exp, 0);
17935 tree arg1 = CALL_EXPR_ARG (exp, 1);
17936 tree arg2 = CALL_EXPR_ARG (exp, 2);
17937 rtx op0 = expand_normal (arg0);
17938 rtx op1 = expand_normal (arg1);
17939 rtx op2 = expand_normal (arg2);
17940 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17941 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17942 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17943 enum machine_mode mode2;
17944 rtx xmm0;
17946 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17947 op0 = copy_to_mode_reg (mode0, op0);
17948 if ((optimize && !register_operand (op1, mode1))
17949 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17950 op1 = copy_to_mode_reg (mode1, op1);
17952 switch (icode)
17954 case CODE_FOR_sse4_1_blendvpd:
17955 case CODE_FOR_sse4_1_blendvps:
17956 case CODE_FOR_sse4_1_pblendvb:
17957 /* The third argument of variable blends must be xmm0. */
17958 xmm0 = gen_rtx_REG (tmode, FIRST_SSE_REG);
17959 emit_move_insn (xmm0, op2);
17960 op2 = xmm0;
17961 break;
17962 default:
17963 mode2 = insn_data[icode].operand[2].mode;
17964 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17966 switch (icode)
17968 case CODE_FOR_sse4_1_roundsd:
17969 case CODE_FOR_sse4_1_roundss:
17970 error ("the third argument must be a 4-bit immediate");
17971 break;
17972 default:
17973 error ("the third argument must be an 8-bit immediate");
17974 break;
17976 return const0_rtx;
17978 break;
17981 if (optimize
17982 || target == 0
17983 || GET_MODE (target) != tmode
17984 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17985 target = gen_reg_rtx (tmode);
17986 pat = GEN_FCN (icode) (target, op0, op1, op2);
17987 if (! pat)
17988 return 0;
17989 emit_insn (pat);
17990 return target;
17993 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17995 static rtx
17996 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17998 rtx pat, xops[3];
17999 tree arg0 = CALL_EXPR_ARG (exp, 0);
18000 tree arg1 = CALL_EXPR_ARG (exp, 1);
18001 rtx op0 = expand_normal (arg0);
18002 rtx op1 = expand_normal (arg1);
18003 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18004 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18005 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18007 if (VECTOR_MODE_P (mode0))
18008 op0 = safe_vector_operand (op0, mode0);
18009 if (VECTOR_MODE_P (mode1))
18010 op1 = safe_vector_operand (op1, mode1);
18012 if (optimize || !target
18013 || GET_MODE (target) != tmode
18014 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18015 target = gen_reg_rtx (tmode);
18017 if (GET_MODE (op1) == SImode && mode1 == TImode)
18019 rtx x = gen_reg_rtx (V4SImode);
18020 emit_insn (gen_sse2_loadd (x, op1));
18021 op1 = gen_lowpart (TImode, x);
18024 /* The insn must want input operands in the same modes as the
18025 result. */
18026 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
18027 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
18029 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18030 op0 = copy_to_mode_reg (mode0, op0);
18031 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18032 op1 = copy_to_mode_reg (mode1, op1);
18034 /* ??? Using ix86_fixup_binary_operands is problematic when
18035 we've got mismatched modes. Fake it. */
18037 xops[0] = target;
18038 xops[1] = op0;
18039 xops[2] = op1;
18041 if (tmode == mode0 && tmode == mode1)
18043 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18044 op0 = xops[1];
18045 op1 = xops[2];
18047 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18049 op0 = force_reg (mode0, op0);
18050 op1 = force_reg (mode1, op1);
18051 target = gen_reg_rtx (tmode);
18054 pat = GEN_FCN (icode) (target, op0, op1);
18055 if (! pat)
18056 return 0;
18057 emit_insn (pat);
18058 return target;
18061 /* Subroutine of ix86_expand_builtin to take care of stores. */
18063 static rtx
18064 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18066 rtx pat;
18067 tree arg0 = CALL_EXPR_ARG (exp, 0);
18068 tree arg1 = CALL_EXPR_ARG (exp, 1);
18069 rtx op0 = expand_normal (arg0);
18070 rtx op1 = expand_normal (arg1);
18071 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18072 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18074 if (VECTOR_MODE_P (mode1))
18075 op1 = safe_vector_operand (op1, mode1);
18077 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18078 op1 = copy_to_mode_reg (mode1, op1);
18080 pat = GEN_FCN (icode) (op0, op1);
18081 if (pat)
18082 emit_insn (pat);
18083 return 0;
18086 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18088 static rtx
18089 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18090 rtx target, int do_load)
18092 rtx pat;
18093 tree arg0 = CALL_EXPR_ARG (exp, 0);
18094 rtx op0 = expand_normal (arg0);
18095 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18096 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18098 if (optimize || !target
18099 || GET_MODE (target) != tmode
18100 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18101 target = gen_reg_rtx (tmode);
18102 if (do_load)
18103 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18104 else
18106 if (VECTOR_MODE_P (mode0))
18107 op0 = safe_vector_operand (op0, mode0);
18109 if ((optimize && !register_operand (op0, mode0))
18110 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18111 op0 = copy_to_mode_reg (mode0, op0);
18114 switch (icode)
18116 case CODE_FOR_sse4_1_roundpd:
18117 case CODE_FOR_sse4_1_roundps:
18119 tree arg1 = CALL_EXPR_ARG (exp, 1);
18120 rtx op1 = expand_normal (arg1);
18121 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18123 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18125 error ("the second argument must be a 4-bit immediate");
18126 return const0_rtx;
18128 pat = GEN_FCN (icode) (target, op0, op1);
18130 break;
18131 default:
18132 pat = GEN_FCN (icode) (target, op0);
18133 break;
18136 if (! pat)
18137 return 0;
18138 emit_insn (pat);
18139 return target;
18142 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18143 sqrtss, rsqrtss, rcpss. */
18145 static rtx
18146 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18148 rtx pat;
18149 tree arg0 = CALL_EXPR_ARG (exp, 0);
18150 rtx op1, op0 = expand_normal (arg0);
18151 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18152 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18154 if (optimize || !target
18155 || GET_MODE (target) != tmode
18156 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18157 target = gen_reg_rtx (tmode);
18159 if (VECTOR_MODE_P (mode0))
18160 op0 = safe_vector_operand (op0, mode0);
18162 if ((optimize && !register_operand (op0, mode0))
18163 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18164 op0 = copy_to_mode_reg (mode0, op0);
18166 op1 = op0;
18167 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18168 op1 = copy_to_mode_reg (mode0, op1);
18170 pat = GEN_FCN (icode) (target, op0, op1);
18171 if (! pat)
18172 return 0;
18173 emit_insn (pat);
18174 return target;
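/* Sketch of why OP0 is passed twice (inferred from the vm* patterns,
   not an authoritative statement): sqrtss only computes the square
   root of the low element and takes the remaining elements from its
   vector operand, so

       #include <xmmintrin.h>
       __m128 sqrt_lo (__m128 v)
       {
         return _mm_sqrt_ss (v);
       }

   expands with both vector operands of the pattern equal to V, which
   is exactly what the op1 = op0 assignment above arranges.  */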
18177 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18179 static rtx
18180 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18181 rtx target)
18183 rtx pat;
18184 tree arg0 = CALL_EXPR_ARG (exp, 0);
18185 tree arg1 = CALL_EXPR_ARG (exp, 1);
18186 rtx op0 = expand_normal (arg0);
18187 rtx op1 = expand_normal (arg1);
18188 rtx op2;
18189 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18190 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18191 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18192 enum rtx_code comparison = d->comparison;
18194 if (VECTOR_MODE_P (mode0))
18195 op0 = safe_vector_operand (op0, mode0);
18196 if (VECTOR_MODE_P (mode1))
18197 op1 = safe_vector_operand (op1, mode1);
18199 /* Swap operands if we have a comparison that isn't available in
18200 hardware. */
18201 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18203 rtx tmp = gen_reg_rtx (mode1);
18204 emit_move_insn (tmp, op1);
18205 op1 = op0;
18206 op0 = tmp;
18209 if (optimize || !target
18210 || GET_MODE (target) != tmode
18211 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18212 target = gen_reg_rtx (tmode);
18214 if ((optimize && !register_operand (op0, mode0))
18215 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18216 op0 = copy_to_mode_reg (mode0, op0);
18217 if ((optimize && !register_operand (op1, mode1))
18218 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18219 op1 = copy_to_mode_reg (mode1, op1);
18221 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18222 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18223 if (! pat)
18224 return 0;
18225 emit_insn (pat);
18226 return target;
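/* Illustrative note on BUILTIN_DESC_SWAP_OPERANDS (a sketch based on
   the code above): the SSE compare instructions only encode the
   LT/LE/EQ style predicates, so a "greater than" builtin such as

       #include <xmmintrin.h>
       __m128 gt (__m128 a, __m128 b)
       {
         return _mm_cmpgt_ps (a, b);
       }

   is assumed to arrive here with comparison code LT and the swap flag
   set, yielding the same mask as cmpltps with B and A exchanged.  */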
18229 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18231 static rtx
18232 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18233 rtx target)
18235 rtx pat;
18236 tree arg0 = CALL_EXPR_ARG (exp, 0);
18237 tree arg1 = CALL_EXPR_ARG (exp, 1);
18238 rtx op0 = expand_normal (arg0);
18239 rtx op1 = expand_normal (arg1);
18240 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18241 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18242 enum rtx_code comparison = d->comparison;
18244 if (VECTOR_MODE_P (mode0))
18245 op0 = safe_vector_operand (op0, mode0);
18246 if (VECTOR_MODE_P (mode1))
18247 op1 = safe_vector_operand (op1, mode1);
18249 /* Swap operands if we have a comparison that isn't available in
18250 hardware. */
18251 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18253 rtx tmp = op1;
18254 op1 = op0;
18255 op0 = tmp;
18258 target = gen_reg_rtx (SImode);
18259 emit_move_insn (target, const0_rtx);
18260 target = gen_rtx_SUBREG (QImode, target, 0);
18262 if ((optimize && !register_operand (op0, mode0))
18263 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18264 op0 = copy_to_mode_reg (mode0, op0);
18265 if ((optimize && !register_operand (op1, mode1))
18266 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18267 op1 = copy_to_mode_reg (mode1, op1);
18269 pat = GEN_FCN (d->icode) (op0, op1);
18270 if (! pat)
18271 return 0;
18272 emit_insn (pat);
18273 emit_insn (gen_rtx_SET (VOIDmode,
18274 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18275 gen_rtx_fmt_ee (comparison, QImode,
18276 SET_DEST (pat),
18277 const0_rtx)));
18279 return SUBREG_REG (target);
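/* Small usage sketch (an assumption, not from the original file): the
   comi builtins return an int derived from EFLAGS, e.g.

       #include <xmmintrin.h>
       int eq_lo (__m128 a, __m128 b)
       {
         return _mm_comieq_ss (a, b);
       }

   The pattern emits comiss, and the code above materializes the
   result by zeroing an SImode register and then setting its low
   QImode part from the requested flags comparison.  */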
18282 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18284 static rtx
18285 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18286 rtx target)
18288 rtx pat;
18289 tree arg0 = CALL_EXPR_ARG (exp, 0);
18290 tree arg1 = CALL_EXPR_ARG (exp, 1);
18291 rtx op0 = expand_normal (arg0);
18292 rtx op1 = expand_normal (arg1);
18293 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18294 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18295 enum rtx_code comparison = d->comparison;
18297 if (VECTOR_MODE_P (mode0))
18298 op0 = safe_vector_operand (op0, mode0);
18299 if (VECTOR_MODE_P (mode1))
18300 op1 = safe_vector_operand (op1, mode1);
18302 target = gen_reg_rtx (SImode);
18303 emit_move_insn (target, const0_rtx);
18304 target = gen_rtx_SUBREG (QImode, target, 0);
18306 if ((optimize && !register_operand (op0, mode0))
18307 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18308 op0 = copy_to_mode_reg (mode0, op0);
18309 if ((optimize && !register_operand (op1, mode1))
18310 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18311 op1 = copy_to_mode_reg (mode1, op1);
18313 pat = GEN_FCN (d->icode) (op0, op1);
18314 if (! pat)
18315 return 0;
18316 emit_insn (pat);
18317 emit_insn (gen_rtx_SET (VOIDmode,
18318 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18319 gen_rtx_fmt_ee (comparison, QImode,
18320 SET_DEST (pat),
18321 const0_rtx)));
18323 return SUBREG_REG (target);
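/* The ptest variant works the same way (a sketch, assuming the SSE4.1
   intrinsics from <smmintrin.h>):

       #include <smmintrin.h>
       int all_zero (__m128i a, __m128i b)
       {
         return _mm_testz_si128 (a, b);
       }

   ptest sets ZF when (A & B) == 0, and the flags are read back
   through the comparison code recorded in the descriptor.  */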
18326 /* Return the integer constant in ARG. Constrain it to be in the range
18327 of the subparts of VEC_TYPE; issue an error if not. */
18329 static int
18330 get_element_number (tree vec_type, tree arg)
18332 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
18334 if (!host_integerp (arg, 1)
18335 || (elt = tree_low_cst (arg, 1), elt > max))
18337 error ("selector must be an integer constant in the range 0..%wi", max);
18338 return 0;
18341 return elt;
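/* For instance (an assumed example), __builtin_ia32_vec_ext_v4sf
   accepts selectors 0..3, so

       __v4sf v = { 1.0f, 2.0f, 3.0f, 4.0f };
       float x = __builtin_ia32_vec_ext_v4sf (v, 5);

   would trigger the diagnostic above, because 5 exceeds the maximum
   selector of 3 for a four-element vector.  */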
18344 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18345 ix86_expand_vector_init. We DO have language-level syntax for this, in
18346 the form of (type){ init-list }. Except that since we can't place emms
18347 instructions from inside the compiler, we can't allow the use of MMX
18348 registers unless the user explicitly asks for it. So we do *not* define
18349 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18350 we have builtins invoked by mmintrin.h that give us license to emit
18351 these sorts of instructions. */
18353 static rtx
18354 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
18356 enum machine_mode tmode = TYPE_MODE (type);
18357 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
18358 int i, n_elt = GET_MODE_NUNITS (tmode);
18359 rtvec v = rtvec_alloc (n_elt);
18361 gcc_assert (VECTOR_MODE_P (tmode));
18362 gcc_assert (call_expr_nargs (exp) == n_elt);
18364 for (i = 0; i < n_elt; ++i)
18366 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
18367 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
18370 if (!target || !register_operand (target, tmode))
18371 target = gen_reg_rtx (tmode);
18373 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
18374 return target;
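/* A user-level sketch of how this expander is reached (assumed, based
   on the mmintrin.h wrappers): with -mmmx,

       #include <mmintrin.h>
       __m64 make (void)
       {
         return _mm_setr_pi16 (1, 2, 3, 4);
       }

   the intrinsic is assumed to call __builtin_ia32_vec_init_v4hi,
   which lands here as IX86_BUILTIN_VEC_INIT_V4HI and is lowered
   through ix86_expand_vector_init, keeping MMX register use under the
   user's control.  */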
18377 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18378 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
18379 had a language-level syntax for referencing vector elements. */
18381 static rtx
18382 ix86_expand_vec_ext_builtin (tree exp, rtx target)
18384 enum machine_mode tmode, mode0;
18385 tree arg0, arg1;
18386 int elt;
18387 rtx op0;
18389 arg0 = CALL_EXPR_ARG (exp, 0);
18390 arg1 = CALL_EXPR_ARG (exp, 1);
18392 op0 = expand_normal (arg0);
18393 elt = get_element_number (TREE_TYPE (arg0), arg1);
18395 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
18396 mode0 = TYPE_MODE (TREE_TYPE (arg0));
18397 gcc_assert (VECTOR_MODE_P (mode0));
18399 op0 = force_reg (mode0, op0);
18401 if (optimize || !target || !register_operand (target, tmode))
18402 target = gen_reg_rtx (tmode);
18404 ix86_expand_vector_extract (true, target, op0, elt);
18406 return target;
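/* Correspondingly (an illustrative assumption), _mm_extract_epi16
   from <emmintrin.h> is a thin wrapper:

       #include <emmintrin.h>
       int third (__m128i v)
       {
         return _mm_extract_epi16 (v, 3);
       }

   which is expected to reach this routine as IX86_BUILTIN_VEC_EXT_V8HI
   with ELT == 3 and be lowered by ix86_expand_vector_extract.  */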
18409 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18410 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
18411 a language-level syntax for referencing vector elements. */
18413 static rtx
18414 ix86_expand_vec_set_builtin (tree exp)
18416 enum machine_mode tmode, mode1;
18417 tree arg0, arg1, arg2;
18418 int elt;
18419 rtx op0, op1, target;
18421 arg0 = CALL_EXPR_ARG (exp, 0);
18422 arg1 = CALL_EXPR_ARG (exp, 1);
18423 arg2 = CALL_EXPR_ARG (exp, 2);
18425 tmode = TYPE_MODE (TREE_TYPE (arg0));
18426 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
18427 gcc_assert (VECTOR_MODE_P (tmode));
18429 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
18430 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
18431 elt = get_element_number (TREE_TYPE (arg0), arg2);
18433 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
18434 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
18436 op0 = force_reg (tmode, op0);
18437 op1 = force_reg (mode1, op1);
18439 /* OP0 is the source of these builtin functions and shouldn't be
18440 modified. Create a copy, use it and return it as target. */
18441 target = gen_reg_rtx (tmode);
18442 emit_move_insn (target, op0);
18443 ix86_expand_vector_set (true, target, op1, elt);
18445 return target;
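/* The matching set operation (again a sketch): _mm_insert_epi16 from
   <emmintrin.h>,

       #include <emmintrin.h>
       __m128i put (__m128i v, int x)
       {
         return _mm_insert_epi16 (v, x, 3);
       }

   is assumed to arrive as IX86_BUILTIN_VEC_SET_V8HI; note how the
   routine copies OP0 into a fresh register first, so the vector the
   user passed in is never modified in place.  */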
18448 /* Expand an expression EXP that calls a built-in function,
18449 with result going to TARGET if that's convenient
18450 (and in mode MODE if that's convenient).
18451 SUBTARGET may be used as the target for computing one of EXP's operands.
18452 IGNORE is nonzero if the value is to be ignored. */
18454 static rtx
18455 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
18456 enum machine_mode mode ATTRIBUTE_UNUSED,
18457 int ignore ATTRIBUTE_UNUSED)
18459 const struct builtin_description *d;
18460 size_t i;
18461 enum insn_code icode;
18462 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18463 tree arg0, arg1, arg2, arg3;
18464 rtx op0, op1, op2, op3, pat;
18465 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
18466 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
18468 switch (fcode)
18470 case IX86_BUILTIN_EMMS:
18471 emit_insn (gen_mmx_emms ());
18472 return 0;
18474 case IX86_BUILTIN_SFENCE:
18475 emit_insn (gen_sse_sfence ());
18476 return 0;
18478 case IX86_BUILTIN_MASKMOVQ:
18479 case IX86_BUILTIN_MASKMOVDQU:
18480 icode = (fcode == IX86_BUILTIN_MASKMOVQ
18481 ? CODE_FOR_mmx_maskmovq
18482 : CODE_FOR_sse2_maskmovdqu);
18483 /* Note the arg order is different from the operand order. */
18484 arg1 = CALL_EXPR_ARG (exp, 0);
18485 arg2 = CALL_EXPR_ARG (exp, 1);
18486 arg0 = CALL_EXPR_ARG (exp, 2);
18487 op0 = expand_normal (arg0);
18488 op1 = expand_normal (arg1);
18489 op2 = expand_normal (arg2);
18490 mode0 = insn_data[icode].operand[0].mode;
18491 mode1 = insn_data[icode].operand[1].mode;
18492 mode2 = insn_data[icode].operand[2].mode;
18494 op0 = force_reg (Pmode, op0);
18495 op0 = gen_rtx_MEM (mode1, op0);
18497 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
18498 op0 = copy_to_mode_reg (mode0, op0);
18499 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
18500 op1 = copy_to_mode_reg (mode1, op1);
18501 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
18502 op2 = copy_to_mode_reg (mode2, op2);
18503 pat = GEN_FCN (icode) (op0, op1, op2);
18504 if (! pat)
18505 return 0;
18506 emit_insn (pat);
18507 return 0;
18509 case IX86_BUILTIN_SQRTSS:
18510 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
18511 case IX86_BUILTIN_RSQRTSS:
18512 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
18513 case IX86_BUILTIN_RCPSS:
18514 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
18516 case IX86_BUILTIN_LOADUPS:
18517 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
18519 case IX86_BUILTIN_STOREUPS:
18520 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
18522 case IX86_BUILTIN_LOADHPS:
18523 case IX86_BUILTIN_LOADLPS:
18524 case IX86_BUILTIN_LOADHPD:
18525 case IX86_BUILTIN_LOADLPD:
18526 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
18527 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
18528 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
18529 : CODE_FOR_sse2_loadlpd);
18530 arg0 = CALL_EXPR_ARG (exp, 0);
18531 arg1 = CALL_EXPR_ARG (exp, 1);
18532 op0 = expand_normal (arg0);
18533 op1 = expand_normal (arg1);
18534 tmode = insn_data[icode].operand[0].mode;
18535 mode0 = insn_data[icode].operand[1].mode;
18536 mode1 = insn_data[icode].operand[2].mode;
18538 op0 = force_reg (mode0, op0);
18539 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
18540 if (optimize || target == 0
18541 || GET_MODE (target) != tmode
18542 || !register_operand (target, tmode))
18543 target = gen_reg_rtx (tmode);
18544 pat = GEN_FCN (icode) (target, op0, op1);
18545 if (! pat)
18546 return 0;
18547 emit_insn (pat);
18548 return target;
18550 case IX86_BUILTIN_STOREHPS:
18551 case IX86_BUILTIN_STORELPS:
18552 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
18553 : CODE_FOR_sse_storelps);
18554 arg0 = CALL_EXPR_ARG (exp, 0);
18555 arg1 = CALL_EXPR_ARG (exp, 1);
18556 op0 = expand_normal (arg0);
18557 op1 = expand_normal (arg1);
18558 mode0 = insn_data[icode].operand[0].mode;
18559 mode1 = insn_data[icode].operand[1].mode;
18561 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18562 op1 = force_reg (mode1, op1);
18564 pat = GEN_FCN (icode) (op0, op1);
18565 if (! pat)
18566 return 0;
18567 emit_insn (pat);
18568 return const0_rtx;
18570 case IX86_BUILTIN_MOVNTPS:
18571 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
18572 case IX86_BUILTIN_MOVNTQ:
18573 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
18575 case IX86_BUILTIN_LDMXCSR:
18576 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
18577 target = assign_386_stack_local (SImode, SLOT_TEMP);
18578 emit_move_insn (target, op0);
18579 emit_insn (gen_sse_ldmxcsr (target));
18580 return 0;
18582 case IX86_BUILTIN_STMXCSR:
18583 target = assign_386_stack_local (SImode, SLOT_TEMP);
18584 emit_insn (gen_sse_stmxcsr (target));
18585 return copy_to_mode_reg (SImode, target);
18587 case IX86_BUILTIN_SHUFPS:
18588 case IX86_BUILTIN_SHUFPD:
18589 icode = (fcode == IX86_BUILTIN_SHUFPS
18590 ? CODE_FOR_sse_shufps
18591 : CODE_FOR_sse2_shufpd);
18592 arg0 = CALL_EXPR_ARG (exp, 0);
18593 arg1 = CALL_EXPR_ARG (exp, 1);
18594 arg2 = CALL_EXPR_ARG (exp, 2);
18595 op0 = expand_normal (arg0);
18596 op1 = expand_normal (arg1);
18597 op2 = expand_normal (arg2);
18598 tmode = insn_data[icode].operand[0].mode;
18599 mode0 = insn_data[icode].operand[1].mode;
18600 mode1 = insn_data[icode].operand[2].mode;
18601 mode2 = insn_data[icode].operand[3].mode;
18603 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18604 op0 = copy_to_mode_reg (mode0, op0);
18605 if ((optimize && !register_operand (op1, mode1))
18606 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18607 op1 = copy_to_mode_reg (mode1, op1);
18608 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18610 /* @@@ better error message */
18611 error ("mask must be an immediate");
18612 return gen_reg_rtx (tmode);
18614 if (optimize || target == 0
18615 || GET_MODE (target) != tmode
18616 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18617 target = gen_reg_rtx (tmode);
18618 pat = GEN_FCN (icode) (target, op0, op1, op2);
18619 if (! pat)
18620 return 0;
18621 emit_insn (pat);
18622 return target;
18624 case IX86_BUILTIN_PSHUFW:
18625 case IX86_BUILTIN_PSHUFD:
18626 case IX86_BUILTIN_PSHUFHW:
18627 case IX86_BUILTIN_PSHUFLW:
18628 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18629 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18630 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18631 : CODE_FOR_mmx_pshufw);
18632 arg0 = CALL_EXPR_ARG (exp, 0);
18633 arg1 = CALL_EXPR_ARG (exp, 1);
18634 op0 = expand_normal (arg0);
18635 op1 = expand_normal (arg1);
18636 tmode = insn_data[icode].operand[0].mode;
18637 mode1 = insn_data[icode].operand[1].mode;
18638 mode2 = insn_data[icode].operand[2].mode;
18640 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18641 op0 = copy_to_mode_reg (mode1, op0);
18642 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18644 /* @@@ better error message */
18645 error ("mask must be an immediate");
18646 return const0_rtx;
18648 if (target == 0
18649 || GET_MODE (target) != tmode
18650 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18651 target = gen_reg_rtx (tmode);
18652 pat = GEN_FCN (icode) (target, op0, op1);
18653 if (! pat)
18654 return 0;
18655 emit_insn (pat);
18656 return target;
18658 case IX86_BUILTIN_PSLLWI128:
18659 icode = CODE_FOR_ashlv8hi3;
18660 goto do_pshifti;
18661 case IX86_BUILTIN_PSLLDI128:
18662 icode = CODE_FOR_ashlv4si3;
18663 goto do_pshifti;
18664 case IX86_BUILTIN_PSLLQI128:
18665 icode = CODE_FOR_ashlv2di3;
18666 goto do_pshifti;
18667 case IX86_BUILTIN_PSRAWI128:
18668 icode = CODE_FOR_ashrv8hi3;
18669 goto do_pshifti;
18670 case IX86_BUILTIN_PSRADI128:
18671 icode = CODE_FOR_ashrv4si3;
18672 goto do_pshifti;
18673 case IX86_BUILTIN_PSRLWI128:
18674 icode = CODE_FOR_lshrv8hi3;
18675 goto do_pshifti;
18676 case IX86_BUILTIN_PSRLDI128:
18677 icode = CODE_FOR_lshrv4si3;
18678 goto do_pshifti;
18679 case IX86_BUILTIN_PSRLQI128:
18680 icode = CODE_FOR_lshrv2di3;
18681 goto do_pshifti;
18682 do_pshifti:
18683 arg0 = CALL_EXPR_ARG (exp, 0);
18684 arg1 = CALL_EXPR_ARG (exp, 1);
18685 op0 = expand_normal (arg0);
18686 op1 = expand_normal (arg1);
18688 if (!CONST_INT_P (op1))
18690 error ("shift must be an immediate");
18691 return const0_rtx;
18693 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
18694 op1 = GEN_INT (255);
18696 tmode = insn_data[icode].operand[0].mode;
18697 mode1 = insn_data[icode].operand[1].mode;
18698 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18699 op0 = copy_to_reg (op0);
18701 target = gen_reg_rtx (tmode);
18702 pat = GEN_FCN (icode) (target, op0, op1);
18703 if (!pat)
18704 return 0;
18705 emit_insn (pat);
18706 return target;
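/* Behavioural sketch for the immediate shifts (inferred from the
   clamping above, not a statement by the original authors):

       #include <emmintrin.h>
       __m128i sh (__m128i v)
       {
         return _mm_slli_epi32 (v, 40);
       }

   40 is within 0..255, so it is passed through unchanged and the
   hardware produces all zeros for a 32-bit element; counts outside
   0..255 are clamped to 255, which gives the same all-zero result.  */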
18708 case IX86_BUILTIN_PSLLW128:
18709 icode = CODE_FOR_ashlv8hi3;
18710 goto do_pshift;
18711 case IX86_BUILTIN_PSLLD128:
18712 icode = CODE_FOR_ashlv4si3;
18713 goto do_pshift;
18714 case IX86_BUILTIN_PSLLQ128:
18715 icode = CODE_FOR_ashlv2di3;
18716 goto do_pshift;
18717 case IX86_BUILTIN_PSRAW128:
18718 icode = CODE_FOR_ashrv8hi3;
18719 goto do_pshift;
18720 case IX86_BUILTIN_PSRAD128:
18721 icode = CODE_FOR_ashrv4si3;
18722 goto do_pshift;
18723 case IX86_BUILTIN_PSRLW128:
18724 icode = CODE_FOR_lshrv8hi3;
18725 goto do_pshift;
18726 case IX86_BUILTIN_PSRLD128:
18727 icode = CODE_FOR_lshrv4si3;
18728 goto do_pshift;
18729 case IX86_BUILTIN_PSRLQ128:
18730 icode = CODE_FOR_lshrv2di3;
18731 goto do_pshift;
18732 do_pshift:
18733 arg0 = CALL_EXPR_ARG (exp, 0);
18734 arg1 = CALL_EXPR_ARG (exp, 1);
18735 op0 = expand_normal (arg0);
18736 op1 = expand_normal (arg1);
18738 tmode = insn_data[icode].operand[0].mode;
18739 mode1 = insn_data[icode].operand[1].mode;
18741 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18742 op0 = copy_to_reg (op0);
18744 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
18745 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
18746 op1 = copy_to_reg (op1);
18748 target = gen_reg_rtx (tmode);
18749 pat = GEN_FCN (icode) (target, op0, op1);
18750 if (!pat)
18751 return 0;
18752 emit_insn (pat);
18753 return target;
18755 case IX86_BUILTIN_PSLLDQI128:
18756 case IX86_BUILTIN_PSRLDQI128:
18757 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18758 : CODE_FOR_sse2_lshrti3);
18759 arg0 = CALL_EXPR_ARG (exp, 0);
18760 arg1 = CALL_EXPR_ARG (exp, 1);
18761 op0 = expand_normal (arg0);
18762 op1 = expand_normal (arg1);
18763 tmode = insn_data[icode].operand[0].mode;
18764 mode1 = insn_data[icode].operand[1].mode;
18765 mode2 = insn_data[icode].operand[2].mode;
18767 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18769 op0 = copy_to_reg (op0);
18770 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18772 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18774 error ("shift must be an immediate");
18775 return const0_rtx;
18777 target = gen_reg_rtx (V2DImode);
18778 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
18779 op0, op1);
18780 if (! pat)
18781 return 0;
18782 emit_insn (pat);
18783 return target;
18785 case IX86_BUILTIN_FEMMS:
18786 emit_insn (gen_mmx_femms ());
18787 return NULL_RTX;
18789 case IX86_BUILTIN_PAVGUSB:
18790 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18792 case IX86_BUILTIN_PF2ID:
18793 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18795 case IX86_BUILTIN_PFACC:
18796 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18798 case IX86_BUILTIN_PFADD:
18799 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18801 case IX86_BUILTIN_PFCMPEQ:
18802 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18804 case IX86_BUILTIN_PFCMPGE:
18805 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18807 case IX86_BUILTIN_PFCMPGT:
18808 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18810 case IX86_BUILTIN_PFMAX:
18811 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18813 case IX86_BUILTIN_PFMIN:
18814 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18816 case IX86_BUILTIN_PFMUL:
18817 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18819 case IX86_BUILTIN_PFRCP:
18820 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18822 case IX86_BUILTIN_PFRCPIT1:
18823 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18825 case IX86_BUILTIN_PFRCPIT2:
18826 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18828 case IX86_BUILTIN_PFRSQIT1:
18829 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18831 case IX86_BUILTIN_PFRSQRT:
18832 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18834 case IX86_BUILTIN_PFSUB:
18835 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18837 case IX86_BUILTIN_PFSUBR:
18838 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18840 case IX86_BUILTIN_PI2FD:
18841 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18843 case IX86_BUILTIN_PMULHRW:
18844 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18846 case IX86_BUILTIN_PF2IW:
18847 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18849 case IX86_BUILTIN_PFNACC:
18850 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18852 case IX86_BUILTIN_PFPNACC:
18853 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18855 case IX86_BUILTIN_PI2FW:
18856 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18858 case IX86_BUILTIN_PSWAPDSI:
18859 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18861 case IX86_BUILTIN_PSWAPDSF:
18862 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18864 case IX86_BUILTIN_SQRTSD:
18865 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18866 case IX86_BUILTIN_LOADUPD:
18867 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18868 case IX86_BUILTIN_STOREUPD:
18869 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18871 case IX86_BUILTIN_MFENCE:
18872 emit_insn (gen_sse2_mfence ());
18873 return 0;
18874 case IX86_BUILTIN_LFENCE:
18875 emit_insn (gen_sse2_lfence ());
18876 return 0;
18878 case IX86_BUILTIN_CLFLUSH:
18879 arg0 = CALL_EXPR_ARG (exp, 0);
18880 op0 = expand_normal (arg0);
18881 icode = CODE_FOR_sse2_clflush;
18882 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18883 op0 = copy_to_mode_reg (Pmode, op0);
18885 emit_insn (gen_sse2_clflush (op0));
18886 return 0;
18888 case IX86_BUILTIN_MOVNTPD:
18889 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18890 case IX86_BUILTIN_MOVNTDQ:
18891 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18892 case IX86_BUILTIN_MOVNTI:
18893 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18895 case IX86_BUILTIN_LOADDQU:
18896 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18897 case IX86_BUILTIN_STOREDQU:
18898 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18900 case IX86_BUILTIN_MONITOR:
18901 arg0 = CALL_EXPR_ARG (exp, 0);
18902 arg1 = CALL_EXPR_ARG (exp, 1);
18903 arg2 = CALL_EXPR_ARG (exp, 2);
18904 op0 = expand_normal (arg0);
18905 op1 = expand_normal (arg1);
18906 op2 = expand_normal (arg2);
18907 if (!REG_P (op0))
18908 op0 = copy_to_mode_reg (Pmode, op0);
18909 if (!REG_P (op1))
18910 op1 = copy_to_mode_reg (SImode, op1);
18911 if (!REG_P (op2))
18912 op2 = copy_to_mode_reg (SImode, op2);
18913 if (!TARGET_64BIT)
18914 emit_insn (gen_sse3_monitor (op0, op1, op2));
18915 else
18916 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18917 return 0;
18919 case IX86_BUILTIN_MWAIT:
18920 arg0 = CALL_EXPR_ARG (exp, 0);
18921 arg1 = CALL_EXPR_ARG (exp, 1);
18922 op0 = expand_normal (arg0);
18923 op1 = expand_normal (arg1);
18924 if (!REG_P (op0))
18925 op0 = copy_to_mode_reg (SImode, op0);
18926 if (!REG_P (op1))
18927 op1 = copy_to_mode_reg (SImode, op1);
18928 emit_insn (gen_sse3_mwait (op0, op1));
18929 return 0;
18931 case IX86_BUILTIN_LDDQU:
18932 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18933 target, 1);
18935 case IX86_BUILTIN_PALIGNR:
18936 case IX86_BUILTIN_PALIGNR128:
18937 if (fcode == IX86_BUILTIN_PALIGNR)
18939 icode = CODE_FOR_ssse3_palignrdi;
18940 mode = DImode;
18942 else
18944 icode = CODE_FOR_ssse3_palignrti;
18945 mode = V2DImode;
18947 arg0 = CALL_EXPR_ARG (exp, 0);
18948 arg1 = CALL_EXPR_ARG (exp, 1);
18949 arg2 = CALL_EXPR_ARG (exp, 2);
18950 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18951 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18952 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18953 tmode = insn_data[icode].operand[0].mode;
18954 mode1 = insn_data[icode].operand[1].mode;
18955 mode2 = insn_data[icode].operand[2].mode;
18956 mode3 = insn_data[icode].operand[3].mode;
18958 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18960 op0 = copy_to_reg (op0);
18961 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18963 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18965 op1 = copy_to_reg (op1);
18966 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18968 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18970 error ("shift must be an immediate");
18971 return const0_rtx;
18973 target = gen_reg_rtx (mode);
18974 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18975 op0, op1, op2);
18976 if (! pat)
18977 return 0;
18978 emit_insn (pat);
18979 return target;
18981 case IX86_BUILTIN_MOVNTDQA:
18982 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
18983 target, 1);
18985 case IX86_BUILTIN_MOVNTSD:
18986 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18988 case IX86_BUILTIN_MOVNTSS:
18989 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18991 case IX86_BUILTIN_INSERTQ:
18992 case IX86_BUILTIN_EXTRQ:
18993 icode = (fcode == IX86_BUILTIN_EXTRQ
18994 ? CODE_FOR_sse4a_extrq
18995 : CODE_FOR_sse4a_insertq);
18996 arg0 = CALL_EXPR_ARG (exp, 0);
18997 arg1 = CALL_EXPR_ARG (exp, 1);
18998 op0 = expand_normal (arg0);
18999 op1 = expand_normal (arg1);
19000 tmode = insn_data[icode].operand[0].mode;
19001 mode1 = insn_data[icode].operand[1].mode;
19002 mode2 = insn_data[icode].operand[2].mode;
19003 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19004 op0 = copy_to_mode_reg (mode1, op0);
19005 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19006 op1 = copy_to_mode_reg (mode2, op1);
19007 if (optimize || target == 0
19008 || GET_MODE (target) != tmode
19009 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19010 target = gen_reg_rtx (tmode);
19011 pat = GEN_FCN (icode) (target, op0, op1);
19012 if (! pat)
19013 return NULL_RTX;
19014 emit_insn (pat);
19015 return target;
19017 case IX86_BUILTIN_EXTRQI:
19018 icode = CODE_FOR_sse4a_extrqi;
19019 arg0 = CALL_EXPR_ARG (exp, 0);
19020 arg1 = CALL_EXPR_ARG (exp, 1);
19021 arg2 = CALL_EXPR_ARG (exp, 2);
19022 op0 = expand_normal (arg0);
19023 op1 = expand_normal (arg1);
19024 op2 = expand_normal (arg2);
19025 tmode = insn_data[icode].operand[0].mode;
19026 mode1 = insn_data[icode].operand[1].mode;
19027 mode2 = insn_data[icode].operand[2].mode;
19028 mode3 = insn_data[icode].operand[3].mode;
19029 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19030 op0 = copy_to_mode_reg (mode1, op0);
19031 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19033 error ("index mask must be an immediate");
19034 return gen_reg_rtx (tmode);
19036 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19038 error ("length mask must be an immediate");
19039 return gen_reg_rtx (tmode);
19041 if (optimize || target == 0
19042 || GET_MODE (target) != tmode
19043 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19044 target = gen_reg_rtx (tmode);
19045 pat = GEN_FCN (icode) (target, op0, op1, op2);
19046 if (! pat)
19047 return NULL_RTX;
19048 emit_insn (pat);
19049 return target;
19051 case IX86_BUILTIN_INSERTQI:
19052 icode = CODE_FOR_sse4a_insertqi;
19053 arg0 = CALL_EXPR_ARG (exp, 0);
19054 arg1 = CALL_EXPR_ARG (exp, 1);
19055 arg2 = CALL_EXPR_ARG (exp, 2);
19056 arg3 = CALL_EXPR_ARG (exp, 3);
19057 op0 = expand_normal (arg0);
19058 op1 = expand_normal (arg1);
19059 op2 = expand_normal (arg2);
19060 op3 = expand_normal (arg3);
19061 tmode = insn_data[icode].operand[0].mode;
19062 mode1 = insn_data[icode].operand[1].mode;
19063 mode2 = insn_data[icode].operand[2].mode;
19064 mode3 = insn_data[icode].operand[3].mode;
19065 mode4 = insn_data[icode].operand[4].mode;
19067 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19068 op0 = copy_to_mode_reg (mode1, op0);
19070 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19071 op1 = copy_to_mode_reg (mode2, op1);
19073 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19075 error ("index mask must be an immediate");
19076 return gen_reg_rtx (tmode);
19078 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19080 error ("length mask must be an immediate");
19081 return gen_reg_rtx (tmode);
19083 if (optimize || target == 0
19084 || GET_MODE (target) != tmode
19085 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19086 target = gen_reg_rtx (tmode);
19087 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19088 if (! pat)
19089 return NULL_RTX;
19090 emit_insn (pat);
19091 return target;
19093 case IX86_BUILTIN_VEC_INIT_V2SI:
19094 case IX86_BUILTIN_VEC_INIT_V4HI:
19095 case IX86_BUILTIN_VEC_INIT_V8QI:
19096 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19098 case IX86_BUILTIN_VEC_EXT_V2DF:
19099 case IX86_BUILTIN_VEC_EXT_V2DI:
19100 case IX86_BUILTIN_VEC_EXT_V4SF:
19101 case IX86_BUILTIN_VEC_EXT_V4SI:
19102 case IX86_BUILTIN_VEC_EXT_V8HI:
19103 case IX86_BUILTIN_VEC_EXT_V2SI:
19104 case IX86_BUILTIN_VEC_EXT_V4HI:
19105 case IX86_BUILTIN_VEC_EXT_V16QI:
19106 return ix86_expand_vec_ext_builtin (exp, target);
19108 case IX86_BUILTIN_VEC_SET_V2DI:
19109 case IX86_BUILTIN_VEC_SET_V4SF:
19110 case IX86_BUILTIN_VEC_SET_V4SI:
19111 case IX86_BUILTIN_VEC_SET_V8HI:
19112 case IX86_BUILTIN_VEC_SET_V4HI:
19113 case IX86_BUILTIN_VEC_SET_V16QI:
19114 return ix86_expand_vec_set_builtin (exp);
19116 default:
19117 break;
19120 for (i = 0, d = bdesc_sse_3arg;
19121 i < ARRAY_SIZE (bdesc_sse_3arg);
19122 i++, d++)
19123 if (d->code == fcode)
19124 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19125 target);
19127 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19128 if (d->code == fcode)
19130 /* Compares are treated specially. */
19131 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19132 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19133 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19134 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19135 return ix86_expand_sse_compare (d, exp, target);
19137 return ix86_expand_binop_builtin (d->icode, exp, target);
19140 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19141 if (d->code == fcode)
19142 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19144 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19145 if (d->code == fcode)
19146 return ix86_expand_sse_comi (d, exp, target);
19148 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19149 if (d->code == fcode)
19150 return ix86_expand_sse_ptest (d, exp, target);
19152 gcc_unreachable ();
19155 /* Returns a function decl for a vectorized version of the builtin function
19156 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19157 if it is not available. */
19159 static tree
19160 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
19161 tree type_in)
19163 enum machine_mode in_mode, out_mode;
19164 int in_n, out_n;
19166 if (TREE_CODE (type_out) != VECTOR_TYPE
19167 || TREE_CODE (type_in) != VECTOR_TYPE)
19168 return NULL_TREE;
19170 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19171 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19172 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19173 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19175 switch (fn)
19177 case BUILT_IN_SQRT:
19178 if (out_mode == DFmode && out_n == 2
19179 && in_mode == DFmode && in_n == 2)
19180 return ix86_builtins[IX86_BUILTIN_SQRTPD];
19181 return NULL_TREE;
19183 case BUILT_IN_SQRTF:
19184 if (out_mode == SFmode && out_n == 4
19185 && in_mode == SFmode && in_n == 4)
19186 return ix86_builtins[IX86_BUILTIN_SQRTPS];
19187 return NULL_TREE;
19189 case BUILT_IN_LRINTF:
19190 if (out_mode == SImode && out_n == 4
19191 && in_mode == SFmode && in_n == 4)
19192 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19193 return NULL_TREE;
19195 default:
19199 return NULL_TREE;
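/* Sketch of the vectorizer hook in action (an assumed scenario):
   when a loop such as

       void f (double *a, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           a[i] = __builtin_sqrt (a[i]);
       }

   is vectorized for SSE2 (e.g. with -O3 -ffast-math), the vectorizer
   asks for BUILT_IN_SQRT with V2DF in and out, and the code above
   returns the decl for IX86_BUILTIN_SQRTPD, so the loop body becomes
   a sqrtpd per vector.  */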
19202 /* Returns a decl of a function that implements conversion of the
19203 input vector of type TYPE, or NULL_TREE if it is not available. */
19205 static tree
19206 ix86_builtin_conversion (enum tree_code code, tree type)
19208 if (TREE_CODE (type) != VECTOR_TYPE)
19209 return NULL_TREE;
19211 switch (code)
19213 case FLOAT_EXPR:
19214 switch (TYPE_MODE (type))
19216 case V4SImode:
19217 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
19218 default:
19219 return NULL_TREE;
19222 case FIX_TRUNC_EXPR:
19223 switch (TYPE_MODE (type))
19225 case V4SFmode:
19226 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19227 default:
19228 return NULL_TREE;
19230 default:
19231 return NULL_TREE;
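/* Similarly for the conversion hook (illustrative): vectorizing

       void g (float *out, const int *in, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           out[i] = (float) in[i];
       }

   produces a FLOAT_EXPR on V4SImode operands, for which the switch
   above returns IX86_BUILTIN_CVTDQ2PS, i.e. one cvtdq2ps per
   vector.  */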
19236 /* Store OPERAND to the memory after reload is completed. This means
19237 that we can't easily use assign_stack_local. */
19239 ix86_force_to_memory (enum machine_mode mode, rtx operand)
19241 rtx result;
19243 gcc_assert (reload_completed);
19244 if (TARGET_RED_ZONE)
19246 result = gen_rtx_MEM (mode,
19247 gen_rtx_PLUS (Pmode,
19248 stack_pointer_rtx,
19249 GEN_INT (-RED_ZONE_SIZE)));
19250 emit_move_insn (result, operand);
19252 else if (!TARGET_RED_ZONE && TARGET_64BIT)
19254 switch (mode)
19256 case HImode:
19257 case SImode:
19258 operand = gen_lowpart (DImode, operand);
19259 /* FALLTHRU */
19260 case DImode:
19261 emit_insn (
19262 gen_rtx_SET (VOIDmode,
19263 gen_rtx_MEM (DImode,
19264 gen_rtx_PRE_DEC (DImode,
19265 stack_pointer_rtx)),
19266 operand));
19267 break;
19268 default:
19269 gcc_unreachable ();
19271 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19273 else
19275 switch (mode)
19277 case DImode:
19279 rtx operands[2];
19280 split_di (&operand, 1, operands, operands + 1);
19281 emit_insn (
19282 gen_rtx_SET (VOIDmode,
19283 gen_rtx_MEM (SImode,
19284 gen_rtx_PRE_DEC (Pmode,
19285 stack_pointer_rtx)),
19286 operands[1]));
19287 emit_insn (
19288 gen_rtx_SET (VOIDmode,
19289 gen_rtx_MEM (SImode,
19290 gen_rtx_PRE_DEC (Pmode,
19291 stack_pointer_rtx)),
19292 operands[0]));
19294 break;
19295 case HImode:
19296 /* Store HImodes as SImodes. */
19297 operand = gen_lowpart (SImode, operand);
19298 /* FALLTHRU */
19299 case SImode:
19300 emit_insn (
19301 gen_rtx_SET (VOIDmode,
19302 gen_rtx_MEM (GET_MODE (operand),
19303 gen_rtx_PRE_DEC (SImode,
19304 stack_pointer_rtx)),
19305 operand));
19306 break;
19307 default:
19308 gcc_unreachable ();
19310 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19312 return result;
19315 /* Free operand from the memory. */
19316 void
19317 ix86_free_from_memory (enum machine_mode mode)
19319 if (!TARGET_RED_ZONE)
19321 int size;
19323 if (mode == DImode || TARGET_64BIT)
19324 size = 8;
19325 else
19326 size = 4;
19327 /* Use LEA to deallocate stack space. In peephole2 it will be converted
19328 to a pop or add instruction if registers are available. */
19329 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19330 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
19331 GEN_INT (size))));
19335 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
19336 QImode must go into class Q_REGS.
19337 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19338 movdf to do mem-to-mem moves through integer regs. */
19339 enum reg_class
19340 ix86_preferred_reload_class (rtx x, enum reg_class class)
19342 enum machine_mode mode = GET_MODE (x);
19344 /* We're only allowed to return a subclass of CLASS. Many of the
19345 following checks fail for NO_REGS, so eliminate that early. */
19346 if (class == NO_REGS)
19347 return NO_REGS;
19349 /* All classes can load zeros. */
19350 if (x == CONST0_RTX (mode))
19351 return class;
19353 /* Force constants into memory if we are loading a (nonzero) constant into
19354 an MMX or SSE register. This is because there are no MMX/SSE instructions
19355 to load from a constant. */
19356 if (CONSTANT_P (x)
19357 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
19358 return NO_REGS;
19360 /* Prefer SSE regs only, if we can use them for math. */
19361 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
19362 return SSE_CLASS_P (class) ? class : NO_REGS;
19364 /* Floating-point constants need more complex checks. */
19365 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
19367 /* General regs can load everything. */
19368 if (reg_class_subset_p (class, GENERAL_REGS))
19369 return class;
19371 /* Floats can load 0 and 1 plus some others. Note that we eliminated
19372 zero above. We only want to wind up preferring 80387 registers if
19373 we plan on doing computation with them. */
19374 if (TARGET_80387
19375 && standard_80387_constant_p (x))
19377 /* Limit class to non-sse. */
19378 if (class == FLOAT_SSE_REGS)
19379 return FLOAT_REGS;
19380 if (class == FP_TOP_SSE_REGS)
19381 return FP_TOP_REG;
19382 if (class == FP_SECOND_SSE_REGS)
19383 return FP_SECOND_REG;
19384 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
19385 return class;
19388 return NO_REGS;
19391 /* Generally when we see PLUS here, it's the function invariant
19392 (plus soft-fp const_int), which can only be computed into general
19393 regs. */
19394 if (GET_CODE (x) == PLUS)
19395 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
19397 /* QImode constants are easy to load, but non-constant QImode data
19398 must go into Q_REGS. */
19399 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
19401 if (reg_class_subset_p (class, Q_REGS))
19402 return class;
19403 if (reg_class_subset_p (Q_REGS, class))
19404 return Q_REGS;
19405 return NO_REGS;
19408 return class;
19411 /* Discourage putting floating-point values in SSE registers unless
19412 SSE math is being used, and likewise for the 387 registers. */
19413 enum reg_class
19414 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
19416 enum machine_mode mode = GET_MODE (x);
19418 /* Restrict the output reload class to the register bank that we are doing
19419 math on. If we would like not to return a subset of CLASS, reject this
19420 alternative: if reload cannot do this, it will still use its choice. */
19421 mode = GET_MODE (x);
19422 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19423 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
19425 if (X87_FLOAT_MODE_P (mode))
19427 if (class == FP_TOP_SSE_REGS)
19428 return FP_TOP_REG;
19429 else if (class == FP_SECOND_SSE_REGS)
19430 return FP_SECOND_REG;
19431 else
19432 return FLOAT_CLASS_P (class) ? class : NO_REGS;
19435 return class;
19438 /* If we are copying between general and FP registers, we need a memory
19439 location. The same is true for SSE and MMX registers.
19441 The macro can't work reliably when one of the CLASSES is a class containing
19442 registers from multiple units (SSE, MMX, integer). We avoid this by never
19443 combining those units in single alternative in the machine description.
19444 Ensure that this constraint holds to avoid unexpected surprises.
19446 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
19447 enforce these sanity checks. */
19450 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
19451 enum machine_mode mode, int strict)
19453 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
19454 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
19455 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
19456 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
19457 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
19458 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
19460 gcc_assert (!strict);
19461 return true;
19464 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
19465 return true;
19467 /* ??? This is a lie. We do have moves between mmx/general, and for
19468 mmx/sse2. But by saying we need secondary memory we discourage the
19469 register allocator from using the mmx registers unless needed. */
19470 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
19471 return true;
19473 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19475 /* SSE1 doesn't have any direct moves from other classes. */
19476 if (!TARGET_SSE2)
19477 return true;
19479 /* If the target says that inter-unit moves are more expensive
19480 than moving through memory, then don't generate them. */
19481 if (!TARGET_INTER_UNIT_MOVES)
19482 return true;
19484 /* Between SSE and general, we have moves no larger than word size. */
19485 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19486 return true;
19489 return false;
19492 /* Return true if the registers in CLASS cannot represent the change from
19493 modes FROM to TO. */
19495 bool
19496 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
19497 enum reg_class class)
19499 if (from == to)
19500 return false;
19502 /* x87 registers can't do subreg at all, as all values are reformatted
19503 to extended precision. */
19504 if (MAYBE_FLOAT_CLASS_P (class))
19505 return true;
19507 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
19509 /* Vector registers do not support QI or HImode loads. If we don't
19510 disallow a change to these modes, reload will assume it's ok to
19511 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19512 the vec_dupv4hi pattern. */
19513 if (GET_MODE_SIZE (from) < 4)
19514 return true;
19516 /* Vector registers do not support subreg with nonzero offsets, which
19517 are otherwise valid for integer registers. Since we can't see
19518 whether we have a nonzero offset from here, prohibit all
19519 nonparadoxical subregs changing size. */
19520 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
19521 return true;
19524 return false;
19527 /* Return the cost of moving data from a register in class CLASS1 to
19528 one in class CLASS2.
19530 It is not required that the cost always equal 2 when FROM is the same as TO;
19531 on some machines it is expensive to move between registers if they are not
19532 general registers. */
19535 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
19536 enum reg_class class2)
19538 /* In case we require secondary memory, compute cost of the store followed
19539 by load. In order to avoid bad register allocation choices, we need
19540 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19542 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
19544 int cost = 1;
19546 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
19547 MEMORY_MOVE_COST (mode, class1, 1));
19548 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
19549 MEMORY_MOVE_COST (mode, class2, 1));
19551 /* In the case of copying from a general purpose register we may emit
19552 multiple stores followed by a single load, causing a memory size
19553 mismatch stall.  Count this as an arbitrarily high cost of 20. */
19554 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
19555 cost += 20;
19557 /* In the case of FP/MMX moves, the registers actually overlap, and we
19558 have to switch modes in order to treat them differently. */
19559 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
19560 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
19561 cost += 20;
19563 return cost;
19566 /* Moves between SSE/MMX and integer unit are expensive. */
19567 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
19568 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19569 return ix86_cost->mmxsse_to_integer;
19570 if (MAYBE_FLOAT_CLASS_P (class1))
19571 return ix86_cost->fp_move;
19572 if (MAYBE_SSE_CLASS_P (class1))
19573 return ix86_cost->sse_move;
19574 if (MAYBE_MMX_CLASS_P (class1))
19575 return ix86_cost->mmx_move;
19576 return 2;
19579 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
19581 bool
19582 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
19584 /* Flags and only flags can only hold CCmode values. */
19585 if (CC_REGNO_P (regno))
19586 return GET_MODE_CLASS (mode) == MODE_CC;
19587 if (GET_MODE_CLASS (mode) == MODE_CC
19588 || GET_MODE_CLASS (mode) == MODE_RANDOM
19589 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19590 return 0;
19591 if (FP_REGNO_P (regno))
19592 return VALID_FP_MODE_P (mode);
19593 if (SSE_REGNO_P (regno))
19595 /* We implement the move patterns for all vector modes into and
19596 out of SSE registers, even when no operation instructions
19597 are available. */
19598 return (VALID_SSE_REG_MODE (mode)
19599 || VALID_SSE2_REG_MODE (mode)
19600 || VALID_MMX_REG_MODE (mode)
19601 || VALID_MMX_REG_MODE_3DNOW (mode));
19603 if (MMX_REGNO_P (regno))
19605 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19606 so if the register is available at all, then we can move data of
19607 the given mode into or out of it. */
19608 return (VALID_MMX_REG_MODE (mode)
19609 || VALID_MMX_REG_MODE_3DNOW (mode));
19612 if (mode == QImode)
19614 /* Take care for QImode values - they can be in non-QI regs,
19615 but then they do cause partial register stalls. */
19616 if (regno < 4 || TARGET_64BIT)
19617 return 1;
19618 if (!TARGET_PARTIAL_REG_STALL)
19619 return 1;
19620 return reload_in_progress || reload_completed;
19622 /* We handle both integer and floats in the general purpose registers. */
19623 else if (VALID_INT_MODE_P (mode))
19624 return 1;
19625 else if (VALID_FP_MODE_P (mode))
19626 return 1;
19627 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19628 on to use that value in smaller contexts, this can easily force a
19629 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19630 supporting DImode, allow it. */
19631 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19632 return 1;
19634 return 0;
19637 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19638 tieable integer mode. */
19640 static bool
19641 ix86_tieable_integer_mode_p (enum machine_mode mode)
19643 switch (mode)
19645 case HImode:
19646 case SImode:
19647 return true;
19649 case QImode:
19650 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19652 case DImode:
19653 return TARGET_64BIT;
19655 default:
19656 return false;
19660 /* Return true if MODE1 is accessible in a register that can hold MODE2
19661 without copying. That is, all register classes that can hold MODE2
19662 can also hold MODE1. */
19664 bool
19665 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19667 if (mode1 == mode2)
19668 return true;
19670 if (ix86_tieable_integer_mode_p (mode1)
19671 && ix86_tieable_integer_mode_p (mode2))
19672 return true;
19674 /* MODE2 being XFmode implies fp stack or general regs, which means we
19675 can tie any smaller floating point modes to it. Note that we do not
19676 tie this with TFmode. */
19677 if (mode2 == XFmode)
19678 return mode1 == SFmode || mode1 == DFmode;
19680 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19681 that we can tie it with SFmode. */
19682 if (mode2 == DFmode)
19683 return mode1 == SFmode;
19685 /* If MODE2 is only appropriate for an SSE register, then tie with
19686 any other mode acceptable to SSE registers. */
19687 if (GET_MODE_SIZE (mode2) == 16
19688 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19689 return (GET_MODE_SIZE (mode1) == 16
19690 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19692 /* If MODE2 is appropriate for an MMX register, then tie
19693 with any other mode acceptable to MMX registers. */
19694 if (GET_MODE_SIZE (mode2) == 8
19695 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19696 return (GET_MODE_SIZE (mode1) == 8
19697 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19699 return false;
19702 /* Return the cost of moving data of mode M between a
19703 register and memory. A value of 2 is the default; this cost is
19704 relative to those in `REGISTER_MOVE_COST'.
19706 If moving between registers and memory is more expensive than
19707 between two registers, you should define this macro to express the
19708 relative cost.
19710 Also model the increased cost of moving QImode values in non
19711 Q_REGS classes.
19714 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19716 if (FLOAT_CLASS_P (class))
19718 int index;
19719 switch (mode)
19721 case SFmode:
19722 index = 0;
19723 break;
19724 case DFmode:
19725 index = 1;
19726 break;
19727 case XFmode:
19728 index = 2;
19729 break;
19730 default:
19731 return 100;
19733 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19735 if (SSE_CLASS_P (class))
19737 int index;
19738 switch (GET_MODE_SIZE (mode))
19740 case 4:
19741 index = 0;
19742 break;
19743 case 8:
19744 index = 1;
19745 break;
19746 case 16:
19747 index = 2;
19748 break;
19749 default:
19750 return 100;
19752 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19754 if (MMX_CLASS_P (class))
19756 int index;
19757 switch (GET_MODE_SIZE (mode))
19759 case 4:
19760 index = 0;
19761 break;
19762 case 8:
19763 index = 1;
19764 break;
19765 default:
19766 return 100;
19768 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19770 switch (GET_MODE_SIZE (mode))
19772 case 1:
19773 if (in)
19774 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19775 : ix86_cost->movzbl_load);
19776 else
19777 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19778 : ix86_cost->int_store[0] + 4);
19779 break;
19780 case 2:
19781 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19782 default:
19783 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19784 if (mode == TFmode)
19785 mode = XFmode;
19786 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19787 * (((int) GET_MODE_SIZE (mode)
19788 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
19792 /* Compute a (partial) cost for rtx X. Return true if the complete
19793 cost has been computed, and false if subexpressions should be
19794 scanned. In either case, *TOTAL contains the cost result. */
19796 static bool
19797 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19799 enum machine_mode mode = GET_MODE (x);
19801 switch (code)
19803 case CONST_INT:
19804 case CONST:
19805 case LABEL_REF:
19806 case SYMBOL_REF:
19807 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19808 *total = 3;
19809 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19810 *total = 2;
19811 else if (flag_pic && SYMBOLIC_CONST (x)
19812 && (!TARGET_64BIT
19813 || (GET_CODE (x) != LABEL_REF
19814 && (GET_CODE (x) != SYMBOL_REF
19815 || !SYMBOL_REF_LOCAL_P (x)))))
19816 *total = 1;
19817 else
19818 *total = 0;
19819 return true;
19821 case CONST_DOUBLE:
19822 if (mode == VOIDmode)
19823 *total = 0;
19824 else
19825 switch (standard_80387_constant_p (x))
19827 case 1: /* 0.0 */
19828 *total = 1;
19829 break;
19830 default: /* Other constants */
19831 *total = 2;
19832 break;
19833 case 0:
19834 case -1:
19835 /* Start with (MEM (SYMBOL_REF)), since that's where
19836 it'll probably end up. Add a penalty for size. */
19837 *total = (COSTS_N_INSNS (1)
19838 + (flag_pic != 0 && !TARGET_64BIT)
19839 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19840 break;
19842 return true;
19844 case ZERO_EXTEND:
19845 /* The zero extension is often completely free on x86_64, so make
19846 it as cheap as possible. */
19847 if (TARGET_64BIT && mode == DImode
19848 && GET_MODE (XEXP (x, 0)) == SImode)
19849 *total = 1;
19850 else if (TARGET_ZERO_EXTEND_WITH_AND)
19851 *total = ix86_cost->add;
19852 else
19853 *total = ix86_cost->movzx;
19854 return false;
19856 case SIGN_EXTEND:
19857 *total = ix86_cost->movsx;
19858 return false;
19860 case ASHIFT:
19861 if (CONST_INT_P (XEXP (x, 1))
19862 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19864 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19865 if (value == 1)
19867 *total = ix86_cost->add;
19868 return false;
19870 if ((value == 2 || value == 3)
19871 && ix86_cost->lea <= ix86_cost->shift_const)
19873 *total = ix86_cost->lea;
19874 return false;
19877 /* FALLTHRU */
19879 case ROTATE:
19880 case ASHIFTRT:
19881 case LSHIFTRT:
19882 case ROTATERT:
19883 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19885 if (CONST_INT_P (XEXP (x, 1)))
19887 if (INTVAL (XEXP (x, 1)) > 32)
19888 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19889 else
19890 *total = ix86_cost->shift_const * 2;
19892 else
19894 if (GET_CODE (XEXP (x, 1)) == AND)
19895 *total = ix86_cost->shift_var * 2;
19896 else
19897 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19900 else
19902 if (CONST_INT_P (XEXP (x, 1)))
19903 *total = ix86_cost->shift_const;
19904 else
19905 *total = ix86_cost->shift_var;
19907 return false;
19909 case MULT:
19910 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19912 /* ??? SSE scalar cost should be used here. */
19913 *total = ix86_cost->fmul;
19914 return false;
19916 else if (X87_FLOAT_MODE_P (mode))
19918 *total = ix86_cost->fmul;
19919 return false;
19921 else if (FLOAT_MODE_P (mode))
19923 /* ??? SSE vector cost should be used here. */
19924 *total = ix86_cost->fmul;
19925 return false;
19927 else
19929 rtx op0 = XEXP (x, 0);
19930 rtx op1 = XEXP (x, 1);
19931 int nbits;
19932 if (CONST_INT_P (XEXP (x, 1)))
19934 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19935 for (nbits = 0; value != 0; value &= value - 1)
19936 nbits++;
19938 else
19939 /* This is arbitrary. */
19940 nbits = 7;
19942 /* Compute costs correctly for widening multiplication. */
19943 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19944 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19945 == GET_MODE_SIZE (mode))
19947 int is_mulwiden = 0;
19948 enum machine_mode inner_mode = GET_MODE (op0);
19950 if (GET_CODE (op0) == GET_CODE (op1))
19951 is_mulwiden = 1, op1 = XEXP (op1, 0);
19952 else if (CONST_INT_P (op1))
19954 if (GET_CODE (op0) == SIGN_EXTEND)
19955 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19956 == INTVAL (op1);
19957 else
19958 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19961 if (is_mulwiden)
19962 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19965 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19966 + nbits * ix86_cost->mult_bit
19967 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19969 return true;
19972 case DIV:
19973 case UDIV:
19974 case MOD:
19975 case UMOD:
19976 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19977 /* ??? SSE cost should be used here. */
19978 *total = ix86_cost->fdiv;
19979 else if (X87_FLOAT_MODE_P (mode))
19980 *total = ix86_cost->fdiv;
19981 else if (FLOAT_MODE_P (mode))
19982 /* ??? SSE vector cost should be used here. */
19983 *total = ix86_cost->fdiv;
19984 else
19985 *total = ix86_cost->divide[MODE_INDEX (mode)];
19986 return false;
19988 case PLUS:
19989 if (GET_MODE_CLASS (mode) == MODE_INT
19990 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19992 if (GET_CODE (XEXP (x, 0)) == PLUS
19993 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19994 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19995 && CONSTANT_P (XEXP (x, 1)))
19997 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19998 if (val == 2 || val == 4 || val == 8)
20000 *total = ix86_cost->lea;
20001 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20002 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20003 outer_code);
20004 *total += rtx_cost (XEXP (x, 1), outer_code);
20005 return true;
20008 else if (GET_CODE (XEXP (x, 0)) == MULT
20009 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20011 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20012 if (val == 2 || val == 4 || val == 8)
20014 *total = ix86_cost->lea;
20015 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20016 *total += rtx_cost (XEXP (x, 1), outer_code);
20017 return true;
20020 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20022 *total = ix86_cost->lea;
20023 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20024 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20025 *total += rtx_cost (XEXP (x, 1), outer_code);
20026 return true;
20029 /* FALLTHRU */
20031 case MINUS:
20032 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20034 /* ??? SSE cost should be used here. */
20035 *total = ix86_cost->fadd;
20036 return false;
20038 else if (X87_FLOAT_MODE_P (mode))
20040 *total = ix86_cost->fadd;
20041 return false;
20043 else if (FLOAT_MODE_P (mode))
20045 /* ??? SSE vector cost should be used here. */
20046 *total = ix86_cost->fadd;
20047 return false;
20049 /* FALLTHRU */
20051 case AND:
20052 case IOR:
20053 case XOR:
20054 if (!TARGET_64BIT && mode == DImode)
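/* A DImode logical operation on a 32-bit target is split into two
   SImode operations, so charge two adds; a sub-operand that is not
   itself DImode is needed by both halves and is counted twice.  */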
20056 *total = (ix86_cost->add * 2
20057 + (rtx_cost (XEXP (x, 0), outer_code)
20058 << (GET_MODE (XEXP (x, 0)) != DImode))
20059 + (rtx_cost (XEXP (x, 1), outer_code)
20060 << (GET_MODE (XEXP (x, 1)) != DImode)));
20061 return true;
20063 /* FALLTHRU */
20065 case NEG:
20066 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20068 /* ??? SSE cost should be used here. */
20069 *total = ix86_cost->fchs;
20070 return false;
20072 else if (X87_FLOAT_MODE_P (mode))
20074 *total = ix86_cost->fchs;
20075 return false;
20077 else if (FLOAT_MODE_P (mode))
20079 /* ??? SSE vector cost should be used here. */
20080 *total = ix86_cost->fchs;
20081 return false;
20083 /* FALLTHRU */
20085 case NOT:
20086 if (!TARGET_64BIT && mode == DImode)
20087 *total = ix86_cost->add * 2;
20088 else
20089 *total = ix86_cost->add;
20090 return false;
20092 case COMPARE:
20093 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20094 && XEXP (XEXP (x, 0), 1) == const1_rtx
20095 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20096 && XEXP (x, 1) == const0_rtx)
20098 /* This kind of construct is implemented using test[bwl].
20099 Treat it as if we had an AND. */
20100 *total = (ix86_cost->add
20101 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20102 + rtx_cost (const1_rtx, outer_code));
20103 return true;
20105 return false;
20107 case FLOAT_EXTEND:
20108 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20109 *total = 0;
20110 return false;
20112 case ABS:
20113 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20114 /* ??? SSE cost should be used here. */
20115 *total = ix86_cost->fabs;
20116 else if (X87_FLOAT_MODE_P (mode))
20117 *total = ix86_cost->fabs;
20118 else if (FLOAT_MODE_P (mode))
20119 /* ??? SSE vector cost should be used here. */
20120 *total = ix86_cost->fabs;
20121 return false;
20123 case SQRT:
20124 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20125 /* ??? SSE cost should be used here. */
20126 *total = ix86_cost->fsqrt;
20127 else if (X87_FLOAT_MODE_P (mode))
20128 *total = ix86_cost->fsqrt;
20129 else if (FLOAT_MODE_P (mode))
20130 /* ??? SSE vector cost should be used here. */
20131 *total = ix86_cost->fsqrt;
20132 return false;
20134 case UNSPEC:
20135 if (XINT (x, 1) == UNSPEC_TP)
20136 *total = 0;
20137 return false;
20139 default:
20140 return false;
20144 #if TARGET_MACHO
20146 static int current_machopic_label_num;
20148 /* Given a symbol name and its associated stub, write out the
20149 definition of the stub. */
20151 void
20152 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20154 unsigned int length;
20155 char *binder_name, *symbol_name, lazy_ptr_name[32];
20156 int label = ++current_machopic_label_num;
20158 /* For 64-bit we shouldn't get here. */
20159 gcc_assert (!TARGET_64BIT);
20161 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20162 symb = (*targetm.strip_name_encoding) (symb);
20164 length = strlen (stub);
20165 binder_name = alloca (length + 32);
20166 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20168 length = strlen (symb);
20169 symbol_name = alloca (length + 32);
20170 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20172 sprintf (lazy_ptr_name, "L%d$lz", label);
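/* Emit the stub: PIC ("pure") stubs obtain the current PC with a
   call/pop and load the lazy pointer PC-relatively before jumping
   through it; non-PIC stubs jump through the lazy pointer directly.
   The lazy pointer initially points at the binder fragment below,
   which pushes its address and enters dyld_stub_binding_helper to
   resolve the symbol on first use.  */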
20174 if (MACHOPIC_PURE)
20175 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
20176 else
20177 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20179 fprintf (file, "%s:\n", stub);
20180 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20182 if (MACHOPIC_PURE)
20184 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20185 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20186 fprintf (file, "\tjmp\t*%%edx\n");
20188 else
20189 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20191 fprintf (file, "%s:\n", binder_name);
20193 if (MACHOPIC_PURE)
20195 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
20196 fprintf (file, "\tpushl\t%%eax\n");
20198 else
20199 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20201 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
20203 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20204 fprintf (file, "%s:\n", lazy_ptr_name);
20205 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20206 fprintf (file, "\t.long %s\n", binder_name);
20209 void
20210 darwin_x86_file_end (void)
20212 darwin_file_end ();
20213 ix86_file_end ();
20215 #endif /* TARGET_MACHO */
20217 /* Order the registers for the register allocator. */
20219 void
20220 x86_order_regs_for_local_alloc (void)
20222 int pos = 0;
20223 int i;
20225 /* First allocate the local general purpose registers. */
20226 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20227 if (GENERAL_REGNO_P (i) && call_used_regs[i])
20228 reg_alloc_order [pos++] = i;
20230 /* Global general purpose registers. */
20231 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20232 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
20233 reg_alloc_order [pos++] = i;
20235 /* x87 registers come first in case we are doing FP math
20236 using them. */
20237 if (!TARGET_SSE_MATH)
20238 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20239 reg_alloc_order [pos++] = i;
20241 /* SSE registers. */
20242 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20243 reg_alloc_order [pos++] = i;
20244 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20245 reg_alloc_order [pos++] = i;
20247 /* x87 registers. */
20248 if (TARGET_SSE_MATH)
20249 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20250 reg_alloc_order [pos++] = i;
20252 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20253 reg_alloc_order [pos++] = i;
20255 /* Initialize the rest of the array, as we do not allocate some registers
20256 at all. */
20257 while (pos < FIRST_PSEUDO_REGISTER)
20258 reg_alloc_order [pos++] = 0;
20261 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20262 struct attribute_spec.handler. */
20263 static tree
20264 ix86_handle_struct_attribute (tree *node, tree name,
20265 tree args ATTRIBUTE_UNUSED,
20266 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20268 tree *type = NULL;
20269 if (DECL_P (*node))
20271 if (TREE_CODE (*node) == TYPE_DECL)
20272 type = &TREE_TYPE (*node);
20274 else
20275 type = node;
20277 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20278 || TREE_CODE (*type) == UNION_TYPE)))
20280 warning (OPT_Wattributes, "%qs attribute ignored",
20281 IDENTIFIER_POINTER (name));
20282 *no_add_attrs = true;
20285 else if ((is_attribute_p ("ms_struct", name)
20286 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20287 || ((is_attribute_p ("gcc_struct", name)
20288 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20290 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
20291 IDENTIFIER_POINTER (name));
20292 *no_add_attrs = true;
20295 return NULL_TREE;
20298 static bool
20299 ix86_ms_bitfield_layout_p (tree record_type)
20301 return (TARGET_MS_BITFIELD_LAYOUT &&
20302 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20303 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20306 /* Returns an expression indicating where the this parameter is
20307 located on entry to the FUNCTION. */
20309 static rtx
20310 x86_this_parameter (tree function)
20312 tree type = TREE_TYPE (function);
20313 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20315 if (TARGET_64BIT)
20317 const int *parm_regs;
20319 if (TARGET_64BIT_MS_ABI)
20320 parm_regs = x86_64_ms_abi_int_parameter_registers;
20321 else
20322 parm_regs = x86_64_int_parameter_registers;
20323 return gen_rtx_REG (DImode, parm_regs[aggr]);
20326 if (ix86_function_regparm (type, function) > 0
20327 && !type_has_variadic_args_p (type))
20329 int regno = 0;
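/* With regparm the this pointer arrives in %eax (hard register 0);
   with fastcall it arrives in %ecx (hard register 2).  */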
20330 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
20331 regno = 2;
20332 return gen_rtx_REG (SImode, regno);
20335 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
20338 /* Determine whether x86_output_mi_thunk can succeed. */
20340 static bool
20341 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
20342 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
20343 HOST_WIDE_INT vcall_offset, tree function)
20345 /* 64-bit can handle anything. */
20346 if (TARGET_64BIT)
20347 return true;
20349 /* For 32-bit, everything's fine if we have one free register. */
20350 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20351 return true;
20353 /* Need a free register for vcall_offset. */
20354 if (vcall_offset)
20355 return false;
20357 /* Need a free register for GOT references. */
20358 if (flag_pic && !(*targetm.binds_local_p) (function))
20359 return false;
20361 /* Otherwise ok. */
20362 return true;
20365 /* Output the assembler code for a thunk function. THUNK_DECL is the
20366 declaration for the thunk function itself, FUNCTION is the decl for
20367 the target function. DELTA is an immediate constant offset to be
20368 added to THIS. If VCALL_OFFSET is nonzero, the word at
20369 *(*this + vcall_offset) should be added to THIS. */
20371 static void
20372 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
20373 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
20374 HOST_WIDE_INT vcall_offset, tree function)
20376 rtx xops[3];
20377 rtx this = x86_this_parameter (function);
20378 rtx this_reg, tmp;
20380 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20381 pull it in now and let DELTA benefit. */
20382 if (REG_P (this))
20383 this_reg = this;
20384 else if (vcall_offset)
20386 /* Put the this parameter into %eax. */
20387 xops[0] = this;
20388 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
20389 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20391 else
20392 this_reg = NULL_RTX;
20394 /* Adjust the this parameter by a fixed constant. */
20395 if (delta)
20397 xops[0] = GEN_INT (delta);
20398 xops[1] = this_reg ? this_reg : this;
20399 if (TARGET_64BIT)
20401 if (!x86_64_general_operand (xops[0], DImode))
20403 tmp = gen_rtx_REG (DImode, R10_REG);
20404 xops[1] = tmp;
20405 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
20406 xops[0] = tmp;
20407 xops[1] = this;
20409 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
20411 else
20412 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
20415 /* Adjust the this parameter by a value stored in the vtable. */
20416 if (vcall_offset)
20418 if (TARGET_64BIT)
20419 tmp = gen_rtx_REG (DImode, R10_REG);
20420 else
20422 int tmp_regno = 2 /* ECX */;
20423 if (lookup_attribute ("fastcall",
20424 TYPE_ATTRIBUTES (TREE_TYPE (function))))
20425 tmp_regno = 0 /* EAX */;
20426 tmp = gen_rtx_REG (SImode, tmp_regno);
20429 xops[0] = gen_rtx_MEM (Pmode, this_reg);
20430 xops[1] = tmp;
20431 if (TARGET_64BIT)
20432 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
20433 else
20434 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20436 /* Adjust the this parameter. */
20437 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
20438 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
20440 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
20441 xops[0] = GEN_INT (vcall_offset);
20442 xops[1] = tmp2;
20443 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
20444 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
20446 xops[1] = this_reg;
20447 if (TARGET_64BIT)
20448 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
20449 else
20450 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
20453 /* If necessary, drop THIS back to its stack slot. */
20454 if (this_reg && this_reg != this)
20456 xops[0] = this_reg;
20457 xops[1] = this;
20458 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20461 xops[0] = XEXP (DECL_RTL (function), 0);
20462 if (TARGET_64BIT)
20464 if (!flag_pic || (*targetm.binds_local_p) (function))
20465 output_asm_insn ("jmp\t%P0", xops);
20466 /* All thunks should be in the same object as their target,
20467 and thus binds_local_p should be true. */
20468 else if (TARGET_64BIT_MS_ABI)
20469 gcc_unreachable ();
20470 else
20472 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
20473 tmp = gen_rtx_CONST (Pmode, tmp);
20474 tmp = gen_rtx_MEM (QImode, tmp);
20475 xops[0] = tmp;
20476 output_asm_insn ("jmp\t%A0", xops);
20479 else
20481 if (!flag_pic || (*targetm.binds_local_p) (function))
20482 output_asm_insn ("jmp\t%P0", xops);
20483 else
20484 #if TARGET_MACHO
20485 if (TARGET_MACHO)
20487 rtx sym_ref = XEXP (DECL_RTL (function), 0);
20488 tmp = (gen_rtx_SYMBOL_REF
20489 (Pmode,
20490 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
20491 tmp = gen_rtx_MEM (QImode, tmp);
20492 xops[0] = tmp;
20493 output_asm_insn ("jmp\t%0", xops);
20495 else
20496 #endif /* TARGET_MACHO */
20498 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
20499 output_set_got (tmp, NULL_RTX);
20501 xops[1] = tmp;
20502 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
20503 output_asm_insn ("jmp\t{*}%1", xops);
20508 static void
20509 x86_file_start (void)
20511 default_file_start ();
20512 #if TARGET_MACHO
20513 darwin_file_start ();
20514 #endif
20515 if (X86_FILE_START_VERSION_DIRECTIVE)
20516 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20517 if (X86_FILE_START_FLTUSED)
20518 fputs ("\t.global\t__fltused\n", asm_out_file);
20519 if (ix86_asm_dialect == ASM_INTEL)
20520 fputs ("\t.intel_syntax\n", asm_out_file);
20524 int x86_field_alignment (tree field, int computed)
20526 enum machine_mode mode;
20527 tree type = TREE_TYPE (field);
20529 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20530 return computed;
20531 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
20532 ? get_inner_array_type (type) : type);
20533 if (mode == DFmode || mode == DCmode
20534 || GET_MODE_CLASS (mode) == MODE_INT
20535 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20536 return MIN (32, computed);
20537 return computed;
20540 /* Output assembler code to FILE to increment profiler label # LABELNO
20541 for profiling a function entry. */
20542 void
20543 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20545 if (TARGET_64BIT)
20547 #ifndef NO_PROFILE_COUNTERS
20548 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
20549 #endif
20551 if (!TARGET_64BIT_MS_ABI && flag_pic)
20552 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
20553 else
20554 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20556 else if (flag_pic)
20558 #ifndef NO_PROFILE_COUNTERS
20559 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
20560 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
20561 #endif
20562 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
20564 else
20566 #ifndef NO_PROFILE_COUNTERS
20567 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
20568 PROFILE_COUNT_REGISTER);
20569 #endif
20570 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20574 /* We don't have exact information about the insn sizes, but we may assume
20575 quite safely that we are informed about all 1 byte insns and memory
20576 address sizes. This is enough to eliminate unnecessary padding in
20577 99% of cases. */
20579 static int
20580 min_insn_size (rtx insn)
20582 int l = 0;
20584 if (!INSN_P (insn) || !active_insn_p (insn))
20585 return 0;
20587 /* Discard alignments we've emitted and jump table instructions. */
20588 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20589 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20590 return 0;
20591 if (JUMP_P (insn)
20592 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
20593 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
20594 return 0;
20596 /* Important case - calls are always 5 bytes.
20597 It is common to have many calls in a row. */
20598 if (CALL_P (insn)
20599 && symbolic_reference_mentioned_p (PATTERN (insn))
20600 && !SIBLING_CALL_P (insn))
20601 return 5;
20602 if (get_attr_length (insn) <= 1)
20603 return 1;
20605 /* For normal instructions we may rely on the sizes of addresses
20606 and the presence of a symbol to require 4 bytes of encoding.
20607 This is not the case for jumps, where references are PC relative. */
20608 if (!JUMP_P (insn))
20610 l = get_attr_length_address (insn);
20611 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20612 l = 4;
20614 if (l)
20615 return 1+l;
20616 else
20617 return 2;
20620 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
20621 16 byte window. */
20623 static void
20624 ix86_avoid_jump_misspredicts (void)
20626 rtx insn, start = get_insns ();
20627 int nbytes = 0, njumps = 0;
20628 int isjump = 0;
20630 /* Look for all minimal intervals of instructions containing 4 jumps.
20631 The intervals are bounded by START and INSN. NBYTES is the total
20632 size of the instructions in the interval, including INSN but not
20633 START. When NBYTES is smaller than 16 bytes, it is possible that
20634 the end of START and INSN end up in the same 16 byte window.
20636 The smallest offset in the window at which INSN can start is the case
20637 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
20638 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN). */
20640 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20643 nbytes += min_insn_size (insn);
20644 if (dump_file)
20645 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
20646 INSN_UID (insn), min_insn_size (insn));
20647 if ((JUMP_P (insn)
20648 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20649 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
20650 || CALL_P (insn))
20651 njumps++;
20652 else
20653 continue;
20655 while (njumps > 3)
20657 start = NEXT_INSN (start);
20658 if ((JUMP_P (start)
20659 && GET_CODE (PATTERN (start)) != ADDR_VEC
20660 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
20661 || CALL_P (start))
20662 njumps--, isjump = 1;
20663 else
20664 isjump = 0;
20665 nbytes -= min_insn_size (start);
20667 gcc_assert (njumps >= 0);
20668 if (dump_file)
20669 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20670 INSN_UID (start), INSN_UID (insn), nbytes);
20672 if (njumps == 3 && isjump && nbytes < 16)
20674 int padsize = 15 - nbytes + min_insn_size (insn);
20676 if (dump_file)
20677 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20678 INSN_UID (insn), padsize);
20679 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
20684 /* AMD Athlon works faster
20685 when RET is not the destination of a conditional jump or directly preceded
20686 by another jump instruction. We avoid the penalty by inserting a NOP just
20687 before the RET instructions in such cases. */
20688 static void
20689 ix86_pad_returns (void)
20691 edge e;
20692 edge_iterator ei;
20694 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
20696 basic_block bb = e->src;
20697 rtx ret = BB_END (bb);
20698 rtx prev;
20699 bool replace = false;
20701 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
20702 || !maybe_hot_bb_p (bb))
20703 continue;
20704 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20705 if (active_insn_p (prev) || LABEL_P (prev))
20706 break;
20707 if (prev && LABEL_P (prev))
20709 edge e;
20710 edge_iterator ei;
20712 FOR_EACH_EDGE (e, ei, bb->preds)
20713 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20714 && !(e->flags & EDGE_FALLTHRU))
20715 replace = true;
20717 if (!replace)
20719 prev = prev_active_insn (ret);
20720 if (prev
20721 && ((JUMP_P (prev) && any_condjump_p (prev))
20722 || CALL_P (prev)))
20723 replace = true;
20724 /* Empty functions get a branch mispredict even when the jump destination
20725 is not visible to us. */
20726 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
20727 replace = true;
20729 if (replace)
20731 emit_insn_before (gen_return_internal_long (), ret);
20732 delete_insn (ret);
20737 /* Implement machine specific optimizations. We implement padding of returns
20738 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
20739 static void
20740 ix86_reorg (void)
20742 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
20743 ix86_pad_returns ();
20744 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
20745 ix86_avoid_jump_misspredicts ();
20748 /* Return nonzero when a QImode register that must be represented via a REX
20749 prefix is used. */
20750 bool
20751 x86_extended_QIreg_mentioned_p (rtx insn)
20753 int i;
20754 extract_insn_cached (insn);
20755 for (i = 0; i < recog_data.n_operands; i++)
20756 if (REG_P (recog_data.operand[i])
20757 && REGNO (recog_data.operand[i]) >= 4)
20758 return true;
20759 return false;
20762 /* Return nonzero when P points to a register encoded via a REX prefix.
20763 Called via for_each_rtx. */
20764 static int
20765 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20767 unsigned int regno;
20768 if (!REG_P (*p))
20769 return 0;
20770 regno = REGNO (*p);
20771 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20774 /* Return true when INSN mentions a register that must be encoded using a
20775 REX prefix. */
20776 bool
20777 x86_extended_reg_mentioned_p (rtx insn)
20779 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20782 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20783 optabs would emit if we didn't have TFmode patterns. */
20785 void
20786 x86_emit_floatuns (rtx operands[2])
20788 rtx neglab, donelab, i0, i1, f0, in, out;
20789 enum machine_mode mode, inmode;
20791 inmode = GET_MODE (operands[1]);
20792 gcc_assert (inmode == SImode || inmode == DImode);
20794 out = operands[0];
20795 in = force_reg (inmode, operands[1]);
20796 mode = GET_MODE (out);
20797 neglab = gen_label_rtx ();
20798 donelab = gen_label_rtx ();
20799 f0 = gen_reg_rtx (mode);
20801 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20803 expand_float (out, in, 0);
20805 emit_jump_insn (gen_jump (donelab));
20806 emit_barrier ();
20808 emit_label (neglab);
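/* The input has its top bit set, so a signed conversion would be wrong.
   Halve it, folding the discarded low bit back in so that the final
   doubling rounds the same way the original value would, convert to FP,
   and then double the result.  */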
20810 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20811 1, OPTAB_DIRECT);
20812 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20813 1, OPTAB_DIRECT);
20814 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20816 expand_float (f0, i0, 0);
20818 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20820 emit_label (donelab);
20823 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20824 with all elements equal to VAR. Return true if successful. */
20826 static bool
20827 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20828 rtx target, rtx val)
20830 enum machine_mode smode, wsmode, wvmode;
20831 rtx x;
20833 switch (mode)
20835 case V2SImode:
20836 case V2SFmode:
20837 if (!mmx_ok)
20838 return false;
20839 /* FALLTHRU */
20841 case V2DFmode:
20842 case V2DImode:
20843 case V4SFmode:
20844 case V4SImode:
20845 val = force_reg (GET_MODE_INNER (mode), val);
20846 x = gen_rtx_VEC_DUPLICATE (mode, val);
20847 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20848 return true;
20850 case V4HImode:
20851 if (!mmx_ok)
20852 return false;
20853 if (TARGET_SSE || TARGET_3DNOW_A)
20855 val = gen_lowpart (SImode, val);
20856 x = gen_rtx_TRUNCATE (HImode, val);
20857 x = gen_rtx_VEC_DUPLICATE (mode, x);
20858 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20859 return true;
20861 else
20863 smode = HImode;
20864 wsmode = SImode;
20865 wvmode = V2SImode;
20866 goto widen;
20869 case V8QImode:
20870 if (!mmx_ok)
20871 return false;
20872 smode = QImode;
20873 wsmode = HImode;
20874 wvmode = V4HImode;
20875 goto widen;
20876 case V8HImode:
20877 if (TARGET_SSE2)
20879 rtx tmp1, tmp2;
20880 /* Extend HImode to SImode using a paradoxical SUBREG. */
20881 tmp1 = gen_reg_rtx (SImode);
20882 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20883 /* Insert the SImode value as low element of V4SImode vector. */
20884 tmp2 = gen_reg_rtx (V4SImode);
20885 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20886 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20887 CONST0_RTX (V4SImode),
20888 const1_rtx);
20889 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20890 /* Cast the V4SImode vector back to a V8HImode vector. */
20891 tmp1 = gen_reg_rtx (V8HImode);
20892 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20893 /* Duplicate the low short through the whole low SImode word. */
20894 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20895 /* Cast the V8HImode vector back to a V4SImode vector. */
20896 tmp2 = gen_reg_rtx (V4SImode);
20897 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20898 /* Replicate the low element of the V4SImode vector. */
20899 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20900 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20901 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20902 return true;
20904 smode = HImode;
20905 wsmode = SImode;
20906 wvmode = V4SImode;
20907 goto widen;
20908 case V16QImode:
20909 if (TARGET_SSE2)
20911 rtx tmp1, tmp2;
20912 /* Extend QImode to SImode using a paradoxical SUBREG. */
20913 tmp1 = gen_reg_rtx (SImode);
20914 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20915 /* Insert the SImode value as low element of V4SImode vector. */
20916 tmp2 = gen_reg_rtx (V4SImode);
20917 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20918 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20919 CONST0_RTX (V4SImode),
20920 const1_rtx);
20921 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20922 /* Cast the V4SImode vector back to a V16QImode vector. */
20923 tmp1 = gen_reg_rtx (V16QImode);
20924 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20925 /* Duplicate the low byte through the whole low SImode word. */
20926 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20927 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20928 /* Cast the V16QImode vector back to a V4SImode vector. */
20929 tmp2 = gen_reg_rtx (V4SImode);
20930 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20931 /* Replicate the low element of the V4SImode vector. */
20932 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20933 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20934 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20935 return true;
20937 smode = QImode;
20938 wsmode = HImode;
20939 wvmode = V8HImode;
20940 goto widen;
20941 widen:
20942 /* Replicate the value once into the next wider mode and recurse. */
20943 val = convert_modes (wsmode, smode, val, true);
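/* Form two adjacent copies of the value in the wider scalar mode:
   val | (val << bits), where bits is the width of the narrow mode.  */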
20944 x = expand_simple_binop (wsmode, ASHIFT, val,
20945 GEN_INT (GET_MODE_BITSIZE (smode)),
20946 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20947 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20949 x = gen_reg_rtx (wvmode);
20950 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20951 gcc_unreachable ();
20952 emit_move_insn (target, gen_lowpart (mode, x));
20953 return true;
20955 default:
20956 return false;
20960 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20961 whose ONE_VAR element is VAR, and other elements are zero. Return true
20962 if successful. */
20964 static bool
20965 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20966 rtx target, rtx var, int one_var)
20968 enum machine_mode vsimode;
20969 rtx new_target;
20970 rtx x, tmp;
20972 switch (mode)
20974 case V2SFmode:
20975 case V2SImode:
20976 if (!mmx_ok)
20977 return false;
20978 /* FALLTHRU */
20980 case V2DFmode:
20981 case V2DImode:
20982 if (one_var != 0)
20983 return false;
20984 var = force_reg (GET_MODE_INNER (mode), var);
20985 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20986 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20987 return true;
20989 case V4SFmode:
20990 case V4SImode:
20991 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20992 new_target = gen_reg_rtx (mode);
20993 else
20994 new_target = target;
20995 var = force_reg (GET_MODE_INNER (mode), var);
20996 x = gen_rtx_VEC_DUPLICATE (mode, var);
20997 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20998 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20999 if (one_var != 0)
21001 /* We need to shuffle the value to the correct position, so
21002 create a new pseudo to store the intermediate result. */
21004 /* With SSE2, we can use the integer shuffle insns. */
21005 if (mode != V4SFmode && TARGET_SSE2)
21007 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21008 GEN_INT (1),
21009 GEN_INT (one_var == 1 ? 0 : 1),
21010 GEN_INT (one_var == 2 ? 0 : 1),
21011 GEN_INT (one_var == 3 ? 0 : 1)));
21012 if (target != new_target)
21013 emit_move_insn (target, new_target);
21014 return true;
21017 /* Otherwise convert the intermediate result to V4SFmode and
21018 use the SSE1 shuffle instructions. */
21019 if (mode != V4SFmode)
21021 tmp = gen_reg_rtx (V4SFmode);
21022 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21024 else
21025 tmp = new_target;
21027 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21028 GEN_INT (1),
21029 GEN_INT (one_var == 1 ? 0 : 1),
21030 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21031 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21033 if (mode != V4SFmode)
21034 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21035 else if (tmp != target)
21036 emit_move_insn (target, tmp);
21038 else if (target != new_target)
21039 emit_move_insn (target, new_target);
21040 return true;
21042 case V8HImode:
21043 case V16QImode:
21044 vsimode = V4SImode;
21045 goto widen;
21046 case V4HImode:
21047 case V8QImode:
21048 if (!mmx_ok)
21049 return false;
21050 vsimode = V2SImode;
21051 goto widen;
21052 widen:
21053 if (one_var != 0)
21054 return false;
21056 /* Zero extend the variable element to SImode and recurse. */
21057 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21059 x = gen_reg_rtx (vsimode);
21060 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21061 var, one_var))
21062 gcc_unreachable ();
21064 emit_move_insn (target, gen_lowpart (mode, x));
21065 return true;
21067 default:
21068 return false;
21072 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21073 consisting of the values in VALS. It is known that all elements
21074 except ONE_VAR are constants. Return true if successful. */
21076 static bool
21077 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21078 rtx target, rtx vals, int one_var)
21080 rtx var = XVECEXP (vals, 0, one_var);
21081 enum machine_mode wmode;
21082 rtx const_vec, x;
21084 const_vec = copy_rtx (vals);
21085 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21086 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21088 switch (mode)
21090 case V2DFmode:
21091 case V2DImode:
21092 case V2SFmode:
21093 case V2SImode:
21094 /* For the two element vectors, it's just as easy to use
21095 the general case. */
21096 return false;
21098 case V4SFmode:
21099 case V4SImode:
21100 case V8HImode:
21101 case V4HImode:
21102 break;
21104 case V16QImode:
21105 wmode = V8HImode;
21106 goto widen;
21107 case V8QImode:
21108 wmode = V4HImode;
21109 goto widen;
21110 widen:
21111 /* There's no way to set one QImode entry easily. Combine
21112 the variable value with its adjacent constant value, and
21113 promote to an HImode set. */
21114 x = XVECEXP (vals, 0, one_var ^ 1);
21115 if (one_var & 1)
21117 var = convert_modes (HImode, QImode, var, true);
21118 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21119 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21120 x = GEN_INT (INTVAL (x) & 0xff);
21122 else
21124 var = convert_modes (HImode, QImode, var, true);
21125 x = gen_int_mode (INTVAL (x) << 8, HImode);
21127 if (x != const0_rtx)
21128 var = expand_simple_binop (HImode, IOR, var, x, var,
21129 1, OPTAB_LIB_WIDEN);
21131 x = gen_reg_rtx (wmode);
21132 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21133 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21135 emit_move_insn (target, gen_lowpart (mode, x));
21136 return true;
21138 default:
21139 return false;
21142 emit_move_insn (target, const_vec);
21143 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21144 return true;
21147 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21148 all values variable, and none identical. */
21150 static void
21151 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21152 rtx target, rtx vals)
21154 enum machine_mode half_mode = GET_MODE_INNER (mode);
21155 rtx op0 = NULL, op1 = NULL;
21156 bool use_vec_concat = false;
21158 switch (mode)
21160 case V2SFmode:
21161 case V2SImode:
21162 if (!mmx_ok && !TARGET_SSE)
21163 break;
21164 /* FALLTHRU */
21166 case V2DFmode:
21167 case V2DImode:
21168 /* For the two element vectors, we always implement VEC_CONCAT. */
21169 op0 = XVECEXP (vals, 0, 0);
21170 op1 = XVECEXP (vals, 0, 1);
21171 use_vec_concat = true;
21172 break;
21174 case V4SFmode:
21175 half_mode = V2SFmode;
21176 goto half;
21177 case V4SImode:
21178 half_mode = V2SImode;
21179 goto half;
21180 half:
21182 rtvec v;
21184 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21185 Recurse to load the two halves. */
21187 op0 = gen_reg_rtx (half_mode);
21188 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
21189 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
21191 op1 = gen_reg_rtx (half_mode);
21192 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
21193 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
21195 use_vec_concat = true;
21197 break;
21199 case V8HImode:
21200 case V16QImode:
21201 case V4HImode:
21202 case V8QImode:
21203 break;
21205 default:
21206 gcc_unreachable ();
21209 if (use_vec_concat)
21211 if (!register_operand (op0, half_mode))
21212 op0 = force_reg (half_mode, op0);
21213 if (!register_operand (op1, half_mode))
21214 op1 = force_reg (half_mode, op1);
21216 emit_insn (gen_rtx_SET (VOIDmode, target,
21217 gen_rtx_VEC_CONCAT (mode, op0, op1)));
21219 else
21221 int i, j, n_elts, n_words, n_elt_per_word;
21222 enum machine_mode inner_mode;
21223 rtx words[4], shift;
21225 inner_mode = GET_MODE_INNER (mode);
21226 n_elts = GET_MODE_NUNITS (mode);
21227 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21228 n_elt_per_word = n_elts / n_words;
21229 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
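/* Build each word by ORing in its elements from the highest-numbered
   one down, shifting left each time, so that element 0 ends up in the
   low-order bits (matching the little-endian layout).  */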
21231 for (i = 0; i < n_words; ++i)
21233 rtx word = NULL_RTX;
21235 for (j = 0; j < n_elt_per_word; ++j)
21237 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
21238 elt = convert_modes (word_mode, inner_mode, elt, true);
21240 if (j == 0)
21241 word = elt;
21242 else
21244 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
21245 word, 1, OPTAB_LIB_WIDEN);
21246 word = expand_simple_binop (word_mode, IOR, word, elt,
21247 word, 1, OPTAB_LIB_WIDEN);
21251 words[i] = word;
21254 if (n_words == 1)
21255 emit_move_insn (target, gen_lowpart (mode, words[0]));
21256 else if (n_words == 2)
21258 rtx tmp = gen_reg_rtx (mode);
21259 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
21260 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
21261 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
21262 emit_move_insn (target, tmp);
21264 else if (n_words == 4)
21266 rtx tmp = gen_reg_rtx (V4SImode);
21267 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
21268 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
21269 emit_move_insn (target, gen_lowpart (mode, tmp));
21271 else
21272 gcc_unreachable ();
21276 /* Initialize vector TARGET via VALS. Suppress the use of MMX
21277 instructions unless MMX_OK is true. */
21279 void
21280 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
21282 enum machine_mode mode = GET_MODE (target);
21283 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21284 int n_elts = GET_MODE_NUNITS (mode);
21285 int n_var = 0, one_var = -1;
21286 bool all_same = true, all_const_zero = true;
21287 int i;
21288 rtx x;
21290 for (i = 0; i < n_elts; ++i)
21292 x = XVECEXP (vals, 0, i);
21293 if (!CONSTANT_P (x))
21294 n_var++, one_var = i;
21295 else if (x != CONST0_RTX (inner_mode))
21296 all_const_zero = false;
21297 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
21298 all_same = false;
21301 /* Constants are best loaded from the constant pool. */
21302 if (n_var == 0)
21304 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
21305 return;
21308 /* If all values are identical, broadcast the value. */
21309 if (all_same
21310 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
21311 XVECEXP (vals, 0, 0)))
21312 return;
21314 /* Values where only one field is non-constant are best loaded from
21315 the pool and overwritten via move later. */
21316 if (n_var == 1)
21318 if (all_const_zero
21319 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
21320 XVECEXP (vals, 0, one_var),
21321 one_var))
21322 return;
21324 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
21325 return;
21328 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
21331 void
21332 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
21334 enum machine_mode mode = GET_MODE (target);
21335 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21336 bool use_vec_merge = false;
21337 rtx tmp;
21339 switch (mode)
21341 case V2SFmode:
21342 case V2SImode:
21343 if (mmx_ok)
21345 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
21346 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
21347 if (elt == 0)
21348 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
21349 else
21350 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
21351 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21352 return;
21354 break;
21356 case V2DImode:
21357 use_vec_merge = TARGET_SSE4_1;
21358 if (use_vec_merge)
21359 break;
21361 case V2DFmode:
21363 rtx op0, op1;
21365 /* For the two element vectors, we implement a VEC_CONCAT with
21366 the extraction of the other element. */
21368 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
21369 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
21371 if (elt == 0)
21372 op0 = val, op1 = tmp;
21373 else
21374 op0 = tmp, op1 = val;
21376 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
21377 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21379 return;
21381 case V4SFmode:
21382 use_vec_merge = TARGET_SSE4_1;
21383 if (use_vec_merge)
21384 break;
21386 switch (elt)
21388 case 0:
21389 use_vec_merge = true;
21390 break;
21392 case 1:
21393 /* tmp = target = A B C D */
21394 tmp = copy_to_reg (target);
21395 /* target = A A B B */
21396 emit_insn (gen_sse_unpcklps (target, target, target));
21397 /* target = X A B B */
21398 ix86_expand_vector_set (false, target, val, 0);
21399 /* target = A X C D */
21400 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21401 GEN_INT (1), GEN_INT (0),
21402 GEN_INT (2+4), GEN_INT (3+4)));
21403 return;
21405 case 2:
21406 /* tmp = target = A B C D */
21407 tmp = copy_to_reg (target);
21408 /* tmp = X B C D */
21409 ix86_expand_vector_set (false, tmp, val, 0);
21410 /* target = A B X D */
21411 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21412 GEN_INT (0), GEN_INT (1),
21413 GEN_INT (0+4), GEN_INT (3+4)));
21414 return;
21416 case 3:
21417 /* tmp = target = A B C D */
21418 tmp = copy_to_reg (target);
21419 /* tmp = X B C D */
21420 ix86_expand_vector_set (false, tmp, val, 0);
21421 /* target = A B C X */
21422 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21423 GEN_INT (0), GEN_INT (1),
21424 GEN_INT (2+4), GEN_INT (0+4)));
21425 return;
21427 default:
21428 gcc_unreachable ();
21430 break;
21432 case V4SImode:
21433 use_vec_merge = TARGET_SSE4_1;
21434 if (use_vec_merge)
21435 break;
21437 /* Element 0 handled by vec_merge below. */
21438 if (elt == 0)
21440 use_vec_merge = true;
21441 break;
21444 if (TARGET_SSE2)
21446 /* With SSE2, use integer shuffles to swap element 0 and ELT,
21447 store into element 0, then shuffle them back. */
21449 rtx order[4];
21451 order[0] = GEN_INT (elt);
21452 order[1] = const1_rtx;
21453 order[2] = const2_rtx;
21454 order[3] = GEN_INT (3);
21455 order[elt] = const0_rtx;
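/* order[] now describes the permutation that swaps element 0 with ELT;
   since that swap is its own inverse, applying the same pshufd twice
   restores the original element order.  */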
21457 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
21458 order[1], order[2], order[3]));
21460 ix86_expand_vector_set (false, target, val, 0);
21462 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
21463 order[1], order[2], order[3]));
21465 else
21467 /* For SSE1, we have to reuse the V4SF code. */
21468 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
21469 gen_lowpart (SFmode, val), elt);
21471 return;
21473 case V8HImode:
21474 use_vec_merge = TARGET_SSE2;
21475 break;
21476 case V4HImode:
21477 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21478 break;
21480 case V16QImode:
21481 use_vec_merge = TARGET_SSE4_1;
21482 break;
21484 case V8QImode:
21485 default:
21486 break;
21489 if (use_vec_merge)
21491 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
21492 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
21493 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21495 else
21497 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21499 emit_move_insn (mem, target);
21501 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21502 emit_move_insn (tmp, val);
21504 emit_move_insn (target, mem);
21508 void
21509 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
21511 enum machine_mode mode = GET_MODE (vec);
21512 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21513 bool use_vec_extr = false;
21514 rtx tmp;
21516 switch (mode)
21518 case V2SImode:
21519 case V2SFmode:
21520 if (!mmx_ok)
21521 break;
21522 /* FALLTHRU */
21524 case V2DFmode:
21525 case V2DImode:
21526 use_vec_extr = true;
21527 break;
21529 case V4SFmode:
21530 use_vec_extr = TARGET_SSE4_1;
21531 if (use_vec_extr)
21532 break;
21534 switch (elt)
21536 case 0:
21537 tmp = vec;
21538 break;
21540 case 1:
21541 case 3:
21542 tmp = gen_reg_rtx (mode);
21543 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
21544 GEN_INT (elt), GEN_INT (elt),
21545 GEN_INT (elt+4), GEN_INT (elt+4)));
21546 break;
21548 case 2:
21549 tmp = gen_reg_rtx (mode);
21550 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
21551 break;
21553 default:
21554 gcc_unreachable ();
21556 vec = tmp;
21557 use_vec_extr = true;
21558 elt = 0;
21559 break;
21561 case V4SImode:
21562 use_vec_extr = TARGET_SSE4_1;
21563 if (use_vec_extr)
21564 break;
21566 if (TARGET_SSE2)
21568 switch (elt)
21570 case 0:
21571 tmp = vec;
21572 break;
21574 case 1:
21575 case 3:
21576 tmp = gen_reg_rtx (mode);
21577 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
21578 GEN_INT (elt), GEN_INT (elt),
21579 GEN_INT (elt), GEN_INT (elt)));
21580 break;
21582 case 2:
21583 tmp = gen_reg_rtx (mode);
21584 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
21585 break;
21587 default:
21588 gcc_unreachable ();
21590 vec = tmp;
21591 use_vec_extr = true;
21592 elt = 0;
21594 else
21596 /* For SSE1, we have to reuse the V4SF code. */
21597 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
21598 gen_lowpart (V4SFmode, vec), elt);
21599 return;
21601 break;
21603 case V8HImode:
21604 use_vec_extr = TARGET_SSE2;
21605 break;
21606 case V4HImode:
21607 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21608 break;
21610 case V16QImode:
21611 use_vec_extr = TARGET_SSE4_1;
21612 break;
21614 case V8QImode:
21615 /* ??? Could extract the appropriate HImode element and shift. */
21616 default:
21617 break;
21620 if (use_vec_extr)
21622 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
21623 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
21625 /* Let the rtl optimizers know about the zero extension performed. */
21626 if (inner_mode == QImode || inner_mode == HImode)
21628 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
21629 target = gen_lowpart (SImode, target);
21632 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21634 else
21636 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21638 emit_move_insn (mem, vec);
21640 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21641 emit_move_insn (target, tmp);
21645 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
21646 pattern to reduce; DEST is the destination; IN is the input vector. */
21648 void
21649 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
21651 rtx tmp1, tmp2, tmp3;
21653 tmp1 = gen_reg_rtx (V4SFmode);
21654 tmp2 = gen_reg_rtx (V4SFmode);
21655 tmp3 = gen_reg_rtx (V4SFmode);
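/* Combine the high pair of IN with its low pair, giving two partial
   results, then combine those two; the full reduction ends up in
   element 0 of DEST.  */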
21657 emit_insn (gen_sse_movhlps (tmp1, in, in));
21658 emit_insn (fn (tmp2, tmp1, in));
21660 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
21661 GEN_INT (1), GEN_INT (1),
21662 GEN_INT (1+4), GEN_INT (1+4)));
21663 emit_insn (fn (dest, tmp2, tmp3));
21666 /* Target hook for scalar_mode_supported_p. */
21667 static bool
21668 ix86_scalar_mode_supported_p (enum machine_mode mode)
21670 if (DECIMAL_FLOAT_MODE_P (mode))
21671 return true;
21672 else if (mode == TFmode)
21673 return TARGET_64BIT;
21674 else
21675 return default_scalar_mode_supported_p (mode);
21678 /* Implements target hook vector_mode_supported_p. */
21679 static bool
21680 ix86_vector_mode_supported_p (enum machine_mode mode)
21682 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21683 return true;
21684 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21685 return true;
21686 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
21687 return true;
21688 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
21689 return true;
21690 return false;
21693 /* Worker function for TARGET_MD_ASM_CLOBBERS.
21695 We do this in the new i386 backend to maintain source compatibility
21696 with the old cc0-based compiler. */
21698 static tree
21699 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
21700 tree inputs ATTRIBUTE_UNUSED,
21701 tree clobbers)
21703 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
21704 clobbers);
21705 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
21706 clobbers);
21707 return clobbers;
21710 /* Implements target vector targetm.asm.encode_section_info. This
21711 is not used by netware. */
21713 static void ATTRIBUTE_UNUSED
21714 ix86_encode_section_info (tree decl, rtx rtl, int first)
21716 default_encode_section_info (decl, rtl, first);
21718 if (TREE_CODE (decl) == VAR_DECL
21719 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
21720 && ix86_in_large_data_p (decl))
21721 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21724 /* Worker function for REVERSE_CONDITION. */
21726 enum rtx_code
21727 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
21729 return (mode != CCFPmode && mode != CCFPUmode
21730 ? reverse_condition (code)
21731 : reverse_condition_maybe_unordered (code));
21734 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21735 to OPERANDS[0]. */
21737 const char *
21738 output_387_reg_move (rtx insn, rtx *operands)
21740 if (REG_P (operands[0]))
21742 if (REG_P (operands[1])
21743 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21745 if (REGNO (operands[0]) == FIRST_STACK_REG)
21746 return output_387_ffreep (operands, 0);
21747 return "fstp\t%y0";
21749 if (STACK_TOP_P (operands[0]))
21750 return "fld%z1\t%y1";
21751 return "fst\t%y0";
21753 else if (MEM_P (operands[0]))
21755 gcc_assert (REG_P (operands[1]));
21756 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21757 return "fstp%z0\t%y0";
21758 else
21760 /* There is no non-popping store to memory for XFmode.
21761 So if we need one, follow the store with a load. */
21762 if (GET_MODE (operands[0]) == XFmode)
21763 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
21764 else
21765 return "fst%z0\t%y0";
21768 else
21769 gcc_unreachable();
21772 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
21773 the FP status register is set. */
21775 void
21776 ix86_emit_fp_unordered_jump (rtx label)
21778 rtx reg = gen_reg_rtx (HImode);
21779 rtx temp;
21781 emit_insn (gen_x86_fnstsw_1 (reg));
21783 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
21785 emit_insn (gen_x86_sahf_1 (reg));
21787 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21788 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
21790 else
21792 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21794 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21795 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21798 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21799 gen_rtx_LABEL_REF (VOIDmode, label),
21800 pc_rtx);
21801 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21803 emit_jump_insn (temp);
21804 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21807 /* Output code to perform a log1p XFmode calculation. */
21809 void ix86_emit_i387_log1p (rtx op0, rtx op1)
21811 rtx label1 = gen_label_rtx ();
21812 rtx label2 = gen_label_rtx ();
21814 rtx tmp = gen_reg_rtx (XFmode);
21815 rtx tmp2 = gen_reg_rtx (XFmode);
21817 emit_insn (gen_absxf2 (tmp, op1));
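/* fyl2xp1 is only accurate for arguments with |x| < 1 - sqrt(2)/2
   (about 0.29289); for larger |op1| we branch to the fyl2x path below,
   which computes log2 (1 + op1) directly.  */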
21818 emit_insn (gen_cmpxf (tmp,
21819 CONST_DOUBLE_FROM_REAL_VALUE (
21820 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21821 XFmode)));
21822 emit_jump_insn (gen_bge (label1));
21824 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21825 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21826 emit_jump (label2);
21828 emit_label (label1);
21829 emit_move_insn (tmp, CONST1_RTX (XFmode));
21830 emit_insn (gen_addxf3 (tmp, op1, tmp));
21831 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21832 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21834 emit_label (label2);
21837 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21839 static void ATTRIBUTE_UNUSED
21840 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21841 tree decl)
21843 /* With Binutils 2.15, the "@unwind" marker must be specified on
21844 every occurrence of the ".eh_frame" section, not just the first
21845 one. */
21846 if (TARGET_64BIT
21847 && strcmp (name, ".eh_frame") == 0)
21849 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21850 flags & SECTION_WRITE ? "aw" : "a");
21851 return;
21853 default_elf_asm_named_section (name, flags, decl);
21856 /* Return the mangling of TYPE if it is an extended fundamental type. */
21858 static const char *
21859 ix86_mangle_fundamental_type (tree type)
21861 switch (TYPE_MODE (type))
21863 case TFmode:
21864 /* __float128 is "g". */
21865 return "g";
21866 case XFmode:
21867 /* "long double" or __float80 is "e". */
21868 return "e";
21869 default:
21870 return NULL;
21874 /* For 32-bit code we can save the PIC register setup by using the
21875 hidden __stack_chk_fail_local function instead of calling
21876 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
21877 register, so it is better to call __stack_chk_fail directly. */
21879 static tree
21880 ix86_stack_protect_fail (void)
21882 return TARGET_64BIT
21883 ? default_external_stack_protect_fail ()
21884 : default_hidden_stack_protect_fail ();
21887 /* Select a format to encode pointers in exception handling data. CODE
21888 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21889 true if the symbol may be affected by dynamic relocations.
21891 ??? All x86 object file formats are capable of representing this.
21892 After all, the relocation needed is the same as for the call insn.
21893 Whether or not a particular assembler allows us to enter such, I
21894 guess we'll have to see. */
21896 int asm_preferred_eh_data_format (int code, int global)
21898 if (flag_pic)
21900 int type = DW_EH_PE_sdata8;
21901 if (!TARGET_64BIT
21902 || ix86_cmodel == CM_SMALL_PIC
21903 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21904 type = DW_EH_PE_sdata4;
21905 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21907 if (ix86_cmodel == CM_SMALL
21908 || (ix86_cmodel == CM_MEDIUM && code))
21909 return DW_EH_PE_udata4;
21910 return DW_EH_PE_absptr;
21913 /* Expand copysign from SIGN to the positive value ABS_VALUE
21914 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21915 the sign-bit. */
21916 static void
21917 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21919 enum machine_mode mode = GET_MODE (sign);
21920 rtx sgn = gen_reg_rtx (mode);
21921 if (mask == NULL_RTX)
21923 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21924 if (!VECTOR_MODE_P (mode))
21926 /* We need to generate a scalar mode mask in this case. */
21927 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21928 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21929 mask = gen_reg_rtx (mode);
21930 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21933 else
21934 mask = gen_rtx_NOT (mode, mask);
21935 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21936 gen_rtx_AND (mode, mask, sign)));
21937 emit_insn (gen_rtx_SET (VOIDmode, result,
21938 gen_rtx_IOR (mode, abs_value, sgn)));
21941 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21942 mask for masking out the sign-bit is stored in *SMASK, if that is
21943 non-null. */
21944 static rtx
21945 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21947 enum machine_mode mode = GET_MODE (op0);
21948 rtx xa, mask;
21950 xa = gen_reg_rtx (mode);
21951 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21952 if (!VECTOR_MODE_P (mode))
21954 /* We need to generate a scalar mode mask in this case. */
21955 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21956 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21957 mask = gen_reg_rtx (mode);
21958 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21960 emit_insn (gen_rtx_SET (VOIDmode, xa,
21961 gen_rtx_AND (mode, op0, mask)));
21963 if (smask)
21964 *smask = mask;
21966 return xa;
21969 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21970 swapping the operands if SWAP_OPERANDS is true. The expanded
21971 code is a forward jump to a newly created label in case the
21972 comparison is true. The generated label rtx is returned. */
21973 static rtx
21974 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21975 bool swap_operands)
21977 rtx label, tmp;
21979 if (swap_operands)
21981 tmp = op0;
21982 op0 = op1;
21983 op1 = tmp;
21986 label = gen_label_rtx ();
21987 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21988 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21989 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21990 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21991 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21992 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21993 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21994 JUMP_LABEL (tmp) = label;
21996 return label;
21999 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
22000 using comparison code CODE. Operands are swapped for the comparison if
22001 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
22002 static rtx
22003 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22004 bool swap_operands)
22006 enum machine_mode mode = GET_MODE (op0);
22007 rtx mask = gen_reg_rtx (mode);
22009 if (swap_operands)
22011 rtx tmp = op0;
22012 op0 = op1;
22013 op1 = tmp;
22016 if (mode == DFmode)
22017 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22018 gen_rtx_fmt_ee (code, mode, op0, op1)));
22019 else
22020 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22021 gen_rtx_fmt_ee (code, mode, op0, op1)));
22023 return mask;
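/* The cmpsd/cmpss patterns used here write an all-ones bit pattern into
   the destination when the predicate holds and all-zeros otherwise, so the
   expanders below can AND the returned mask with a constant such as 1.0 to
   get a branch-free "add (or subtract) 1.0 only if the comparison is true"
   step.  */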
22026 /* Generate and return an rtx of mode MODE holding the value 2**n, where n is
22027 the number of stored mantissa bits of MODE: 52 for DFmode, 23 for SFmode. */
22028 static rtx
22029 ix86_gen_TWO52 (enum machine_mode mode)
22031 REAL_VALUE_TYPE TWO52r;
22032 rtx TWO52;
22034 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
22035 TWO52 = const_double_from_real_value (TWO52r, mode);
22036 TWO52 = force_reg (mode, TWO52);
22038 return TWO52;
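/* 2**52 (2**23 for SFmode) is the smallest power of two at and above which
   every representable value of the mode is already an integer, because the
   stored fraction bits are then fully consumed by the integer part.
   Adding and subtracting this constant therefore rounds any smaller
   magnitude to an integer in the current rounding mode; most of the
   expanders below are built on that trick.  */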
22041 /* Expand SSE sequence for computing lround from OP1 storing
22042 into OP0. */
22043 void
22044 ix86_expand_lround (rtx op0, rtx op1)
22046 /* C code for the stuff we're doing below:
22047 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
22048 return (long)tmp;
22050 enum machine_mode mode = GET_MODE (op1);
22051 const struct real_format *fmt;
22052 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22053 rtx adj;
22055 /* load nextafter (0.5, 0.0) */
22056 fmt = REAL_MODE_FORMAT (mode);
22057 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22058 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22060 /* adj = copysign (0.5, op1) */
22061 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
22062 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
22064 /* adj = op1 + adj */
22065 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
22067 /* op0 = (imode)adj */
22068 expand_fix (op0, adj, 0);
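/* Why nextafter (0.5, 0.0) rather than 0.5: for the largest double below
   0.5, adding plain 0.5 lands exactly halfway between 1.0 and its
   predecessor and rounds (to even) up to 1.0, so the truncation would
   return 1 instead of 0.  Adding pred(0.5) keeps that sum just below 1.0,
   while an exact halfway case such as 2.5 still rounds up to 3.0 and is
   thus rounded away from zero, as lround requires.  */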
22071 /* Expand SSE2 sequence for computing lfloor or lceil (depending on DO_FLOOR)
22072 from OP1, storing the integer result into OP0. */
22073 void
22074 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
22076 /* C code for the stuff we're doing below (for do_floor):
22077 xi = (long)op1;
22078 xi -= (double)xi > op1 ? 1 : 0;
22079 return xi;
22081 enum machine_mode fmode = GET_MODE (op1);
22082 enum machine_mode imode = GET_MODE (op0);
22083 rtx ireg, freg, label, tmp;
22085 /* reg = (long)op1 */
22086 ireg = gen_reg_rtx (imode);
22087 expand_fix (ireg, op1, 0);
22089 /* freg = (double)reg */
22090 freg = gen_reg_rtx (fmode);
22091 expand_float (freg, ireg, 0);
22093 /* ireg = (freg > op1) ? ireg - 1 : ireg */
22094 label = ix86_expand_sse_compare_and_jump (UNLE,
22095 freg, op1, !do_floor);
22096 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
22097 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
22098 emit_move_insn (ireg, tmp);
22100 emit_label (label);
22101 LABEL_NUSES (label) = 1;
22103 emit_move_insn (op0, ireg);
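/* Worked example of the floor case: op1 = -1.25 gives ireg = -1 (the
   conversion truncates toward zero); since (double) -1 > -1.25 the jump is
   not taken and ireg is decremented to -2 = floor (-1.25).  For ceil the
   comparison operands are swapped and the constant is added instead,
   e.g. 1.25 -> 1 -> 2.  */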
22106 /* Expand rint, rounding OPERAND1 to the nearest integer in the current
22107 IEEE rounding mode and storing the result in OPERAND0. */
22108 void
22109 ix86_expand_rint (rtx operand0, rtx operand1)
22111 /* C code for the stuff we're doing below:
22112 xa = fabs (operand1);
22113 if (!isless (xa, 2**52))
22114 return operand1;
22115 xa = xa + 2**52 - 2**52;
22116 return copysign (xa, operand1);
22118 enum machine_mode mode = GET_MODE (operand0);
22119 rtx res, xa, label, TWO52, mask;
22121 res = gen_reg_rtx (mode);
22122 emit_move_insn (res, operand1);
22124 /* xa = abs (operand1) */
22125 xa = ix86_expand_sse_fabs (res, &mask);
22127 /* if (!isless (xa, TWO52)) goto label; */
22128 TWO52 = ix86_gen_TWO52 (mode);
22129 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22131 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22132 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22134 ix86_sse_copysign_to_positive (res, xa, res, mask);
22136 emit_label (label);
22137 LABEL_NUSES (label) = 1;
22139 emit_move_insn (operand0, res);
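/* For |x| < 2**52 the add/subtract pair above is the TWO52 trick: the
   addition pushes all fraction bits out of the significand, rounding in
   the current (by default round-to-nearest-even) mode, and the subtraction
   recovers the rounded value, e.g. 1.3 -> 1.0 and 2.5 -> 2.0.  Copying the
   sign back makes e.g. rint (-0.4) come out as -0.0 rather than +0.0.  */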
22142 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22143 into OPERAND0, without relying on the DImode truncation (cvttsd2siq) that is only available on 64-bit targets. */
22144 void
22145 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
22147 /* C code for the stuff we expand below.
22148 double xa = fabs (x), x2;
22149 if (!isless (xa, TWO52))
22150 return x;
22151 xa = xa + TWO52 - TWO52;
22152 x2 = copysign (xa, x);
22153 Compensate. Floor:
22154 if (x2 > x)
22155 x2 -= 1;
22156 Compensate. Ceil:
22157 if (x2 < x)
22158 x2 -= -1;
22159 return x2;
22161 enum machine_mode mode = GET_MODE (operand0);
22162 rtx xa, TWO52, tmp, label, one, res, mask;
22164 TWO52 = ix86_gen_TWO52 (mode);
22166 /* Temporary for holding the result, initialized to the input
22167 operand to ease control flow. */
22168 res = gen_reg_rtx (mode);
22169 emit_move_insn (res, operand1);
22171 /* xa = abs (operand1) */
22172 xa = ix86_expand_sse_fabs (res, &mask);
22174 /* if (!isless (xa, TWO52)) goto label; */
22175 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22177 /* xa = xa + TWO52 - TWO52; */
22178 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22179 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22181 /* xa = copysign (xa, operand1) */
22182 ix86_sse_copysign_to_positive (xa, xa, res, mask);
22184 /* generate 1.0 or -1.0 */
22185 one = force_reg (mode,
22186 const_double_from_real_value (do_floor
22187 ? dconst1 : dconstm1, mode));
22189 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22190 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22191 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22192 gen_rtx_AND (mode, one, tmp)));
22193 /* We always need to subtract here to preserve signed zero: -0.0 - (+0.0) stays -0.0, whereas -0.0 + (+0.0) would yield +0.0. */
22194 tmp = expand_simple_binop (mode, MINUS,
22195 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22196 emit_move_insn (res, tmp);
22198 emit_label (label);
22199 LABEL_NUSES (label) = 1;
22201 emit_move_insn (operand0, res);
22204 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22205 into OPERAND0. */
22206 void
22207 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
22209 /* C code for the stuff we expand below.
22210 double xa = fabs (x), x2;
22211 if (!isless (xa, TWO52))
22212 return x;
22213 x2 = (double)(long)x;
22214 Compensate. Floor:
22215 if (x2 > x)
22216 x2 -= 1;
22217 Compensate. Ceil:
22218 if (x2 < x)
22219 x2 += 1;
22220 if (HONOR_SIGNED_ZEROS (mode))
22221 return copysign (x2, x);
22222 return x2;
22224 enum machine_mode mode = GET_MODE (operand0);
22225 rtx xa, xi, TWO52, tmp, label, one, res, mask;
22227 TWO52 = ix86_gen_TWO52 (mode);
22229 /* Temporary for holding the result, initialized to the input
22230 operand to ease control flow. */
22231 res = gen_reg_rtx (mode);
22232 emit_move_insn (res, operand1);
22234 /* xa = abs (operand1) */
22235 xa = ix86_expand_sse_fabs (res, &mask);
22237 /* if (!isless (xa, TWO52)) goto label; */
22238 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22240 /* xa = (double)(long)x */
22241 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22242 expand_fix (xi, res, 0);
22243 expand_float (xa, xi, 0);
22245 /* generate 1.0 */
22246 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22248 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22249 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22250 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22251 gen_rtx_AND (mode, one, tmp)));
22252 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
22253 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22254 emit_move_insn (res, tmp);
22256 if (HONOR_SIGNED_ZEROS (mode))
22257 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22259 emit_label (label);
22260 LABEL_NUSES (label) = 1;
22262 emit_move_insn (operand0, res);
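/* Unlike ix86_expand_floorceildf_32 above, this variant rounds by
   converting through an integer register (DImode when the operand is
   DFmode), which for double relies on the cvttsd2siq truncation that
   exists only on 64-bit targets (cf. the comment on
   ix86_expand_rounddf_32 below); the _df_32 variants are the fallback for
   32-bit targets.  */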
22265 /* Expand SSE sequence for computing round from OPERAND1 storing
22266 into OPERAND0.  This sequence avoids the DImode truncation (cvttsd2siq)
22267 that is only available on 64-bit targets. */
22268 void
22269 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
22271 /* C code for the stuff we expand below.
22272 double xa = fabs (x), xa2, x2;
22273 if (!isless (xa, TWO52))
22274 return x;
22275 Using the absolute value and copying back sign makes
22276 -0.0 -> -0.0 correct.
22277 xa2 = xa + TWO52 - TWO52;
22278 Compensate.
22279 dxa = xa2 - xa;
22280 if (dxa <= -0.5)
22281 xa2 += 1;
22282 else if (dxa > 0.5)
22283 xa2 -= 1;
22284 x2 = copysign (xa2, x);
22285 return x2;
22287 enum machine_mode mode = GET_MODE (operand0);
22288 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
22290 TWO52 = ix86_gen_TWO52 (mode);
22292 /* Temporary for holding the result, initialized to the input
22293 operand to ease control flow. */
22294 res = gen_reg_rtx (mode);
22295 emit_move_insn (res, operand1);
22297 /* xa = abs (operand1) */
22298 xa = ix86_expand_sse_fabs (res, &mask);
22300 /* if (!isless (xa, TWO52)) goto label; */
22301 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22303 /* xa2 = xa + TWO52 - TWO52; */
22304 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22305 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
22307 /* dxa = xa2 - xa; */
22308 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
22310 /* generate 0.5, 1.0 and -0.5 */
22311 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
22312 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
22313 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
22314 0, OPTAB_DIRECT);
22316 /* Compensate. */
22317 tmp = gen_reg_rtx (mode);
22318 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
22319 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
22320 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22321 gen_rtx_AND (mode, one, tmp)));
22322 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22323 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
22324 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
22325 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22326 gen_rtx_AND (mode, one, tmp)));
22327 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22329 /* res = copysign (xa2, operand1) */
22330 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
22332 emit_label (label);
22333 LABEL_NUSES (label) = 1;
22335 emit_move_insn (operand0, res);
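/* The dxa compensation corrects for the TWO52 trick rounding halfway cases
   to even while round() rounds them away from zero: for x = 2.5, xa2
   starts out as 2.0 and dxa = -0.5 triggers the "+1" adjustment, giving
   3.0; for x = 3.5, xa2 is already 4.0 and dxa = 0.5 does not exceed 0.5,
   so nothing is undone.  */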
22338 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22339 into OPERAND0. */
22340 void
22341 ix86_expand_trunc (rtx operand0, rtx operand1)
22343 /* C code for SSE variant we expand below.
22344 double xa = fabs (x), x2;
22345 if (!isless (xa, TWO52))
22346 return x;
22347 x2 = (double)(long)x;
22348 if (HONOR_SIGNED_ZEROS (mode))
22349 return copysign (x2, x);
22350 return x2;
22352 enum machine_mode mode = GET_MODE (operand0);
22353 rtx xa, xi, TWO52, label, res, mask;
22355 TWO52 = ix86_gen_TWO52 (mode);
22357 /* Temporary for holding the result, initialized to the input
22358 operand to ease control flow. */
22359 res = gen_reg_rtx (mode);
22360 emit_move_insn (res, operand1);
22362 /* xa = abs (operand1) */
22363 xa = ix86_expand_sse_fabs (res, &mask);
22365 /* if (!isless (xa, TWO52)) goto label; */
22366 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22368 /* x = (double)(long)x */
22369 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22370 expand_fix (xi, res, 0);
22371 expand_float (res, xi, 0);
22373 if (HONOR_SIGNED_ZEROS (mode))
22374 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22376 emit_label (label);
22377 LABEL_NUSES (label) = 1;
22379 emit_move_insn (operand0, res);
22382 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
22383 OPERAND0, without relying on the DImode truncation (cvttsd2siq) that is only available on 64-bit targets. */
22384 void
22385 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
22387 enum machine_mode mode = GET_MODE (operand0);
22388 rtx xa, mask, TWO52, label, one, res, smask, tmp;
22390 /* C code for SSE variant we expand below.
22391 double xa = fabs (x), xa2, x2;
22392 if (!isless (xa, TWO52))
22393 return x;
22394 xa2 = xa + TWO52 - TWO52;
22395 Compensate:
22396 if (xa2 > xa)
22397 xa2 -= 1.0;
22398 x2 = copysign (xa2, x);
22399 return x2;
22402 TWO52 = ix86_gen_TWO52 (mode);
22404 /* Temporary for holding the result, initialized to the input
22405 operand to ease control flow. */
22406 res = gen_reg_rtx (mode);
22407 emit_move_insn (res, operand1);
22409 /* xa = abs (operand1) */
22410 xa = ix86_expand_sse_fabs (res, &smask);
22412 /* if (!isless (xa, TWO52)) goto label; */
22413 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22415 /* res = xa + TWO52 - TWO52; */
22416 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22417 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
22418 emit_move_insn (res, tmp);
22420 /* generate 1.0 */
22421 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22423 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
22424 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
22425 emit_insn (gen_rtx_SET (VOIDmode, mask,
22426 gen_rtx_AND (mode, mask, one)));
22427 tmp = expand_simple_binop (mode, MINUS,
22428 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
22429 emit_move_insn (res, tmp);
22431 /* res = copysign (res, operand1) */
22432 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
22434 emit_label (label);
22435 LABEL_NUSES (label) = 1;
22437 emit_move_insn (operand0, res);
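/* Example: x = -1.7.  xa = 1.7 is rounded to 2.0 by the TWO52 trick, the
   2.0 > 1.7 comparison subtracts the spurious 1.0 again, and copying the
   input's sign back gives -1.0 = trunc (-1.7).  When the trick rounds
   downward (e.g. 1.3 -> 1.0) the mask is zero and nothing is subtracted.  */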
22440 /* Expand SSE sequence for computing round from OPERAND1 storing
22441 into OPERAND0. */
22442 void
22443 ix86_expand_round (rtx operand0, rtx operand1)
22445 /* C code for the stuff we're doing below:
22446 double xa = fabs (x);
22447 if (!isless (xa, TWO52))
22448 return x;
22449 xa = (double)(long)(xa + nextafter (0.5, 0.0));
22450 return copysign (xa, x);
22452 enum machine_mode mode = GET_MODE (operand0);
22453 rtx res, TWO52, xa, label, xi, half, mask;
22454 const struct real_format *fmt;
22455 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22457 /* Temporary for holding the result, initialized to the input
22458 operand to ease control flow. */
22459 res = gen_reg_rtx (mode);
22460 emit_move_insn (res, operand1);
22462 TWO52 = ix86_gen_TWO52 (mode);
22463 xa = ix86_expand_sse_fabs (res, &mask);
22464 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22466 /* load nextafter (0.5, 0.0) */
22467 fmt = REAL_MODE_FORMAT (mode);
22468 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22469 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22471 /* xa = xa + 0.5 */
22472 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
22473 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
22475 /* xa = (double)(int64_t)xa */
22476 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22477 expand_fix (xi, xa, 0);
22478 expand_float (xa, xi, 0);
22480 /* res = copysign (xa, operand1) */
22481 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
22483 emit_label (label);
22484 LABEL_NUSES (label) = 1;
22486 emit_move_insn (operand0, res);
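/* This is the same nextafter (0.5, 0.0) adjustment as in
   ix86_expand_lround above, except that the value is converted back to
   floating point and the sign of the original operand is restored, so
   e.g. round (-2.5) = -3.0 and round (-0.3) = -0.0.  */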
22490 /* Table of valid machine attributes. */
22491 static const struct attribute_spec ix86_attribute_table[] =
22493 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
22494 /* Stdcall attribute says callee is responsible for popping arguments
22495 if they are not variable. */
22496 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22497 /* Fastcall attribute says callee is responsible for popping arguments
22498 if they are not variable. */
22499 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22500 /* Cdecl attribute says the callee is a normal C declaration */
22501 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22502 /* Regparm attribute specifies how many integer arguments are to be
22503 passed in registers. */
22504 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
22505 /* Sseregparm attribute says we are using x86_64 calling conventions
22506 for FP arguments. */
22507 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22508 /* force_align_arg_pointer says this function realigns the stack at entry. */
22509 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
22510 false, true, true, ix86_handle_cconv_attribute },
22511 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22512 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
22513 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
22514 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
22515 #endif
22516 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
22517 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
22518 #ifdef SUBTARGET_ATTRIBUTE_TABLE
22519 SUBTARGET_ATTRIBUTE_TABLE,
22520 #endif
22521 { NULL, 0, 0, false, false, false, NULL }
22524 /* Initialize the GCC target structure. */
22525 #undef TARGET_ATTRIBUTE_TABLE
22526 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22527 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22528 # undef TARGET_MERGE_DECL_ATTRIBUTES
22529 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22530 #endif
22532 #undef TARGET_COMP_TYPE_ATTRIBUTES
22533 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22535 #undef TARGET_INIT_BUILTINS
22536 #define TARGET_INIT_BUILTINS ix86_init_builtins
22537 #undef TARGET_EXPAND_BUILTIN
22538 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22540 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22541 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
22542 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
22543 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
22545 #undef TARGET_ASM_FUNCTION_EPILOGUE
22546 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22548 #undef TARGET_ENCODE_SECTION_INFO
22549 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22550 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22551 #else
22552 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22553 #endif
22555 #undef TARGET_ASM_OPEN_PAREN
22556 #define TARGET_ASM_OPEN_PAREN ""
22557 #undef TARGET_ASM_CLOSE_PAREN
22558 #define TARGET_ASM_CLOSE_PAREN ""
22560 #undef TARGET_ASM_ALIGNED_HI_OP
22561 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22562 #undef TARGET_ASM_ALIGNED_SI_OP
22563 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22564 #ifdef ASM_QUAD
22565 #undef TARGET_ASM_ALIGNED_DI_OP
22566 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22567 #endif
22569 #undef TARGET_ASM_UNALIGNED_HI_OP
22570 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22571 #undef TARGET_ASM_UNALIGNED_SI_OP
22572 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22573 #undef TARGET_ASM_UNALIGNED_DI_OP
22574 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22576 #undef TARGET_SCHED_ADJUST_COST
22577 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22578 #undef TARGET_SCHED_ISSUE_RATE
22579 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22580 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22581 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22582 ia32_multipass_dfa_lookahead
22584 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22585 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22587 #ifdef HAVE_AS_TLS
22588 #undef TARGET_HAVE_TLS
22589 #define TARGET_HAVE_TLS true
22590 #endif
22591 #undef TARGET_CANNOT_FORCE_CONST_MEM
22592 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22593 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22594 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
22596 #undef TARGET_DELEGITIMIZE_ADDRESS
22597 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22599 #undef TARGET_MS_BITFIELD_LAYOUT_P
22600 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22602 #if TARGET_MACHO
22603 #undef TARGET_BINDS_LOCAL_P
22604 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22605 #endif
22606 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22607 #undef TARGET_BINDS_LOCAL_P
22608 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
22609 #endif
22611 #undef TARGET_ASM_OUTPUT_MI_THUNK
22612 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
22613 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
22614 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
22616 #undef TARGET_ASM_FILE_START
22617 #define TARGET_ASM_FILE_START x86_file_start
22619 #undef TARGET_DEFAULT_TARGET_FLAGS
22620 #define TARGET_DEFAULT_TARGET_FLAGS \
22621 (TARGET_DEFAULT \
22622 | TARGET_64BIT_DEFAULT \
22623 | TARGET_SUBTARGET_DEFAULT \
22624 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
22626 #undef TARGET_HANDLE_OPTION
22627 #define TARGET_HANDLE_OPTION ix86_handle_option
22629 #undef TARGET_RTX_COSTS
22630 #define TARGET_RTX_COSTS ix86_rtx_costs
22631 #undef TARGET_ADDRESS_COST
22632 #define TARGET_ADDRESS_COST ix86_address_cost
22634 #undef TARGET_FIXED_CONDITION_CODE_REGS
22635 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
22636 #undef TARGET_CC_MODES_COMPATIBLE
22637 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
22639 #undef TARGET_MACHINE_DEPENDENT_REORG
22640 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
22642 #undef TARGET_BUILD_BUILTIN_VA_LIST
22643 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
22645 #undef TARGET_MD_ASM_CLOBBERS
22646 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
22648 #undef TARGET_PROMOTE_PROTOTYPES
22649 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
22650 #undef TARGET_STRUCT_VALUE_RTX
22651 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
22652 #undef TARGET_SETUP_INCOMING_VARARGS
22653 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
22654 #undef TARGET_MUST_PASS_IN_STACK
22655 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
22656 #undef TARGET_PASS_BY_REFERENCE
22657 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
22658 #undef TARGET_INTERNAL_ARG_POINTER
22659 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
22660 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
22661 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
22662 #undef TARGET_STRICT_ARGUMENT_NAMING
22663 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22665 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
22666 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
22668 #undef TARGET_SCALAR_MODE_SUPPORTED_P
22669 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
22671 #undef TARGET_VECTOR_MODE_SUPPORTED_P
22672 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
22674 #ifdef HAVE_AS_TLS
22675 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
22676 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
22677 #endif
22679 #ifdef SUBTARGET_INSERT_ATTRIBUTES
22680 #undef TARGET_INSERT_ATTRIBUTES
22681 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
22682 #endif
22684 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
22685 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
22687 #undef TARGET_STACK_PROTECT_FAIL
22688 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
22690 #undef TARGET_FUNCTION_VALUE
22691 #define TARGET_FUNCTION_VALUE ix86_function_value
22693 struct gcc_target targetm = TARGET_INITIALIZER;
22695 #include "gt-i386.h"