1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Stack-probe limit; targets that do not override it get the default
   value of -1.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   Index 4 ("other") covers any mode not listed explicitly; the mult
   and divide cost arrays below each carry five entries
   (QI, HI, SI, DI, other).  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Stringop strategy table entry that unconditionally falls back to a
   library call; used as a placeholder where a size variant of the
   memcpy/memset strategy is not tuned.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when tuning for size rather than speed: entries are
   byte counts via COSTS_N_BYTES, not cycle estimates.
   NOTE(review): this region is extraction-garbled -- stray original
   line numbers are fused into each line, some interior lines and the
   closing "};" are missing; verify against upstream GCC i386.c.  */
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* memcpy/memset stringop strategies: rep movsb/stosb minimizes size.  */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
/* Cycle-cost table (COSTS_N_INSNS) for tuning for the original i386.
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines, closing "};" missing in a gap; verify
   against upstream GCC i386.c.  */
130 /* Processor costs (relative to an add) */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* memcpy/memset strategies: rep movsb for 32-bit, dummy for 64-bit.  */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
/* Cycle-cost table for tuning for the Intel 486.
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines; the initializer's closing lines fall in a
   gap; verify against upstream GCC i386.c.  */
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* memcpy/memset strategies: rep movsl/stosl for 32-bit code.  */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
/* Cycle-cost table for tuning for the Intel Pentium.
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines, tail of the initializer missing in a gap;
   verify against upstream GCC i386.c.  */
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* memcpy strategy: rep movsl up to 256 bytes, then a library call.  */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
/* Cycle-cost table for tuning for the Intel PentiumPro/P6 family.
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines; the closing "*"/" of the stringop comment
   below and the initializer tail fall in gaps; verify upstream.  */
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-cost table for tuning for the AMD Geode.
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines, initializer tail missing in a gap; verify
   against upstream GCC i386.c.  */
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* memcpy/memset: rep movsl up to 256 bytes, then library call.  */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-cost table for tuning for the AMD K6.
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines, initializer tail missing in a gap; verify
   against upstream GCC i386.c.  */
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* memcpy/memset: rep movsl up to 256 bytes, then library call.  */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-cost table for tuning for the AMD Athlon.
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines, initializer tail missing in a gap; verify
   against upstream GCC i386.c.  */
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-cost table for tuning for the AMD K8 (Opteron/Athlon 64).
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines; the prefetch comment below lost its
   closing "*"/" in a gap and the final "};" is missing; verify
   against upstream GCC i386.c.  */
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-cost table for tuning for AMD Family 10h (Barcelona).
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines; the "MOVD reg..." lines below are the
   interior of a latency comment whose opening "/*" fell in a gap, the
   prefetch comment lost its closing "*"/", and the final "};" is
   missing; verify against upstream GCC i386.c.  */
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-cost table for tuning for the Intel Pentium 4 (NetBurst).
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines, initializer tail missing in a gap; verify
   against upstream GCC i386.c.  */
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
/* Cycle-cost table for tuning for the Intel Nocona (64-bit P4/Xeon).
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines, final "};" missing in a gap; verify
   against upstream GCC i386.c.  */
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-cost table for tuning for the Intel Core 2.
   NOTE(review): region is extraction-garbled -- stray original line
   numbers fused into lines, final "};" missing in a gap; verify
   against upstream GCC i386.c.  */
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers (this slot parallels the
other tables' fp-store entry; the original said "loading
integer registers", an apparent copy-paste slip) */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost
= {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
908 is increased to perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS
,
917 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
918 {DUMMY_STRINGOP_ALGS
,
919 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost
= {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
975 DUMMY_STRINGOP_ALGS
},
976 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
977 DUMMY_STRINGOP_ALGS
},
/* Cost table currently in effect.  Defaults to the Pentium table;
   NOTE(review): presumably repointed per -mtune in override_options (),
   which is outside this chunk -- confirm at the use site.  */
980 const struct processor_costs
*ix86_cost
= &pentium_cost
;
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003 /* Generic instruction choice should be common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features
[X86_TUNE_LAST
] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling for Generic64 seems like good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro base chips. */
1013 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC64
,
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1017 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1022 /* X86_TUNE_USE_BIT_TEST */
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6
| m_CORE2
| m_GENERIC
,
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_GENERIC
,
1031 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1032 on simulation result. But after P4 was made, no performance benefit
1033 was observed with branch hints. It also increases the code size.
1034 As a result, icc never generates branch hints. */
1037 /* X86_TUNE_DOUBLE_WITH_ADD */
1040 /* X86_TUNE_USE_SAHF */
1041 m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_PENT4
1042 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1044 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1045 partial dependencies. */
1046 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1047 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1049 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1050 register stalls on Generic32 compilation setting as well. However
1051 in current implementation the partial register stalls are not eliminated
1052 very well - they can be introduced via subregs synthesized by combine
1053 and can happen in caller/callee saving sequences. Because this option
1054 pays back little on PPro based chips and is in conflict with partial reg
1055 dependencies used by Athlon/P4 based chips, it is better to leave it off
1056 for generic32 for now. */
1059 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1060 m_CORE2
| m_GENERIC
,
1062 /* X86_TUNE_USE_HIMODE_FIOP */
1063 m_386
| m_486
| m_K6_GEODE
,
1065 /* X86_TUNE_USE_SIMODE_FIOP */
1066 ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
| m_CORE2
| m_GENERIC
),
1068 /* X86_TUNE_USE_MOV0 */
1071 /* X86_TUNE_USE_CLTD */
1072 ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
),
1074 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1077 /* X86_TUNE_SPLIT_LONG_MOVES */
1080 /* X86_TUNE_READ_MODIFY_WRITE */
1083 /* X86_TUNE_READ_MODIFY */
1086 /* X86_TUNE_PROMOTE_QIMODE */
1087 m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1088 | m_GENERIC
/* | m_PENT4 ? */,
1090 /* X86_TUNE_FAST_PREFIX */
1091 ~(m_PENT
| m_486
| m_386
),
1093 /* X86_TUNE_SINGLE_STRINGOP */
1094 m_386
| m_PENT4
| m_NOCONA
,
1096 /* X86_TUNE_QIMODE_MATH */
1099 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1100 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1101 might be considered for Generic32 if our scheme for avoiding partial
1102 stalls was more effective. */
1105 /* X86_TUNE_PROMOTE_QI_REGS */
1108 /* X86_TUNE_PROMOTE_HI_REGS */
1111 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1112 m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1114 /* X86_TUNE_ADD_ESP_8 */
1115 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1116 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1118 /* X86_TUNE_SUB_ESP_4 */
1119 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1121 /* X86_TUNE_SUB_ESP_8 */
1122 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1123 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1125 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1126 for DFmode copies */
1127 ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1128 | m_GENERIC
| m_GEODE
),
1130 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1131 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1133 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1134 conflict here in between PPro/Pentium4 based chips that thread 128bit
1135 SSE registers as single units versus K8 based chips that divide SSE
1136 registers to two 64bit halves. This knob promotes all store destinations
1137 to be 128bit to allow register renaming on 128bit SSE units, but usually
1138 results in one extra microop on 64bit SSE units. Experimental results
1139 shows that disabling this option on P4 brings over 20% SPECfp regression,
1140 while enabling it on K8 brings roughly 2.4% regression that can be partly
1141 masked by careful scheduling of moves. */
1142 m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_AMDFAM10
,
1144 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1147 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1148 are resolved on SSE register parts instead of whole registers, so we may
1149 maintain just lower part of scalar values in proper format leaving the
1150 upper part undefined. */
1153 /* X86_TUNE_SSE_TYPELESS_STORES */
1154 m_ATHLON_K8_AMDFAM10
,
1156 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1157 m_PPRO
| m_PENT4
| m_NOCONA
,
1159 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1160 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1162 /* X86_TUNE_PROLOGUE_USING_MOVE */
1163 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1165 /* X86_TUNE_EPILOGUE_USING_MOVE */
1166 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1168 /* X86_TUNE_SHIFT1 */
1171 /* X86_TUNE_USE_FFREEP */
1172 m_ATHLON_K8_AMDFAM10
,
1174 /* X86_TUNE_INTER_UNIT_MOVES */
1175 ~(m_ATHLON_K8_AMDFAM10
| m_GENERIC
),
1177 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1178 than 4 branch instructions in the 16 byte window. */
1179 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1181 /* X86_TUNE_SCHEDULE */
1182 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
,
1184 /* X86_TUNE_USE_BT */
1185 m_ATHLON_K8_AMDFAM10
,
1187 /* X86_TUNE_USE_INCDEC */
1188 ~(m_PENT4
| m_NOCONA
| m_GENERIC
),
1190 /* X86_TUNE_PAD_RETURNS */
1191 m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
,
1193 /* X86_TUNE_EXT_80387_CONSTANTS */
1194 m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
,
1196 /* X86_TUNE_SHORTEN_X87_SSE */
1199 /* X86_TUNE_AVOID_VECTOR_DECODE */
1202 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1203 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1206 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1207 vector path on AMD machines. */
1208 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1210 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1212 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1214 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1218 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1219 but one byte longer. */
1222 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1223 operand that cannot be represented using a modRM byte. The XOR
1224 replacement is long decoded, so this split helps here as well. */
1228 /* Feature tests against the various architecture variations. */
1229 unsigned int ix86_arch_features
[X86_ARCH_LAST
] = {
1230 /* X86_ARCH_CMOVE */
1231 m_PPRO
| m_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
,
1233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processor mask: CPUs on which accumulating outgoing arguments is
   preferred.  NOTE(review): consulted elsewhere (not visible in this
   chunk) -- confirm at the use site.  */
1246 static const unsigned int x86_accumulate_outgoing_args
1247 = m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
/* Processor mask: CPUs whose 80387 handles the transcendental
   instructions natively.  NOTE(review): inferred from the name; the
   use site is outside this chunk.  */
1249 static const unsigned int x86_arch_always_fancy_math_387
1250 = m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1251 | m_NOCONA
| m_CORE2
| m_GENERIC
;
/* String-operation algorithm forced from the command line;
   no_stringop means none was forced.  NOTE(review): inferred from the
   initializer -- the option parsing is outside this chunk.  */
1253 static enum stringop_alg stringop_alg
= no_stringop
;
1255 /* In case the average insn count for single function invocation is
1256 lower than this constant, emit fast (but longer) prologue and
1258 #define FAST_PROLOGUE_INSN_COUNT 20
1260 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1261 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1262 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1263 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1265 /* Array of the smallest class containing reg number REGNO, indexed by
1266 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1268 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1270 /* ax, dx, cx, bx */
1271 AREG
, DREG
, CREG
, BREG
,
1272 /* si, di, bp, sp */
1273 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1275 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1276 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1279 /* flags, fpsr, fpcr, frame */
1280 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1281 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1283 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1285 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1286 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1287 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1291 /* The "default" register map used in 32bit mode. */
1293 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1295 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1296 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1297 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1298 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1299 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1300 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1304 static int const x86_64_int_parameter_registers
[6] =
1306 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1307 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1310 static int const x86_64_ms_abi_int_parameter_registers
[4] =
1312 2 /*RCX*/, 1 /*RDX*/,
1313 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1316 static int const x86_64_int_return_registers
[4] =
1318 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1321 /* The "default" register map used in 64bit mode. */
1322 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1324 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1325 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1326 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1327 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1328 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1329 8,9,10,11,12,13,14,15, /* extended integer registers */
1330 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1333 /* Define the register numbers to be used in Dwarf debugging information.
1334 The SVR4 reference port C compiler uses the following register numbers
1335 in its Dwarf output code:
1336 0 for %eax (gcc regno = 0)
1337 1 for %ecx (gcc regno = 2)
1338 2 for %edx (gcc regno = 1)
1339 3 for %ebx (gcc regno = 3)
1340 4 for %esp (gcc regno = 7)
1341 5 for %ebp (gcc regno = 6)
1342 6 for %esi (gcc regno = 4)
1343 7 for %edi (gcc regno = 5)
1344 The following three DWARF register numbers are never generated by
1345 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1346 believes these numbers have these meanings.
1347 8 for %eip (no gcc equivalent)
1348 9 for %eflags (gcc regno = 17)
1349 10 for %trapno (no gcc equivalent)
1350 It is not at all clear how we should number the FP stack registers
1351 for the x86 architecture. If the version of SDB on x86/svr4 were
1352 a bit less brain dead with respect to floating-point then we would
1353 have a precedent to follow with respect to DWARF register numbers
1354 for x86 FP registers, but the SDB on x86/svr4 is so completely
1355 broken with respect to FP registers that it is hardly worth thinking
1356 of it as something to strive for compatibility with.
1357 The version of x86/svr4 SDB I have at the moment does (partially)
1358 seem to believe that DWARF register number 11 is associated with
1359 the x86 register %st(0), but that's about all. Higher DWARF
1360 register numbers don't seem to be associated with anything in
1361 particular, and even for DWARF regno 11, SDB only seems to under-
1362 stand that it should say that a variable lives in %st(0) (when
1363 asked via an `=' command) if we said it was in DWARF regno 11,
1364 but SDB still prints garbage when asked for the value of the
1365 variable in question (via a `/' command).
1366 (Also note that the labels SDB prints for various FP stack regs
1367 when doing an `x' command are all wrong.)
1368 Note that these problems generally don't affect the native SVR4
1369 C compiler because it doesn't allow the use of -O with -g and
1370 because when it is *not* optimizing, it allocates a memory
1371 location for each floating-point variable, and the memory
1372 location is what gets described in the DWARF AT_location
1373 attribute for the variable in question.
1374 Regardless of the severe mental illness of the x86/svr4 SDB, we
1375 do something sensible here and we use the following DWARF
1376 register numbers. Note that these are all stack-top-relative
1378 11 for %st(0) (gcc regno = 8)
1379 12 for %st(1) (gcc regno = 9)
1380 13 for %st(2) (gcc regno = 10)
1381 14 for %st(3) (gcc regno = 11)
1382 15 for %st(4) (gcc regno = 12)
1383 16 for %st(5) (gcc regno = 13)
1384 17 for %st(6) (gcc regno = 14)
1385 18 for %st(7) (gcc regno = 15)
1387 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1389 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1390 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1391 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1392 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1393 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1394 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1395 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1398 /* Test and compare insns in i386.md store the information needed to
1399 generate branch and scc insns here. */
/* First operand of the pending comparison (see comment above).  */
1401 rtx ix86_compare_op0
= NULL_RTX
;
/* Second operand of the pending comparison.  */
1402 rtx ix86_compare_op1
= NULL_RTX
;
/* Non-NULL when the flags result has already been emitted.
   NOTE(review): inferred from the name; producers/consumers are
   outside this chunk.  */
1403 rtx ix86_compare_emitted
= NULL_RTX
;
1405 /* Size of the register save area. */
1406 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1408 /* Define the structure for the machine field in struct function. */
1410 struct stack_local_entry
GTY(())
1412 unsigned short mode
;
1415 struct stack_local_entry
*next
;
1418 /* Structure describing stack frame layout.
1419 Stack grows downward:
1425 saved frame pointer if frame_pointer_needed
1426 <- HARD_FRAME_POINTER
1431 [va_arg registers] (
1432 > to_allocate <- FRAME_POINTER
1442 HOST_WIDE_INT frame
;
1444 int outgoing_arguments_size
;
1447 HOST_WIDE_INT to_allocate
;
1448 /* The offsets relative to ARG_POINTER. */
1449 HOST_WIDE_INT frame_pointer_offset
;
1450 HOST_WIDE_INT hard_frame_pointer_offset
;
1451 HOST_WIDE_INT stack_pointer_offset
;
1453 /* When save_regs_using_mov is set, emit prologue using
1454 move instead of push instructions. */
1455 bool save_regs_using_mov
;
1458 /* Code model option. */
1459 enum cmodel ix86_cmodel
;
/* Assembler dialect; defaults to AT&T syntax.  */
1461 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
/* TLS access dialect; defaults to the GNU sequences.  */
1463 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1465 /* Which unit we are generating floating point math for. */
1466 enum fpmath_unit ix86_fpmath
;
1468 /* Which cpu are we scheduling for. */
1469 enum processor_type ix86_tune
;
1471 /* Which instruction set architecture to use. */
1472 enum processor_type ix86_arch
;
1474 /* true if sse prefetch instruction is not NOOP. */
1475 int x86_prefetch_sse
;
1477 /* ix86_regparm_string as a number */
1478 static int ix86_regparm
;
1480 /* -mstackrealign option */
1481 extern int ix86_force_align_arg_pointer
;
/* Attribute name string for forcing argument-pointer realignment.  */
1482 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1484 /* Preferred alignment for stack boundary in bits. */
1485 unsigned int ix86_preferred_stack_boundary
;
1487 /* Values 1-5: see jump.c */
1488 int ix86_branch_cost
;
1490 /* Variables which are this size or smaller are put in the data/bss
1491 or ldata/lbss sections. */
1493 int ix86_section_threshold
= 65536;
1495 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1496 char internal_label_prefix
[16];
/* Length of internal_label_prefix.  NOTE(review): presumably cached
   alongside the prefix; the assignment is outside this chunk.  */
1497 int internal_label_prefix_len
;
1499 /* Register class used for passing given 64bit part of the argument.
1500 These represent classes as documented by the PS ABI, with the exception
1501 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1502 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1504 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1505 whenever possible (upper half does contain padding). */
1506 enum x86_64_reg_class
1509 X86_64_INTEGER_CLASS
,
1510 X86_64_INTEGERSI_CLASS
,
1517 X86_64_COMPLEX_X87_CLASS
,
1520 static const char * const x86_64_reg_class_name
[] =
1522 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1523 "sseup", "x87", "x87up", "cplx87", "no"
1526 #define MAX_CLASSES 4
1528 /* Table of constants used by fldpi, fldln2, etc.... */
1529 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1530 static bool ext_80387_constants_init
= 0;
/* Forward declarations of static helpers defined later in the file.  */
1533 static struct machine_function
* ix86_init_machine_status (void);
1534 static rtx
ix86_function_value (tree
, tree
, bool);
1535 static int ix86_function_regparm (tree
, tree
);
1536 static void ix86_compute_frame_layout (struct ix86_frame
*);
1537 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1541 /* The svr4 ABI for the i386 says that records and unions are returned
1543 #ifndef DEFAULT_PCC_STRUCT_RETURN
1544 #define DEFAULT_PCC_STRUCT_RETURN 1
1547 /* Implement TARGET_HANDLE_OPTION. */
1550 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1557 target_flags
&= ~MASK_3DNOW_A
;
1558 target_flags_explicit
|= MASK_3DNOW_A
;
1565 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1566 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1573 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSSE3
1575 target_flags_explicit
|= (MASK_SSE2
| MASK_SSE3
| MASK_SSSE3
1583 target_flags
&= ~(MASK_SSE3
| MASK_SSSE3
| MASK_SSE4A
);
1584 target_flags_explicit
|= MASK_SSE3
| MASK_SSSE3
| MASK_SSE4A
;
1591 target_flags
&= ~(MASK_SSSE3
| MASK_SSE4A
);
1592 target_flags_explicit
|= MASK_SSSE3
| MASK_SSE4A
;
1599 target_flags
&= ~(MASK_SSE4_1
| MASK_SSE4A
);
1600 target_flags_explicit
|= MASK_SSE4_1
| MASK_SSE4A
;
1607 target_flags
&= ~MASK_SSE4A
;
1608 target_flags_explicit
|= MASK_SSE4A
;
1615 target_flags
&= ~MASK_SSE4_1
;
1616 target_flags_explicit
|= MASK_SSE4_1
;
1625 /* Sometimes certain combinations of command options do not make
1626 sense on a particular target machine. You can define a macro
1627 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1628 defined, is executed once just after all the command options have
1631 Don't use this macro to turn on various extra optimizations for
1632 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1635 override_options (void)
1638 int ix86_tune_defaulted
= 0;
1639 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1641 /* Comes from final.c -- no real reason to change it. */
1642 #define MAX_CODE_ALIGN 16
1646 const struct processor_costs
*cost
; /* Processor costs */
1647 const int target_enable
; /* Target flags to enable. */
1648 const int target_disable
; /* Target flags to disable. */
1649 const int align_loop
; /* Default alignments. */
1650 const int align_loop_max_skip
;
1651 const int align_jump
;
1652 const int align_jump_max_skip
;
1653 const int align_func
;
1655 const processor_target_table
[PROCESSOR_max
] =
1657 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1658 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1659 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1660 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1661 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1662 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1663 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1664 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1665 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1666 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1667 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1668 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1669 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1670 {&amdfam10_cost
, 0, 0, 32, 24, 32, 7, 32}
1673 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1676 const char *const name
; /* processor name or nickname. */
1677 const enum processor_type processor
;
1678 const enum pta_flags
1684 PTA_PREFETCH_SSE
= 1 << 4,
1686 PTA_3DNOW_A
= 1 << 6,
1690 PTA_POPCNT
= 1 << 10,
1692 PTA_SSE4A
= 1 << 12,
1693 PTA_NO_SAHF
= 1 << 13,
1694 PTA_SSE4_1
= 1 << 14
1697 const processor_alias_table
[] =
1699 {"i386", PROCESSOR_I386
, 0},
1700 {"i486", PROCESSOR_I486
, 0},
1701 {"i586", PROCESSOR_PENTIUM
, 0},
1702 {"pentium", PROCESSOR_PENTIUM
, 0},
1703 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1704 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1705 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1706 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1707 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1708 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1709 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1710 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1711 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1712 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1713 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1714 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1715 | PTA_MMX
| PTA_PREFETCH_SSE
},
1716 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1717 | PTA_MMX
| PTA_PREFETCH_SSE
},
1718 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1719 | PTA_MMX
| PTA_PREFETCH_SSE
},
1720 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1721 | PTA_MMX
| PTA_PREFETCH_SSE
1722 | PTA_CX16
| PTA_NO_SAHF
},
1723 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1724 | PTA_64BIT
| PTA_MMX
1725 | PTA_PREFETCH_SSE
| PTA_CX16
},
1726 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1728 {"k6", PROCESSOR_K6
, PTA_MMX
},
1729 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1730 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1731 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1733 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1734 | PTA_3DNOW
| PTA_3DNOW_A
},
1735 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1736 | PTA_3DNOW_A
| PTA_SSE
},
1737 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1738 | PTA_3DNOW_A
| PTA_SSE
},
1739 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1740 | PTA_3DNOW_A
| PTA_SSE
},
1741 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1742 | PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
1743 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1744 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
1746 {"k8-sse3", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1747 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
1748 | PTA_SSE3
| PTA_NO_SAHF
},
1749 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1750 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1751 | PTA_SSE2
| PTA_NO_SAHF
},
1752 {"opteron-sse3", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1753 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1754 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
1755 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1756 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1757 | PTA_SSE2
| PTA_NO_SAHF
},
1758 {"athlon64-sse3", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1759 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1760 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
1761 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1762 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1763 | PTA_SSE2
| PTA_NO_SAHF
},
1764 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1765 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1766 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1767 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1768 {"barcelona", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1769 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1770 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1771 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1772 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1773 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1776 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1778 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1779 SUBTARGET_OVERRIDE_OPTIONS
;
1782 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1783 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1786 /* -fPIC is the default for x86_64. */
1787 if (TARGET_MACHO
&& TARGET_64BIT
)
1790 /* Set the default values for switches whose default depends on TARGET_64BIT
1791 in case they weren't overwritten by command line options. */
1794 /* Mach-O doesn't support omitting the frame pointer for now. */
1795 if (flag_omit_frame_pointer
== 2)
1796 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1797 if (flag_asynchronous_unwind_tables
== 2)
1798 flag_asynchronous_unwind_tables
= 1;
1799 if (flag_pcc_struct_return
== 2)
1800 flag_pcc_struct_return
= 0;
1804 if (flag_omit_frame_pointer
== 2)
1805 flag_omit_frame_pointer
= 0;
1806 if (flag_asynchronous_unwind_tables
== 2)
1807 flag_asynchronous_unwind_tables
= 0;
1808 if (flag_pcc_struct_return
== 2)
1809 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1812 /* Need to check -mtune=generic first. */
1813 if (ix86_tune_string
)
1815 if (!strcmp (ix86_tune_string
, "generic")
1816 || !strcmp (ix86_tune_string
, "i686")
1817 /* As special support for cross compilers we read -mtune=native
1818 as -mtune=generic. With native compilers we won't see the
1819 -mtune=native, as it was changed by the driver. */
1820 || !strcmp (ix86_tune_string
, "native"))
1823 ix86_tune_string
= "generic64";
1825 ix86_tune_string
= "generic32";
1827 else if (!strncmp (ix86_tune_string
, "generic", 7))
1828 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1832 if (ix86_arch_string
)
1833 ix86_tune_string
= ix86_arch_string
;
1834 if (!ix86_tune_string
)
1836 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1837 ix86_tune_defaulted
= 1;
1840 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1841 need to use a sensible tune option. */
1842 if (!strcmp (ix86_tune_string
, "generic")
1843 || !strcmp (ix86_tune_string
, "x86-64")
1844 || !strcmp (ix86_tune_string
, "i686"))
1847 ix86_tune_string
= "generic64";
1849 ix86_tune_string
= "generic32";
1852 if (ix86_stringop_string
)
1854 if (!strcmp (ix86_stringop_string
, "rep_byte"))
1855 stringop_alg
= rep_prefix_1_byte
;
1856 else if (!strcmp (ix86_stringop_string
, "libcall"))
1857 stringop_alg
= libcall
;
1858 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
1859 stringop_alg
= rep_prefix_4_byte
;
1860 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
1861 stringop_alg
= rep_prefix_8_byte
;
1862 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
1863 stringop_alg
= loop_1_byte
;
1864 else if (!strcmp (ix86_stringop_string
, "loop"))
1865 stringop_alg
= loop
;
1866 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
1867 stringop_alg
= unrolled_loop
;
1869 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
1871 if (!strcmp (ix86_tune_string
, "x86-64"))
1872 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1873 "-mtune=generic instead as appropriate.");
1875 if (!ix86_arch_string
)
1876 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1877 if (!strcmp (ix86_arch_string
, "generic"))
1878 error ("generic CPU can be used only for -mtune= switch");
1879 if (!strncmp (ix86_arch_string
, "generic", 7))
1880 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1882 if (ix86_cmodel_string
!= 0)
1884 if (!strcmp (ix86_cmodel_string
, "small"))
1885 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1886 else if (!strcmp (ix86_cmodel_string
, "medium"))
1887 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
1888 else if (!strcmp (ix86_cmodel_string
, "large"))
1889 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
1891 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
1892 else if (!strcmp (ix86_cmodel_string
, "32"))
1893 ix86_cmodel
= CM_32
;
1894 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1895 ix86_cmodel
= CM_KERNEL
;
1897 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1901 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1902 use of rip-relative addressing. This eliminates fixups that
1903 would otherwise be needed if this object is to be placed in a
1904 DLL, and is essentially just as efficient as direct addressing. */
1905 if (TARGET_64BIT_MS_ABI
)
1906 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
1907 else if (TARGET_64BIT
)
1908 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1910 ix86_cmodel
= CM_32
;
1912 if (ix86_asm_string
!= 0)
1915 && !strcmp (ix86_asm_string
, "intel"))
1916 ix86_asm_dialect
= ASM_INTEL
;
1917 else if (!strcmp (ix86_asm_string
, "att"))
1918 ix86_asm_dialect
= ASM_ATT
;
1920 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1922 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1923 error ("code model %qs not supported in the %s bit mode",
1924 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1925 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1926 sorry ("%i-bit mode not compiled in",
1927 (target_flags
& MASK_64BIT
) ? 64 : 32);
1929 for (i
= 0; i
< pta_size
; i
++)
1930 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1932 ix86_arch
= processor_alias_table
[i
].processor
;
1933 /* Default cpu tuning to the architecture. */
1934 ix86_tune
= ix86_arch
;
1935 if (processor_alias_table
[i
].flags
& PTA_MMX
1936 && !(target_flags_explicit
& MASK_MMX
))
1937 target_flags
|= MASK_MMX
;
1938 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1939 && !(target_flags_explicit
& MASK_3DNOW
))
1940 target_flags
|= MASK_3DNOW
;
1941 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1942 && !(target_flags_explicit
& MASK_3DNOW_A
))
1943 target_flags
|= MASK_3DNOW_A
;
1944 if (processor_alias_table
[i
].flags
& PTA_SSE
1945 && !(target_flags_explicit
& MASK_SSE
))
1946 target_flags
|= MASK_SSE
;
1947 if (processor_alias_table
[i
].flags
& PTA_SSE2
1948 && !(target_flags_explicit
& MASK_SSE2
))
1949 target_flags
|= MASK_SSE2
;
1950 if (processor_alias_table
[i
].flags
& PTA_SSE3
1951 && !(target_flags_explicit
& MASK_SSE3
))
1952 target_flags
|= MASK_SSE3
;
1953 if (processor_alias_table
[i
].flags
& PTA_SSSE3
1954 && !(target_flags_explicit
& MASK_SSSE3
))
1955 target_flags
|= MASK_SSSE3
;
1956 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
1957 && !(target_flags_explicit
& MASK_SSE4_1
))
1958 target_flags
|= MASK_SSE4_1
;
1959 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1960 x86_prefetch_sse
= true;
1961 if (processor_alias_table
[i
].flags
& PTA_CX16
)
1962 x86_cmpxchg16b
= true;
1963 if (processor_alias_table
[i
].flags
& PTA_POPCNT
1964 && !(target_flags_explicit
& MASK_POPCNT
))
1965 target_flags
|= MASK_POPCNT
;
1966 if (processor_alias_table
[i
].flags
& PTA_ABM
1967 && !(target_flags_explicit
& MASK_ABM
))
1968 target_flags
|= MASK_ABM
;
1969 if (processor_alias_table
[i
].flags
& PTA_SSE4A
1970 && !(target_flags_explicit
& MASK_SSE4A
))
1971 target_flags
|= MASK_SSE4A
;
1972 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
1974 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1975 error ("CPU you selected does not support x86-64 "
1981 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1983 ix86_arch_mask
= 1u << ix86_arch
;
1984 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
1985 ix86_arch_features
[i
] &= ix86_arch_mask
;
1987 for (i
= 0; i
< pta_size
; i
++)
1988 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1990 ix86_tune
= processor_alias_table
[i
].processor
;
1991 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1993 if (ix86_tune_defaulted
)
1995 ix86_tune_string
= "x86-64";
1996 for (i
= 0; i
< pta_size
; i
++)
1997 if (! strcmp (ix86_tune_string
,
1998 processor_alias_table
[i
].name
))
2000 ix86_tune
= processor_alias_table
[i
].processor
;
2003 error ("CPU you selected does not support x86-64 "
2006 /* Intel CPUs have always interpreted SSE prefetch instructions as
2007 NOPs; so, we can enable SSE prefetch instructions even when
2008 -mtune (rather than -march) points us to a processor that has them.
2009 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2010 higher processors. */
2011 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
2012 x86_prefetch_sse
= true;
2016 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2018 ix86_tune_mask
= 1u << ix86_tune
;
2019 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2020 ix86_tune_features
[i
] &= ix86_tune_mask
;
2023 ix86_cost
= &size_cost
;
2025 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2026 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2027 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2029 /* Arrange to set up i386_stack_locals for all functions. */
2030 init_machine_status
= ix86_init_machine_status
;
2032 /* Validate -mregparm= value. */
2033 if (ix86_regparm_string
)
2036 warning (0, "-mregparm is ignored in 64-bit mode");
2037 i
= atoi (ix86_regparm_string
);
2038 if (i
< 0 || i
> REGPARM_MAX
)
2039 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2044 ix86_regparm
= REGPARM_MAX
;
2046 /* If the user has provided any of the -malign-* options,
2047 warn and use that value only if -falign-* is not set.
2048 Remove this code in GCC 3.2 or later. */
2049 if (ix86_align_loops_string
)
2051 warning (0, "-malign-loops is obsolete, use -falign-loops");
2052 if (align_loops
== 0)
2054 i
= atoi (ix86_align_loops_string
);
2055 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2056 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2058 align_loops
= 1 << i
;
2062 if (ix86_align_jumps_string
)
2064 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2065 if (align_jumps
== 0)
2067 i
= atoi (ix86_align_jumps_string
);
2068 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2069 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2071 align_jumps
= 1 << i
;
2075 if (ix86_align_funcs_string
)
2077 warning (0, "-malign-functions is obsolete, use -falign-functions");
2078 if (align_functions
== 0)
2080 i
= atoi (ix86_align_funcs_string
);
2081 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2082 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2084 align_functions
= 1 << i
;
2088 /* Default align_* from the processor table. */
2089 if (align_loops
== 0)
2091 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2092 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2094 if (align_jumps
== 0)
2096 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2097 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2099 if (align_functions
== 0)
2101 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2104 /* Validate -mbranch-cost= value, or provide default. */
2105 ix86_branch_cost
= ix86_cost
->branch_cost
;
2106 if (ix86_branch_cost_string
)
2108 i
= atoi (ix86_branch_cost_string
);
2110 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2112 ix86_branch_cost
= i
;
2114 if (ix86_section_threshold_string
)
2116 i
= atoi (ix86_section_threshold_string
);
2118 error ("-mlarge-data-threshold=%d is negative", i
);
2120 ix86_section_threshold
= i
;
2123 if (ix86_tls_dialect_string
)
2125 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2126 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2127 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2128 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2129 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2130 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2132 error ("bad value (%s) for -mtls-dialect= switch",
2133 ix86_tls_dialect_string
);
2136 if (ix87_precision_string
)
2138 i
= atoi (ix87_precision_string
);
2139 if (i
!= 32 && i
!= 64 && i
!= 80)
2140 error ("pc%d is not valid precision setting (32, 64 or 80)", i
);
2143 /* Keep nonleaf frame pointers. */
2144 if (flag_omit_frame_pointer
)
2145 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2146 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2147 flag_omit_frame_pointer
= 1;
2149 /* If we're doing fast math, we don't care about comparison order
2150 wrt NaNs. This lets us use a shorter comparison sequence. */
2151 if (flag_finite_math_only
)
2152 target_flags
&= ~MASK_IEEE_FP
;
2154 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2155 since the insns won't need emulation. */
2156 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2157 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2159 /* Likewise, if the target doesn't have a 387, or we've specified
2160 software floating point, don't use 387 inline intrinsics. */
2162 target_flags
|= MASK_NO_FANCY_MATH_387
;
2164 /* Turn on SSSE3 builtins for -msse4.1. */
2166 target_flags
|= MASK_SSSE3
;
2168 /* Turn on SSE3 builtins for -mssse3. */
2170 target_flags
|= MASK_SSE3
;
2172 /* Turn on SSE3 builtins for -msse4a. */
2174 target_flags
|= MASK_SSE3
;
2176 /* Turn on SSE2 builtins for -msse3. */
2178 target_flags
|= MASK_SSE2
;
2180 /* Turn on SSE builtins for -msse2. */
2182 target_flags
|= MASK_SSE
;
2184 /* Turn on MMX builtins for -msse. */
2187 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2188 x86_prefetch_sse
= true;
2191 /* Turn on MMX builtins for 3Dnow. */
2193 target_flags
|= MASK_MMX
;
2195 /* Turn on POPCNT builtins for -mabm. */
2197 target_flags
|= MASK_POPCNT
;
2202 warning (0, "-mrtd is ignored in 64bit mode");
2204 /* Enable by default the SSE and MMX builtins. Do allow the user to
2205 explicitly disable any of these. In particular, disabling SSE and
2206 MMX for kernel code is extremely useful. */
2208 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| TARGET_SUBTARGET64_DEFAULT
)
2209 & ~target_flags_explicit
);
2213 /* i386 ABI does not specify red zone. It still makes sense to use it
2214 when programmer takes care to stack from being destroyed. */
2215 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2216 target_flags
|= MASK_NO_RED_ZONE
;
2219 /* Validate -mpreferred-stack-boundary= value, or provide default.
2220 The default of 128 bits is for Pentium III's SSE __m128. We can't
2221 change it because of optimize_size. Otherwise, we can't mix object
2222 files compiled with -Os and -On. */
2223 ix86_preferred_stack_boundary
= 128;
2224 if (ix86_preferred_stack_boundary_string
)
2226 i
= atoi (ix86_preferred_stack_boundary_string
);
2227 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2228 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2229 TARGET_64BIT
? 4 : 2);
2231 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2234 /* Accept -msseregparm only if at least SSE support is enabled. */
2235 if (TARGET_SSEREGPARM
2237 error ("-msseregparm used without SSE enabled");
2239 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2240 if (ix86_fpmath_string
!= 0)
2242 if (! strcmp (ix86_fpmath_string
, "387"))
2243 ix86_fpmath
= FPMATH_387
;
2244 else if (! strcmp (ix86_fpmath_string
, "sse"))
2248 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2249 ix86_fpmath
= FPMATH_387
;
2252 ix86_fpmath
= FPMATH_SSE
;
2254 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2255 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2259 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2260 ix86_fpmath
= FPMATH_387
;
2262 else if (!TARGET_80387
)
2264 warning (0, "387 instruction set disabled, using SSE arithmetics");
2265 ix86_fpmath
= FPMATH_SSE
;
2268 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2271 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2274 /* If the i387 is disabled, then do not return values in it. */
2276 target_flags
&= ~MASK_FLOAT_RETURNS
;
2278 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2279 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2281 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2283 /* ??? Unwind info is not correct around the CFG unless either a frame
2284 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2285 unwind info generation to be aware of the CFG and propagating states
2287 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2288 || flag_exceptions
|| flag_non_call_exceptions
)
2289 && flag_omit_frame_pointer
2290 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2292 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2293 warning (0, "unwind tables currently require either a frame pointer "
2294 "or -maccumulate-outgoing-args for correctness");
2295 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2298 /* For sane SSE instruction set generation we need fcomi instruction.
2299 It is safe to enable all CMOVE instructions. */
2303 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2306 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2307 p
= strchr (internal_label_prefix
, 'X');
2308 internal_label_prefix_len
= p
- internal_label_prefix
;
2312 /* When scheduling description is not available, disable scheduler pass
2313 so it won't slow down the compilation and make x87 code slower. */
2314 if (!TARGET_SCHEDULE
)
2315 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2317 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2318 set_param_value ("simultaneous-prefetches",
2319 ix86_cost
->simultaneous_prefetches
);
2320 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2321 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2324 /* Return true if this goes in large data/bss. */
2327 ix86_in_large_data_p (tree exp
)
2329 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
2332 /* Functions are never large data. */
2333 if (TREE_CODE (exp
) == FUNCTION_DECL
)
2336 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
2338 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
2339 if (strcmp (section
, ".ldata") == 0
2340 || strcmp (section
, ".lbss") == 0)
2346 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
2348 /* If this is an incomplete type with size 0, then we can't put it
2349 in data because it might be too big when completed. */
2350 if (!size
|| size
> ix86_section_threshold
)
2357 /* Switch to the appropriate section for output of DECL.
2358 DECL is either a `VAR_DECL' node or a constant of some sort.
2359 RELOC indicates whether forming the initial value of DECL requires
2360 link-time relocations. */
2362 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
2366 x86_64_elf_select_section (tree decl
, int reloc
,
2367 unsigned HOST_WIDE_INT align
)
2369 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2370 && ix86_in_large_data_p (decl
))
2372 const char *sname
= NULL
;
2373 unsigned int flags
= SECTION_WRITE
;
2374 switch (categorize_decl_for_section (decl
, reloc
))
2379 case SECCAT_DATA_REL
:
2380 sname
= ".ldata.rel";
2382 case SECCAT_DATA_REL_LOCAL
:
2383 sname
= ".ldata.rel.local";
2385 case SECCAT_DATA_REL_RO
:
2386 sname
= ".ldata.rel.ro";
2388 case SECCAT_DATA_REL_RO_LOCAL
:
2389 sname
= ".ldata.rel.ro.local";
2393 flags
|= SECTION_BSS
;
2396 case SECCAT_RODATA_MERGE_STR
:
2397 case SECCAT_RODATA_MERGE_STR_INIT
:
2398 case SECCAT_RODATA_MERGE_CONST
:
2402 case SECCAT_SRODATA
:
2409 /* We don't split these for medium model. Place them into
2410 default sections and hope for best. */
2415 /* We might get called with string constants, but get_named_section
2416 doesn't like them as they are not DECLs. Also, we need to set
2417 flags in that case. */
2419 return get_section (sname
, flags
, NULL
);
2420 return get_named_section (decl
, sname
, reloc
);
2423 return default_elf_select_section (decl
, reloc
, align
);
2426 /* Build up a unique section name, expressed as a
2427 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2428 RELOC indicates whether the initial value of EXP requires
2429 link-time relocations. */
2431 static void ATTRIBUTE_UNUSED
2432 x86_64_elf_unique_section (tree decl
, int reloc
)
2434 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2435 && ix86_in_large_data_p (decl
))
2437 const char *prefix
= NULL
;
2438 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2439 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2441 switch (categorize_decl_for_section (decl
, reloc
))
2444 case SECCAT_DATA_REL
:
2445 case SECCAT_DATA_REL_LOCAL
:
2446 case SECCAT_DATA_REL_RO
:
2447 case SECCAT_DATA_REL_RO_LOCAL
:
2448 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2451 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2454 case SECCAT_RODATA_MERGE_STR
:
2455 case SECCAT_RODATA_MERGE_STR_INIT
:
2456 case SECCAT_RODATA_MERGE_CONST
:
2457 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2459 case SECCAT_SRODATA
:
2466 /* We don't split these for medium model. Place them into
2467 default sections and hope for best. */
2475 plen
= strlen (prefix
);
2477 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2478 name
= targetm
.strip_name_encoding (name
);
2479 nlen
= strlen (name
);
2481 string
= alloca (nlen
+ plen
+ 1);
2482 memcpy (string
, prefix
, plen
);
2483 memcpy (string
+ plen
, name
, nlen
+ 1);
2485 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2489 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  FILE is the assembler output stream, NAME the
   symbol, SIZE its size in bytes and ALIGN its alignment in bits.  */

void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int) ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  /* Alignment is emitted in bytes, hence the BITS_PER_UNIT division.  */
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
2514 /* Utility function for targets to use in implementing
2515 ASM_OUTPUT_ALIGNED_BSS. */
2518 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2519 const char *name
, unsigned HOST_WIDE_INT size
,
2522 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2523 && size
> (unsigned int)ix86_section_threshold
)
2524 switch_to_section (get_named_section (decl
, ".lbss", 0));
2526 switch_to_section (bss_section
);
2527 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2528 #ifdef ASM_DECLARE_OBJECT_NAME
2529 last_assemble_variable_decl
= decl
;
2530 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2532 /* Standard thing is just output label for the object. */
2533 ASM_OUTPUT_LABEL (file
, name
);
2534 #endif /* ASM_DECLARE_OBJECT_NAME */
2535 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2539 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2541 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2542 make the problem with not enough registers even worse. */
2543 #ifdef INSN_SCHEDULING
2545 flag_schedule_insns
= 0;
2549 /* The Darwin libraries never set errno, so we might as well
2550 avoid calling them when that's the only reason we would. */
2551 flag_errno_math
= 0;
2553 /* The default values of these switches depend on the TARGET_64BIT
2554 that is not known at this moment. Mark these values with 2 and
2555 let user the to override these. In case there is no command line option
2556 specifying them, we will set the defaults in override_options. */
2558 flag_omit_frame_pointer
= 2;
2559 flag_pcc_struct_return
= 2;
2560 flag_asynchronous_unwind_tables
= 2;
2561 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2562 SUBTARGET_OPTIMIZATION_OPTIONS
;
2566 /* Decide whether we can make a sibling call to a function. DECL is the
2567 declaration of the function being targeted by the call and EXP is the
2568 CALL_EXPR representing the call. */
2571 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2576 /* If we are generating position-independent code, we cannot sibcall
2577 optimize any indirect call, or a direct call to a global function,
2578 as the PLT requires %ebx be live. */
2579 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2586 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2587 if (POINTER_TYPE_P (func
))
2588 func
= TREE_TYPE (func
);
2591 /* Check that the return value locations are the same. Like
2592 if we are returning floats on the 80387 register stack, we cannot
2593 make a sibcall from a function that doesn't return a float to a
2594 function that does or, conversely, from a function that does return
2595 a float to a function that doesn't; the necessary stack adjustment
2596 would not be executed. This is also the place we notice
2597 differences in the return value ABI. Note that it is ok for one
2598 of the functions to have void return type as long as the return
2599 value of the other is passed in a register. */
2600 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2601 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2603 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2605 if (!rtx_equal_p (a
, b
))
2608 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2610 else if (!rtx_equal_p (a
, b
))
2613 /* If this call is indirect, we'll need to be able to use a call-clobbered
2614 register for the address of the target function. Make sure that all
2615 such registers are not used for passing parameters. */
2616 if (!decl
&& !TARGET_64BIT
)
2620 /* We're looking at the CALL_EXPR, we need the type of the function. */
2621 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2622 type
= TREE_TYPE (type
); /* pointer type */
2623 type
= TREE_TYPE (type
); /* function type */
2625 if (ix86_function_regparm (type
, NULL
) >= 3)
2627 /* ??? Need to count the actual number of registers to be used,
2628 not the possible number of registers. Fix later. */
2633 /* Dllimport'd functions are also called indirectly. */
2634 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2635 && decl
&& DECL_DLLIMPORT_P (decl
)
2636 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2639 /* If we forced aligned the stack, then sibcalling would unalign the
2640 stack, which may break the called function. */
2641 if (cfun
->machine
->force_align_arg_pointer
)
2644 /* Otherwise okay. That also includes certain types of indirect calls. */
2648 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2649 calling convention attributes;
2650 arguments as in struct attribute_spec.handler. */
2653 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2655 int flags ATTRIBUTE_UNUSED
,
2658 if (TREE_CODE (*node
) != FUNCTION_TYPE
2659 && TREE_CODE (*node
) != METHOD_TYPE
2660 && TREE_CODE (*node
) != FIELD_DECL
2661 && TREE_CODE (*node
) != TYPE_DECL
)
2663 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2664 IDENTIFIER_POINTER (name
));
2665 *no_add_attrs
= true;
2669 /* Can combine regparm with all attributes but fastcall. */
2670 if (is_attribute_p ("regparm", name
))
2674 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2676 error ("fastcall and regparm attributes are not compatible");
2679 cst
= TREE_VALUE (args
);
2680 if (TREE_CODE (cst
) != INTEGER_CST
)
2682 warning (OPT_Wattributes
,
2683 "%qs attribute requires an integer constant argument",
2684 IDENTIFIER_POINTER (name
));
2685 *no_add_attrs
= true;
2687 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2689 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2690 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2691 *no_add_attrs
= true;
2695 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2696 TYPE_ATTRIBUTES (*node
))
2697 && compare_tree_int (cst
, REGPARM_MAX
-1))
2699 error ("%s functions limited to %d register parameters",
2700 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2708 /* Do not warn when emulating the MS ABI. */
2709 if (!TARGET_64BIT_MS_ABI
)
2710 warning (OPT_Wattributes
, "%qs attribute ignored",
2711 IDENTIFIER_POINTER (name
));
2712 *no_add_attrs
= true;
2716 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2717 if (is_attribute_p ("fastcall", name
))
2719 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2721 error ("fastcall and cdecl attributes are not compatible");
2723 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2725 error ("fastcall and stdcall attributes are not compatible");
2727 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2729 error ("fastcall and regparm attributes are not compatible");
2733 /* Can combine stdcall with fastcall (redundant), regparm and
2735 else if (is_attribute_p ("stdcall", name
))
2737 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2739 error ("stdcall and cdecl attributes are not compatible");
2741 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2743 error ("stdcall and fastcall attributes are not compatible");
2747 /* Can combine cdecl with regparm and sseregparm. */
2748 else if (is_attribute_p ("cdecl", name
))
2750 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2752 error ("stdcall and cdecl attributes are not compatible");
2754 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2756 error ("fastcall and cdecl attributes are not compatible");
2760 /* Can combine sseregparm with all attributes. */
2765 /* Return 0 if the attributes for two types are incompatible, 1 if they
2766 are compatible, and 2 if they are nearly compatible (which causes a
2767 warning to be generated). */
2770 ix86_comp_type_attributes (tree type1
, tree type2
)
2772 /* Check for mismatch of non-default calling convention. */
2773 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2775 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2778 /* Check for mismatched fastcall/regparm types. */
2779 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2780 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2781 || (ix86_function_regparm (type1
, NULL
)
2782 != ix86_function_regparm (type2
, NULL
)))
2785 /* Check for mismatched sseregparm types. */
2786 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2787 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2790 /* Check for mismatched return types (cdecl vs stdcall). */
2791 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2792 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2798 /* Return the regparm value for a function with the indicated TYPE and DECL.
2799 DECL may be NULL when calling function indirectly
2800 or considering a libcall. */
2803 ix86_function_regparm (tree type
, tree decl
)
2806 int regparm
= ix86_regparm
;
2811 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2813 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2815 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2818 /* Use register calling convention for local functions when possible. */
2819 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
2820 && flag_unit_at_a_time
&& !profile_flag
)
2822 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2825 int local_regparm
, globals
= 0, regno
;
2828 /* Make sure no regparm register is taken by a
2829 global register variable. */
2830 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2831 if (global_regs
[local_regparm
])
2834 /* We can't use regparm(3) for nested functions as these use
2835 static chain pointer in third argument. */
2836 if (local_regparm
== 3
2837 && decl_function_context (decl
)
2838 && !DECL_NO_STATIC_CHAIN (decl
))
2841 /* If the function realigns its stackpointer, the prologue will
2842 clobber %ecx. If we've already generated code for the callee,
2843 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2844 scanning the attributes for the self-realigning property. */
2845 f
= DECL_STRUCT_FUNCTION (decl
);
2846 if (local_regparm
== 3
2847 && (f
? !!f
->machine
->force_align_arg_pointer
2848 : !!lookup_attribute (ix86_force_align_arg_pointer_string
,
2849 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2852 /* Each global register variable increases register preassure,
2853 so the more global reg vars there are, the smaller regparm
2854 optimization use, unless requested by the user explicitly. */
2855 for (regno
= 0; regno
< 6; regno
++)
2856 if (global_regs
[regno
])
2859 = globals
< local_regparm
? local_regparm
- globals
: 0;
2861 if (local_regparm
> regparm
)
2862 regparm
= local_regparm
;
2869 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2870 DFmode (2) arguments in SSE registers for a function with the
2871 indicated TYPE and DECL. DECL may be NULL when calling function
2872 indirectly or considering a libcall. Otherwise return 0. */
2875 ix86_function_sseregparm (tree type
, tree decl
)
2877 gcc_assert (!TARGET_64BIT
);
2879 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2880 by the sseregparm attribute. */
2881 if (TARGET_SSEREGPARM
2882 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
2887 error ("Calling %qD with attribute sseregparm without "
2888 "SSE/SSE2 enabled", decl
);
2890 error ("Calling %qT with attribute sseregparm without "
2891 "SSE/SSE2 enabled", type
);
2898 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2899 (and DFmode for SSE2) arguments in SSE registers. */
2900 if (decl
&& TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
2902 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2904 return TARGET_SSE2
? 2 : 1;
2910 /* Return true if EAX is live at the start of the function. Used by
2911 ix86_expand_prologue to determine if we need special help before
2912 calling allocate_stack_worker. */
2915 ix86_eax_live_at_start_p (void)
2917 /* Cheat. Don't bother working forward from ix86_function_regparm
2918 to the function type to whether an actual argument is located in
2919 eax. Instead just look at cfg info, which is still close enough
2920 to correct at this point. This gives false positives for broken
2921 functions that might use uninitialized data that happens to be
2922 allocated in eax, but who cares? */
2923 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
2926 /* Return true if TYPE has a variable argument list. */
2929 type_has_variadic_args_p (tree type
)
2931 tree n
, t
= TYPE_ARG_TYPES (type
);
2936 while ((n
= TREE_CHAIN (t
)) != NULL
)
2939 return TREE_VALUE (t
) != void_type_node
;
2942 /* Value is the number of bytes of arguments automatically
2943 popped when returning from a subroutine call.
2944 FUNDECL is the declaration node of the function (as a tree),
2945 FUNTYPE is the data type of the function (as a tree),
2946 or for a library call it is an identifier node for the subroutine name.
2947 SIZE is the number of bytes of arguments passed on the stack.
2949 On the 80386, the RTD insn may be used to pop them if the number
2950 of args is fixed, but if the number is variable then the caller
2951 must pop them all. RTD can't be used for library calls now
2952 because the library is compiled with the Unix compiler.
2953 Use of RTD is a selectable option, since it is incompatible with
2954 standard Unix calling sequences. If the option is not selected,
2955 the caller must always pop the args.
2957 The attribute stdcall is equivalent to RTD on a per module basis. */
2960 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
2964 /* None of the 64-bit ABIs pop arguments. */
2968 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
2970 /* Cdecl functions override -mrtd, and never pop the stack. */
2971 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
2973 /* Stdcall and fastcall functions will pop the stack if not
2975 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
2976 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
2979 if (rtd
&& ! type_has_variadic_args_p (funtype
))
2983 /* Lose any fake structure return argument if it is passed on the stack. */
2984 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
2985 && !KEEP_AGGREGATE_RETURN_POINTER
)
2987 int nregs
= ix86_function_regparm (funtype
, fundecl
);
2989 return GET_MODE_SIZE (Pmode
);
2995 /* Argument support functions. */
2997 /* Return true when register may be used to pass function parameters. */
2999 ix86_function_arg_regno_p (int regno
)
3002 const int *parm_regs
;
3007 return (regno
< REGPARM_MAX
3008 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3010 return (regno
< REGPARM_MAX
3011 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3012 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3013 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3014 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3019 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3024 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3025 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3029 /* RAX is used as hidden argument to va_arg functions. */
3030 if (!TARGET_64BIT_MS_ABI
&& regno
== 0)
3033 if (TARGET_64BIT_MS_ABI
)
3034 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
3036 parm_regs
= x86_64_int_parameter_registers
;
3037 for (i
= 0; i
< REGPARM_MAX
; i
++)
3038 if (regno
== parm_regs
[i
])
3043 /* Return if we do not know how to pass TYPE solely in registers. */
3046 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
3048 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3051 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3052 The layout_type routine is crafty and tries to trick us into passing
3053 currently unsupported vector types on the stack by using TImode. */
3054 return (!TARGET_64BIT
&& mode
== TImode
3055 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3058 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3059 for a call to a function whose data type is FNTYPE.
3060 For a library call, FNTYPE is 0. */
3063 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3064 tree fntype
, /* tree ptr for function decl */
3065 rtx libname
, /* SYMBOL_REF of library name or 0 */
3068 memset (cum
, 0, sizeof (*cum
));
3070 /* Set up the number of registers to use for passing arguments. */
3071 cum
->nregs
= ix86_regparm
;
3073 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3075 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3076 cum
->warn_sse
= true;
3077 cum
->warn_mmx
= true;
3078 cum
->maybe_vaarg
= (fntype
3079 ? (!TYPE_ARG_TYPES (fntype
)
3080 || type_has_variadic_args_p (fntype
))
3085 /* If there are variable arguments, then we won't pass anything
3086 in registers in 32-bit mode. */
3087 if (cum
->maybe_vaarg
)
3097 /* Use ecx and edx registers if function has fastcall attribute,
3098 else look for regparm information. */
3101 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3107 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3110 /* Set up the number of SSE registers used for passing SFmode
3111 and DFmode arguments. Warn for mismatching ABI. */
3112 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3116 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3117 But in the case of vector types, it is some vector mode.
3119 When we have only some of our vector isa extensions enabled, then there
3120 are some modes for which vector_mode_supported_p is false. For these
3121 modes, the generic vector support in gcc will choose some non-vector mode
3122 in order to implement the type. By computing the natural mode, we'll
3123 select the proper ABI location for the operand and not depend on whatever
3124 the middle-end decides to do with these vector types. */
3126 static enum machine_mode
3127 type_natural_mode (tree type
)
3129 enum machine_mode mode
= TYPE_MODE (type
);
3131 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3133 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3134 if ((size
== 8 || size
== 16)
3135 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3136 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3138 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3140 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3141 mode
= MIN_MODE_VECTOR_FLOAT
;
3143 mode
= MIN_MODE_VECTOR_INT
;
3145 /* Get the mode which has this inner mode and number of units. */
3146 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3147 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3148 && GET_MODE_INNER (mode
) == innermode
)
3158 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3159 this may not agree with the mode that the type system has chosen for the
3160 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3161 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3164 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3169 if (orig_mode
!= BLKmode
)
3170 tmp
= gen_rtx_REG (orig_mode
, regno
);
3173 tmp
= gen_rtx_REG (mode
, regno
);
3174 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3175 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3181 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3182 of this code is to classify each 8bytes of incoming argument by the register
3183 class and assign registers accordingly. */
3185 /* Return the union class of CLASS1 and CLASS2.
3186 See the x86-64 PS ABI for details. */
3188 static enum x86_64_reg_class
3189 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3191 /* Rule #1: If both classes are equal, this is the resulting class. */
3192 if (class1
== class2
)
3195 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3197 if (class1
== X86_64_NO_CLASS
)
3199 if (class2
== X86_64_NO_CLASS
)
3202 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3203 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3204 return X86_64_MEMORY_CLASS
;
3206 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3207 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3208 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3209 return X86_64_INTEGERSI_CLASS
;
3210 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3211 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3212 return X86_64_INTEGER_CLASS
;
3214 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3216 if (class1
== X86_64_X87_CLASS
3217 || class1
== X86_64_X87UP_CLASS
3218 || class1
== X86_64_COMPLEX_X87_CLASS
3219 || class2
== X86_64_X87_CLASS
3220 || class2
== X86_64_X87UP_CLASS
3221 || class2
== X86_64_COMPLEX_X87_CLASS
)
3222 return X86_64_MEMORY_CLASS
;
3224 /* Rule #6: Otherwise class SSE is used. */
3225 return X86_64_SSE_CLASS
;
3228 /* Classify the argument of type TYPE and mode MODE.
3229 CLASSES will be filled by the register class used to pass each word
3230 of the operand. The number of words is returned. In case the parameter
3231 should be passed in memory, 0 is returned. As a special case for zero
3232 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3234 BIT_OFFSET is used internally for handling records and specifies offset
3235 of the offset in bits modulo 256 to avoid overflow cases.
3237 See the x86-64 PS ABI for details.
3241 classify_argument (enum machine_mode mode
, tree type
,
3242 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3244 HOST_WIDE_INT bytes
=
3245 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3246 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3248 /* Variable sized entities are always passed/returned in memory. */
3252 if (mode
!= VOIDmode
3253 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3256 if (type
&& AGGREGATE_TYPE_P (type
))
3260 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3262 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3266 for (i
= 0; i
< words
; i
++)
3267 classes
[i
] = X86_64_NO_CLASS
;
3269 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3270 signal the memory class, so handle it as a special case. */
3273 classes
[0] = X86_64_NO_CLASS
;
3277 /* Classify each field of record and merge classes. */
3278 switch (TREE_CODE (type
))
3281 /* And now merge the fields of structure. */
3282 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3284 if (TREE_CODE (field
) == FIELD_DECL
)
3288 if (TREE_TYPE (field
) == error_mark_node
)
3291 /* Bitfields are always classified as integer. Handle them
3292 early, since later code would consider them to be
3293 misaligned integers. */
3294 if (DECL_BIT_FIELD (field
))
3296 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3297 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3298 + tree_low_cst (DECL_SIZE (field
), 0)
3301 merge_classes (X86_64_INTEGER_CLASS
,
3306 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3307 TREE_TYPE (field
), subclasses
,
3308 (int_bit_position (field
)
3309 + bit_offset
) % 256);
3312 for (i
= 0; i
< num
; i
++)
3315 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3317 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3325 /* Arrays are handled as small records. */
3328 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3329 TREE_TYPE (type
), subclasses
, bit_offset
);
3333 /* The partial classes are now full classes. */
3334 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3335 subclasses
[0] = X86_64_SSE_CLASS
;
3336 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3337 subclasses
[0] = X86_64_INTEGER_CLASS
;
3339 for (i
= 0; i
< words
; i
++)
3340 classes
[i
] = subclasses
[i
% num
];
3345 case QUAL_UNION_TYPE
:
3346 /* Unions are similar to RECORD_TYPE but offset is always 0.
3348 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3350 if (TREE_CODE (field
) == FIELD_DECL
)
3354 if (TREE_TYPE (field
) == error_mark_node
)
3357 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3358 TREE_TYPE (field
), subclasses
,
3362 for (i
= 0; i
< num
; i
++)
3363 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3372 /* Final merger cleanup. */
3373 for (i
= 0; i
< words
; i
++)
3375 /* If one class is MEMORY, everything should be passed in
3377 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3380 /* The X86_64_SSEUP_CLASS should be always preceded by
3381 X86_64_SSE_CLASS. */
3382 if (classes
[i
] == X86_64_SSEUP_CLASS
3383 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3384 classes
[i
] = X86_64_SSE_CLASS
;
3386 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3387 if (classes
[i
] == X86_64_X87UP_CLASS
3388 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3389 classes
[i
] = X86_64_SSE_CLASS
;
3394 /* Compute alignment needed. We align all types to natural boundaries with
3395 exception of XFmode that is aligned to 64bits. */
3396 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3398 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3401 mode_alignment
= 128;
3402 else if (mode
== XCmode
)
3403 mode_alignment
= 256;
3404 if (COMPLEX_MODE_P (mode
))
3405 mode_alignment
/= 2;
3406 /* Misaligned fields are always returned in memory. */
3407 if (bit_offset
% mode_alignment
)
3411 /* for V1xx modes, just use the base mode */
3412 if (VECTOR_MODE_P (mode
)
3413 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3414 mode
= GET_MODE_INNER (mode
);
3416 /* Classification of atomic types. */
3421 classes
[0] = X86_64_SSE_CLASS
;
3424 classes
[0] = X86_64_SSE_CLASS
;
3425 classes
[1] = X86_64_SSEUP_CLASS
;
3434 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3435 classes
[0] = X86_64_INTEGERSI_CLASS
;
3437 classes
[0] = X86_64_INTEGER_CLASS
;
3441 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3446 if (!(bit_offset
% 64))
3447 classes
[0] = X86_64_SSESF_CLASS
;
3449 classes
[0] = X86_64_SSE_CLASS
;
3452 classes
[0] = X86_64_SSEDF_CLASS
;
3455 classes
[0] = X86_64_X87_CLASS
;
3456 classes
[1] = X86_64_X87UP_CLASS
;
3459 classes
[0] = X86_64_SSE_CLASS
;
3460 classes
[1] = X86_64_SSEUP_CLASS
;
3463 classes
[0] = X86_64_SSE_CLASS
;
3466 classes
[0] = X86_64_SSEDF_CLASS
;
3467 classes
[1] = X86_64_SSEDF_CLASS
;
3470 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3473 /* This mode is larger than 16 bytes. */
3481 classes
[0] = X86_64_SSE_CLASS
;
3482 classes
[1] = X86_64_SSEUP_CLASS
;
3488 classes
[0] = X86_64_SSE_CLASS
;
3494 gcc_assert (VECTOR_MODE_P (mode
));
3499 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3501 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3502 classes
[0] = X86_64_INTEGERSI_CLASS
;
3504 classes
[0] = X86_64_INTEGER_CLASS
;
3505 classes
[1] = X86_64_INTEGER_CLASS
;
3506 return 1 + (bytes
> 8);
3510 /* Examine the argument and return set number of register required in each
3511 class. Return 0 iff parameter should be passed in memory. */
3513 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3514 int *int_nregs
, int *sse_nregs
)
3516 enum x86_64_reg_class
class[MAX_CLASSES
];
3517 int n
= classify_argument (mode
, type
, class, 0);
3523 for (n
--; n
>= 0; n
--)
3526 case X86_64_INTEGER_CLASS
:
3527 case X86_64_INTEGERSI_CLASS
:
3530 case X86_64_SSE_CLASS
:
3531 case X86_64_SSESF_CLASS
:
3532 case X86_64_SSEDF_CLASS
:
3535 case X86_64_NO_CLASS
:
3536 case X86_64_SSEUP_CLASS
:
3538 case X86_64_X87_CLASS
:
3539 case X86_64_X87UP_CLASS
:
3543 case X86_64_COMPLEX_X87_CLASS
:
3544 return in_return
? 2 : 0;
3545 case X86_64_MEMORY_CLASS
:
3551 /* Construct container for the argument used by GCC interface. See
3552 FUNCTION_ARG for the detailed description. */
3555 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3556 tree type
, int in_return
, int nintregs
, int nsseregs
,
3557 const int *intreg
, int sse_regno
)
3559 /* The following variables hold the static issued_error state. */
3560 static bool issued_sse_arg_error
;
3561 static bool issued_sse_ret_error
;
3562 static bool issued_x87_ret_error
;
3564 enum machine_mode tmpmode
;
3566 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3567 enum x86_64_reg_class
class[MAX_CLASSES
];
3571 int needed_sseregs
, needed_intregs
;
3572 rtx exp
[MAX_CLASSES
];
3575 n
= classify_argument (mode
, type
, class, 0);
3578 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3581 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3584 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3585 some less clueful developer tries to use floating-point anyway. */
3586 if (needed_sseregs
&& !TARGET_SSE
)
3590 if (!issued_sse_ret_error
)
3592 error ("SSE register return with SSE disabled");
3593 issued_sse_ret_error
= true;
3596 else if (!issued_sse_arg_error
)
3598 error ("SSE register argument with SSE disabled");
3599 issued_sse_arg_error
= true;
3604 /* Likewise, error if the ABI requires us to return values in the
3605 x87 registers and the user specified -mno-80387. */
3606 if (!TARGET_80387
&& in_return
)
3607 for (i
= 0; i
< n
; i
++)
3608 if (class[i
] == X86_64_X87_CLASS
3609 || class[i
] == X86_64_X87UP_CLASS
3610 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3612 if (!issued_x87_ret_error
)
3614 error ("x87 register return with x87 disabled");
3615 issued_x87_ret_error
= true;
3620 /* First construct simple cases. Avoid SCmode, since we want to use
3621 single register to pass this type. */
3622 if (n
== 1 && mode
!= SCmode
)
3625 case X86_64_INTEGER_CLASS
:
3626 case X86_64_INTEGERSI_CLASS
:
3627 return gen_rtx_REG (mode
, intreg
[0]);
3628 case X86_64_SSE_CLASS
:
3629 case X86_64_SSESF_CLASS
:
3630 case X86_64_SSEDF_CLASS
:
3631 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3632 case X86_64_X87_CLASS
:
3633 case X86_64_COMPLEX_X87_CLASS
:
3634 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3635 case X86_64_NO_CLASS
:
3636 /* Zero sized array, struct or class. */
3641 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3643 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3646 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3647 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3648 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3649 && class[1] == X86_64_INTEGER_CLASS
3650 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3651 && intreg
[0] + 1 == intreg
[1])
3652 return gen_rtx_REG (mode
, intreg
[0]);
3654 /* Otherwise figure out the entries of the PARALLEL. */
3655 for (i
= 0; i
< n
; i
++)
3659 case X86_64_NO_CLASS
:
3661 case X86_64_INTEGER_CLASS
:
3662 case X86_64_INTEGERSI_CLASS
:
3663 /* Merge TImodes on aligned occasions here too. */
3664 if (i
* 8 + 8 > bytes
)
3665 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3666 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3670 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3671 if (tmpmode
== BLKmode
)
3673 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3674 gen_rtx_REG (tmpmode
, *intreg
),
3678 case X86_64_SSESF_CLASS
:
3679 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3680 gen_rtx_REG (SFmode
,
3681 SSE_REGNO (sse_regno
)),
3685 case X86_64_SSEDF_CLASS
:
3686 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3687 gen_rtx_REG (DFmode
,
3688 SSE_REGNO (sse_regno
)),
3692 case X86_64_SSE_CLASS
:
3693 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3697 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3698 gen_rtx_REG (tmpmode
,
3699 SSE_REGNO (sse_regno
)),
3701 if (tmpmode
== TImode
)
3710 /* Empty aligned struct, union or class. */
3714 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3715 for (i
= 0; i
< nexps
; i
++)
3716 XVECEXP (ret
, 0, i
) = exp
[i
];
3720 /* Update the data in CUM to advance over an argument of mode MODE
3721 and data type TYPE. (TYPE is null for libcalls where that information
3722 may not be available.) */
3725 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3726 tree type
, HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
3742 cum
->words
+= words
;
3743 cum
->nregs
-= words
;
3744 cum
->regno
+= words
;
3746 if (cum
->nregs
<= 0)
3754 if (cum
->float_in_sse
< 2)
3757 if (cum
->float_in_sse
< 1)
3768 if (!type
|| !AGGREGATE_TYPE_P (type
))
3770 cum
->sse_words
+= words
;
3771 cum
->sse_nregs
-= 1;
3772 cum
->sse_regno
+= 1;
3773 if (cum
->sse_nregs
<= 0)
3785 if (!type
|| !AGGREGATE_TYPE_P (type
))
3787 cum
->mmx_words
+= words
;
3788 cum
->mmx_nregs
-= 1;
3789 cum
->mmx_regno
+= 1;
3790 if (cum
->mmx_nregs
<= 0)
3801 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3802 tree type
, HOST_WIDE_INT words
)
3804 int int_nregs
, sse_nregs
;
3806 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3807 cum
->words
+= words
;
3808 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3810 cum
->nregs
-= int_nregs
;
3811 cum
->sse_nregs
-= sse_nregs
;
3812 cum
->regno
+= int_nregs
;
3813 cum
->sse_regno
+= sse_nregs
;
3816 cum
->words
+= words
;
3820 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
3821 HOST_WIDE_INT words
)
3823 /* Otherwise, this should be passed indirect. */
3824 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
3826 cum
->words
+= words
;
3835 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3836 tree type
, int named ATTRIBUTE_UNUSED
)
3838 HOST_WIDE_INT bytes
, words
;
3840 if (mode
== BLKmode
)
3841 bytes
= int_size_in_bytes (type
);
3843 bytes
= GET_MODE_SIZE (mode
);
3844 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3847 mode
= type_natural_mode (type
);
3849 if (TARGET_64BIT_MS_ABI
)
3850 function_arg_advance_ms_64 (cum
, bytes
, words
);
3851 else if (TARGET_64BIT
)
3852 function_arg_advance_64 (cum
, mode
, type
, words
);
3854 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
3857 /* Define where to put the arguments to a function.
3858 Value is zero to push the argument on the stack,
3859 or a hard register in which to store the argument.
3861 MODE is the argument's machine mode.
3862 TYPE is the data type of the argument (as a tree).
3863 This is null for libcalls where that information may
3865 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3866 the preceding args and about the function being called.
3867 NAMED is nonzero if this argument is a named parameter
3868 (otherwise it is an extra parameter matching an ellipsis). */
3871 function_arg_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3872 enum machine_mode orig_mode
, tree type
,
3873 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
3875 static bool warnedsse
, warnedmmx
;
3877 /* Avoid the AL settings for the Unix64 ABI. */
3878 if (mode
== VOIDmode
)
3894 if (words
<= cum
->nregs
)
3896 int regno
= cum
->regno
;
3898 /* Fastcall allocates the first two DWORD (SImode) or
3899 smaller arguments to ECX and EDX. */
3902 if (mode
== BLKmode
|| mode
== DImode
)
3905 /* ECX not EAX is the first allocated register. */
3909 return gen_rtx_REG (mode
, regno
);
3914 if (cum
->float_in_sse
< 2)
3917 if (cum
->float_in_sse
< 1)
3927 if (!type
|| !AGGREGATE_TYPE_P (type
))
3929 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
3932 warning (0, "SSE vector argument without SSE enabled "
3936 return gen_reg_or_parallel (mode
, orig_mode
,
3937 cum
->sse_regno
+ FIRST_SSE_REG
);
3945 if (!type
|| !AGGREGATE_TYPE_P (type
))
3947 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
3950 warning (0, "MMX vector argument without MMX enabled "
3954 return gen_reg_or_parallel (mode
, orig_mode
,
3955 cum
->mmx_regno
+ FIRST_MMX_REG
);
3964 function_arg_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3965 enum machine_mode orig_mode
, tree type
)
3967 /* Handle a hidden AL argument containing number of registers
3968 for varargs x86-64 functions. */
3969 if (mode
== VOIDmode
)
3970 return GEN_INT (cum
->maybe_vaarg
3971 ? (cum
->sse_nregs
< 0
3976 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3978 &x86_64_int_parameter_registers
[cum
->regno
],
3983 function_arg_ms_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3984 enum machine_mode orig_mode
, int named
)
3988 /* Avoid the AL settings for the Unix64 ABI. */
3989 if (mode
== VOIDmode
)
3992 /* If we've run out of registers, it goes on the stack. */
3993 if (cum
->nregs
== 0)
3996 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
3998 /* Only floating point modes are passed in anything but integer regs. */
3999 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
4002 regno
= cum
->regno
+ FIRST_SSE_REG
;
4007 /* Unnamed floating parameters are passed in both the
4008 SSE and integer registers. */
4009 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
4010 t2
= gen_rtx_REG (mode
, regno
);
4011 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
4012 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
4013 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
4017 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
4021 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode omode
,
4022 tree type
, int named
)
4024 enum machine_mode mode
= omode
;
4025 HOST_WIDE_INT bytes
, words
;
4027 if (mode
== BLKmode
)
4028 bytes
= int_size_in_bytes (type
);
4030 bytes
= GET_MODE_SIZE (mode
);
4031 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4033 /* To simplify the code below, represent vector types with a vector mode
4034 even if MMX/SSE are not active. */
4035 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4036 mode
= type_natural_mode (type
);
4038 if (TARGET_64BIT_MS_ABI
)
4039 return function_arg_ms_64 (cum
, mode
, omode
, named
);
4040 else if (TARGET_64BIT
)
4041 return function_arg_64 (cum
, mode
, omode
, type
);
4043 return function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
4046 /* A C expression that indicates when an argument must be passed by
4047 reference. If nonzero for an argument, a copy of that argument is
4048 made in memory and a pointer to the argument is passed instead of
4049 the argument itself. The pointer is passed in whatever way is
4050 appropriate for passing a pointer to that type. */
4053 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4054 enum machine_mode mode ATTRIBUTE_UNUSED
,
4055 tree type
, bool named ATTRIBUTE_UNUSED
)
4057 if (TARGET_64BIT_MS_ABI
)
4061 /* Arrays are passed by reference. */
4062 if (TREE_CODE (type
) == ARRAY_TYPE
)
4065 if (AGGREGATE_TYPE_P (type
))
4067 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4068 are passed by reference. */
4069 int el2
= exact_log2 (int_size_in_bytes (type
));
4070 return !(el2
>= 0 && el2
<= 3);
4074 /* __m128 is passed by reference. */
4075 /* ??? How to handle complex? For now treat them as structs,
4076 and pass them by reference if they're too large. */
4077 if (GET_MODE_SIZE (mode
) > 8)
4080 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
4086 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4087 ABI. Only called if TARGET_SSE. */
4089 contains_128bit_aligned_vector_p (tree type
)
4091 enum machine_mode mode
= TYPE_MODE (type
);
4092 if (SSE_REG_MODE_P (mode
)
4093 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4095 if (TYPE_ALIGN (type
) < 128)
4098 if (AGGREGATE_TYPE_P (type
))
4100 /* Walk the aggregates recursively. */
4101 switch (TREE_CODE (type
))
4105 case QUAL_UNION_TYPE
:
4109 /* Walk all the structure fields. */
4110 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4112 if (TREE_CODE (field
) == FIELD_DECL
4113 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4120 /* Just for use if some languages passes arrays by value. */
4121 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4132 /* Gives the alignment boundary, in bits, of an argument with the
4133 specified mode and type. */
4136 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4140 align
= TYPE_ALIGN (type
);
4142 align
= GET_MODE_ALIGNMENT (mode
);
4143 if (align
< PARM_BOUNDARY
)
4144 align
= PARM_BOUNDARY
;
4147 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4148 make an exception for SSE modes since these require 128bit
4151 The handling here differs from field_alignment. ICC aligns MMX
4152 arguments to 4 byte boundaries, while structure fields are aligned
4153 to 8 byte boundaries. */
4155 align
= PARM_BOUNDARY
;
4158 if (!SSE_REG_MODE_P (mode
))
4159 align
= PARM_BOUNDARY
;
4163 if (!contains_128bit_aligned_vector_p (type
))
4164 align
= PARM_BOUNDARY
;
4172 /* Return true if N is a possible register number of function value. */
4175 ix86_function_value_regno_p (int regno
)
4182 case FIRST_FLOAT_REG
:
4183 if (TARGET_64BIT_MS_ABI
)
4185 return TARGET_FLOAT_RETURNS_IN_80387
;
4191 if (TARGET_MACHO
|| TARGET_64BIT
)
4199 /* Define how to find the value returned by a function.
4200 VALTYPE is the data type of the value (as a tree).
4201 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4202 otherwise, FUNC is 0. */
4205 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
4206 tree fntype
, tree fn
)
4210 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4211 we normally prevent this case when mmx is not available. However
4212 some ABIs may require the result to be returned like DImode. */
4213 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4214 regno
= TARGET_MMX
? FIRST_MMX_REG
: 0;
4216 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4217 we prevent this case when sse is not available. However some ABIs
4218 may require the result to be returned like integer TImode. */
4219 else if (mode
== TImode
4220 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4221 regno
= TARGET_SSE
? FIRST_SSE_REG
: 0;
4223 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4224 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
4225 regno
= FIRST_FLOAT_REG
;
4227 /* Most things go in %eax. */
4230 /* Override FP return register with %xmm0 for local functions when
4231 SSE math is enabled or for functions with sseregparm attribute. */
4232 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
4234 int sse_level
= ix86_function_sseregparm (fntype
, fn
);
4235 if ((sse_level
>= 1 && mode
== SFmode
)
4236 || (sse_level
== 2 && mode
== DFmode
))
4237 regno
= FIRST_SSE_REG
;
4240 return gen_rtx_REG (orig_mode
, regno
);
4244 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
4249 /* Handle libcalls, which don't provide a type node. */
4250 if (valtype
== NULL
)
4262 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4265 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4269 return gen_rtx_REG (mode
, 0);
4273 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
4274 REGPARM_MAX
, SSE_REGPARM_MAX
,
4275 x86_64_int_return_registers
, 0);
4277 /* For zero sized structures, construct_container returns NULL, but we
4278 need to keep rest of compiler happy by returning meaningful value. */
4280 ret
= gen_rtx_REG (orig_mode
, 0);
4286 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
4288 unsigned int regno
= 0;
4292 if (mode
== SFmode
|| mode
== DFmode
)
4293 regno
= FIRST_SSE_REG
;
4294 else if (VECTOR_MODE_P (mode
) || GET_MODE_SIZE (mode
) == 16)
4295 regno
= FIRST_SSE_REG
;
4298 return gen_rtx_REG (orig_mode
, regno
);
4302 ix86_function_value_1 (tree valtype
, tree fntype_or_decl
,
4303 enum machine_mode orig_mode
, enum machine_mode mode
)
4308 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
4309 fn
= fntype_or_decl
;
4310 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4312 if (TARGET_64BIT_MS_ABI
)
4313 return function_value_ms_64 (orig_mode
, mode
);
4314 else if (TARGET_64BIT
)
4315 return function_value_64 (orig_mode
, mode
, valtype
);
4317 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
4321 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4322 bool outgoing ATTRIBUTE_UNUSED
)
4324 enum machine_mode mode
, orig_mode
;
4326 orig_mode
= TYPE_MODE (valtype
);
4327 mode
= type_natural_mode (valtype
);
4328 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
4332 ix86_libcall_value (enum machine_mode mode
)
4334 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
4337 /* Return true iff type is returned in memory. */
4340 return_in_memory_32 (tree type
, enum machine_mode mode
)
4344 if (mode
== BLKmode
)
4347 size
= int_size_in_bytes (type
);
4349 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4352 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4354 /* User-created vectors small enough to fit in EAX. */
4358 /* MMX/3dNow values are returned in MM0,
4359 except when it doesn't exits. */
4361 return (TARGET_MMX
? 0 : 1);
4363 /* SSE values are returned in XMM0, except when it doesn't exist. */
4365 return (TARGET_SSE
? 0 : 1);
4380 return_in_memory_64 (tree type
, enum machine_mode mode
)
4382 int needed_intregs
, needed_sseregs
;
4383 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4387 return_in_memory_ms_64 (tree type
, enum machine_mode mode
)
4389 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4391 /* __m128 and friends are returned in xmm0. */
4392 if (size
== 16 && VECTOR_MODE_P (mode
))
4395 /* Otherwise, the size must be exactly in [1248]. */
4396 return (size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8);
4400 ix86_return_in_memory (tree type
)
4402 enum machine_mode mode
= type_natural_mode (type
);
4404 if (TARGET_64BIT_MS_ABI
)
4405 return return_in_memory_ms_64 (type
, mode
);
4406 else if (TARGET_64BIT
)
4407 return return_in_memory_64 (type
, mode
);
4409 return return_in_memory_32 (type
, mode
);
4412 /* Return false iff TYPE is returned in memory. This version is used
4413 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4414 but differs notably in that when MMX is available, 8-byte vectors
4415 are returned in memory, rather than in MMX registers. */
4418 ix86_sol10_return_in_memory (tree type
)
4421 enum machine_mode mode
= type_natural_mode (type
);
4424 return return_in_memory_64 (type
, mode
);
4426 if (mode
== BLKmode
)
4429 size
= int_size_in_bytes (type
);
4431 if (VECTOR_MODE_P (mode
))
4433 /* Return in memory only if MMX registers *are* available. This
4434 seems backwards, but it is consistent with the existing
4441 else if (mode
== TImode
)
4443 else if (mode
== XFmode
)
4449 /* When returning SSE vector types, we have a choice of either
4450 (1) being abi incompatible with a -march switch, or
4451 (2) generating an error.
4452 Given no good solution, I think the safest thing is one warning.
4453 The user won't be able to use -Werror, but....
4455 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4456 called in response to actually generating a caller or callee that
4457 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4458 via aggregate_value_p for general type probing from tree-ssa. */
4461 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4463 static bool warnedsse
, warnedmmx
;
4465 if (!TARGET_64BIT
&& type
)
4467 /* Look at the return type of the function, not the function type. */
4468 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4470 if (!TARGET_SSE
&& !warnedsse
)
4473 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4476 warning (0, "SSE vector return without SSE enabled "
4481 if (!TARGET_MMX
&& !warnedmmx
)
4483 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4486 warning (0, "MMX vector return without MMX enabled "
4496 /* Create the va_list data type. */
4499 ix86_build_builtin_va_list (void)
4501 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4503 /* For i386 we use plain pointer to argument area. */
4504 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4505 return build_pointer_type (char_type_node
);
4507 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4508 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4510 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4511 unsigned_type_node
);
4512 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4513 unsigned_type_node
);
4514 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4516 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4519 va_list_gpr_counter_field
= f_gpr
;
4520 va_list_fpr_counter_field
= f_fpr
;
4522 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4523 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4524 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4525 DECL_FIELD_CONTEXT (f_sav
) = record
;
4527 TREE_CHAIN (record
) = type_decl
;
4528 TYPE_NAME (record
) = type_decl
;
4529 TYPE_FIELDS (record
) = f_gpr
;
4530 TREE_CHAIN (f_gpr
) = f_fpr
;
4531 TREE_CHAIN (f_fpr
) = f_ovf
;
4532 TREE_CHAIN (f_ovf
) = f_sav
;
4534 layout_type (record
);
4536 /* The correct type is an array type of one element. */
4537 return build_array_type (record
, build_index_type (size_zero_node
));
4540 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4543 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4553 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4556 /* Indicate to allocate space on the stack for varargs save area. */
4557 ix86_save_varrargs_registers
= 1;
4558 cfun
->stack_alignment_needed
= 128;
4560 save_area
= frame_pointer_rtx
;
4561 set
= get_varargs_alias_set ();
4563 for (i
= cum
->regno
;
4565 && i
< cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4568 mem
= gen_rtx_MEM (Pmode
,
4569 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4570 MEM_NOTRAP_P (mem
) = 1;
4571 set_mem_alias_set (mem
, set
);
4572 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4573 x86_64_int_parameter_registers
[i
]));
4576 if (cum
->sse_nregs
&& cfun
->va_list_fpr_size
)
4578 /* Now emit code to save SSE registers. The AX parameter contains number
4579 of SSE parameter registers used to call this function. We use
4580 sse_prologue_save insn template that produces computed jump across
4581 SSE saves. We need some preparation work to get this working. */
4583 label
= gen_label_rtx ();
4584 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4586 /* Compute address to jump to :
4587 label - 5*eax + nnamed_sse_arguments*5 */
4588 tmp_reg
= gen_reg_rtx (Pmode
);
4589 nsse_reg
= gen_reg_rtx (Pmode
);
4590 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4591 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4592 gen_rtx_MULT (Pmode
, nsse_reg
,
4597 gen_rtx_CONST (DImode
,
4598 gen_rtx_PLUS (DImode
,
4600 GEN_INT (cum
->sse_regno
* 4))));
4602 emit_move_insn (nsse_reg
, label_ref
);
4603 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4605 /* Compute address of memory block we save into. We always use pointer
4606 pointing 127 bytes after first byte to store - this is needed to keep
4607 instruction size limited by 4 bytes. */
4608 tmp_reg
= gen_reg_rtx (Pmode
);
4609 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4610 plus_constant (save_area
,
4611 8 * REGPARM_MAX
+ 127)));
4612 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4613 MEM_NOTRAP_P (mem
) = 1;
4614 set_mem_alias_set (mem
, set
);
4615 set_mem_align (mem
, BITS_PER_WORD
);
4617 /* And finally do the dirty job! */
4618 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4619 GEN_INT (cum
->sse_regno
), label
));
4624 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
4626 int set
= get_varargs_alias_set ();
4629 for (i
= cum
->regno
; i
< REGPARM_MAX
; i
++)
4633 mem
= gen_rtx_MEM (Pmode
,
4634 plus_constant (virtual_incoming_args_rtx
,
4635 i
* UNITS_PER_WORD
));
4636 MEM_NOTRAP_P (mem
) = 1;
4637 set_mem_alias_set (mem
, set
);
4639 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
4640 emit_move_insn (mem
, reg
);
4645 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4646 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4649 CUMULATIVE_ARGS next_cum
;
4653 /* This argument doesn't appear to be used anymore. Which is good,
4654 because the old code here didn't suppress rtl generation. */
4655 gcc_assert (!no_rtl
);
4660 fntype
= TREE_TYPE (current_function_decl
);
4661 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4662 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4663 != void_type_node
));
4665 /* For varargs, we do not want to skip the dummy va_dcl argument.
4666 For stdargs, we do want to skip the last named argument. */
4669 function_arg_advance (&next_cum
, mode
, type
, 1);
4671 if (TARGET_64BIT_MS_ABI
)
4672 setup_incoming_varargs_ms_64 (&next_cum
);
4674 setup_incoming_varargs_64 (&next_cum
);
4677 /* Implement va_start. */
4680 ix86_va_start (tree valist
, rtx nextarg
)
4682 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4683 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4684 tree gpr
, fpr
, ovf
, sav
, t
;
4687 /* Only 64bit target needs something special. */
4688 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4690 std_expand_builtin_va_start (valist
, nextarg
);
4694 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4695 f_fpr
= TREE_CHAIN (f_gpr
);
4696 f_ovf
= TREE_CHAIN (f_fpr
);
4697 f_sav
= TREE_CHAIN (f_ovf
);
4699 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4700 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4701 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4702 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4703 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4705 /* Count number of gp and fp argument registers used. */
4706 words
= current_function_args_info
.words
;
4707 n_gpr
= current_function_args_info
.regno
;
4708 n_fpr
= current_function_args_info
.sse_regno
;
4710 if (cfun
->va_list_gpr_size
)
4712 type
= TREE_TYPE (gpr
);
4713 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4714 build_int_cst (type
, n_gpr
* 8));
4715 TREE_SIDE_EFFECTS (t
) = 1;
4716 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4719 if (cfun
->va_list_fpr_size
)
4721 type
= TREE_TYPE (fpr
);
4722 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4723 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4724 TREE_SIDE_EFFECTS (t
) = 1;
4725 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4728 /* Find the overflow area. */
4729 type
= TREE_TYPE (ovf
);
4730 t
= make_tree (type
, virtual_incoming_args_rtx
);
4732 t
= build2 (PLUS_EXPR
, type
, t
,
4733 build_int_cst (type
, words
* UNITS_PER_WORD
));
4734 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4735 TREE_SIDE_EFFECTS (t
) = 1;
4736 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4738 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4740 /* Find the register save area.
4741 Prologue of the function save it right above stack frame. */
4742 type
= TREE_TYPE (sav
);
4743 t
= make_tree (type
, frame_pointer_rtx
);
4744 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4745 TREE_SIDE_EFFECTS (t
) = 1;
4746 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4750 /* Implement va_arg. */
4753 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4755 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4756 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4757 tree gpr
, fpr
, ovf
, sav
, t
;
4759 tree lab_false
, lab_over
= NULL_TREE
;
4764 enum machine_mode nat_mode
;
4766 /* Only 64bit target needs something special. */
4767 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4768 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4770 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4771 f_fpr
= TREE_CHAIN (f_gpr
);
4772 f_ovf
= TREE_CHAIN (f_fpr
);
4773 f_sav
= TREE_CHAIN (f_ovf
);
4775 valist
= build_va_arg_indirect_ref (valist
);
4776 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4777 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4778 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4779 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4781 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4783 type
= build_pointer_type (type
);
4784 size
= int_size_in_bytes (type
);
4785 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4787 nat_mode
= type_natural_mode (type
);
4788 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4789 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4791 /* Pull the value out of the saved registers. */
4793 addr
= create_tmp_var (ptr_type_node
, "addr");
4794 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4798 int needed_intregs
, needed_sseregs
;
4800 tree int_addr
, sse_addr
;
4802 lab_false
= create_artificial_label ();
4803 lab_over
= create_artificial_label ();
4805 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4807 need_temp
= (!REG_P (container
)
4808 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4809 || TYPE_ALIGN (type
) > 128));
4811 /* In case we are passing structure, verify that it is consecutive block
4812 on the register save area. If not we need to do moves. */
4813 if (!need_temp
&& !REG_P (container
))
4815 /* Verify that all registers are strictly consecutive */
4816 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4820 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4822 rtx slot
= XVECEXP (container
, 0, i
);
4823 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4824 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4832 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4834 rtx slot
= XVECEXP (container
, 0, i
);
4835 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4836 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4848 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4849 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4850 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4851 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4854 /* First ensure that we fit completely in registers. */
4857 t
= build_int_cst (TREE_TYPE (gpr
),
4858 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4859 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4860 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4861 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4862 gimplify_and_add (t
, pre_p
);
4866 t
= build_int_cst (TREE_TYPE (fpr
),
4867 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4869 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4870 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4871 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4872 gimplify_and_add (t
, pre_p
);
4875 /* Compute index to start of area used for integer regs. */
4878 /* int_addr = gpr + sav; */
4879 t
= fold_convert (ptr_type_node
, gpr
);
4880 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4881 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4882 gimplify_and_add (t
, pre_p
);
4886 /* sse_addr = fpr + sav; */
4887 t
= fold_convert (ptr_type_node
, fpr
);
4888 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4889 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4890 gimplify_and_add (t
, pre_p
);
4895 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4898 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4899 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4900 gimplify_and_add (t
, pre_p
);
4902 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4904 rtx slot
= XVECEXP (container
, 0, i
);
4905 rtx reg
= XEXP (slot
, 0);
4906 enum machine_mode mode
= GET_MODE (reg
);
4907 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4908 tree addr_type
= build_pointer_type (piece_type
);
4911 tree dest_addr
, dest
;
4913 if (SSE_REGNO_P (REGNO (reg
)))
4915 src_addr
= sse_addr
;
4916 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4920 src_addr
= int_addr
;
4921 src_offset
= REGNO (reg
) * 8;
4923 src_addr
= fold_convert (addr_type
, src_addr
);
4924 src_addr
= fold_build2 (PLUS_EXPR
, addr_type
, src_addr
,
4925 size_int (src_offset
));
4926 src
= build_va_arg_indirect_ref (src_addr
);
4928 dest_addr
= fold_convert (addr_type
, addr
);
4929 dest_addr
= fold_build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4930 size_int (INTVAL (XEXP (slot
, 1))));
4931 dest
= build_va_arg_indirect_ref (dest_addr
);
4933 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4934 gimplify_and_add (t
, pre_p
);
4940 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4941 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4942 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4943 gimplify_and_add (t
, pre_p
);
4947 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4948 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4949 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4950 gimplify_and_add (t
, pre_p
);
4953 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4954 gimplify_and_add (t
, pre_p
);
4956 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4957 append_to_statement_list (t
, pre_p
);
4960 /* ... otherwise out of the overflow area. */
4962 /* Care for on-stack alignment if needed. */
4963 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4964 || integer_zerop (TYPE_SIZE (type
)))
4968 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4969 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4970 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4971 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4972 build_int_cst (TREE_TYPE (t
), -align
));
4974 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4976 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4977 gimplify_and_add (t2
, pre_p
);
4979 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4980 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4981 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4982 gimplify_and_add (t
, pre_p
);
4986 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4987 append_to_statement_list (t
, pre_p
);
4990 ptrtype
= build_pointer_type (type
);
4991 addr
= fold_convert (ptrtype
, addr
);
4994 addr
= build_va_arg_indirect_ref (addr
);
4995 return build_va_arg_indirect_ref (addr
);
4998 /* Return nonzero if OPNUM's MEM should be matched
4999 in movabs* patterns. */
5002 ix86_check_movabs (rtx insn
, int opnum
)
5006 set
= PATTERN (insn
);
5007 if (GET_CODE (set
) == PARALLEL
)
5008 set
= XVECEXP (set
, 0, 0);
5009 gcc_assert (GET_CODE (set
) == SET
);
5010 mem
= XEXP (set
, opnum
);
5011 while (GET_CODE (mem
) == SUBREG
)
5012 mem
= SUBREG_REG (mem
);
5013 gcc_assert (MEM_P (mem
));
5014 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
5017 /* Initialize the table of extra 80387 mathematical constants. */
5020 init_ext_80387_constants (void)
5022 static const char * cst
[5] =
5024 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5025 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5026 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5027 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5028 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5032 for (i
= 0; i
< 5; i
++)
5034 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5035 /* Ensure each constant is rounded to XFmode precision. */
5036 real_convert (&ext_80387_constants_table
[i
],
5037 XFmode
, &ext_80387_constants_table
[i
]);
5040 ext_80387_constants_init
= 1;
5043 /* Return true if the constant is something that can be loaded with
5044 a special instruction. */
5047 standard_80387_constant_p (rtx x
)
5049 enum machine_mode mode
= GET_MODE (x
);
5053 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
5056 if (x
== CONST0_RTX (mode
))
5058 if (x
== CONST1_RTX (mode
))
5061 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5063 /* For XFmode constants, try to find a special 80387 instruction when
5064 optimizing for size or on those CPUs that benefit from them. */
5066 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5070 if (! ext_80387_constants_init
)
5071 init_ext_80387_constants ();
5073 for (i
= 0; i
< 5; i
++)
5074 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5078 /* Load of the constant -0.0 or -1.0 will be split as
5079 fldz;fchs or fld1;fchs sequence. */
5080 if (real_isnegzero (&r
))
5082 if (real_identical (&r
, &dconstm1
))
5088 /* Return the opcode of the special instruction to be used to load
5092 standard_80387_constant_opcode (rtx x
)
5094 switch (standard_80387_constant_p (x
))
5118 /* Return the CONST_DOUBLE representing the 80387 constant that is
5119 loaded by the specified special instruction. The argument IDX
5120 matches the return value from standard_80387_constant_p. */
5123 standard_80387_constant_rtx (int idx
)
5127 if (! ext_80387_constants_init
)
5128 init_ext_80387_constants ();
5144 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5148 /* Return 1 if mode is a valid mode for sse. */
5150 standard_sse_mode_p (enum machine_mode mode
)
5167 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5170 standard_sse_constant_p (rtx x
)
5172 enum machine_mode mode
= GET_MODE (x
);
5174 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5176 if (vector_all_ones_operand (x
, mode
)
5177 && standard_sse_mode_p (mode
))
5178 return TARGET_SSE2
? 2 : -1;
5183 /* Return the opcode of the special instruction to be used to load
5187 standard_sse_constant_opcode (rtx insn
, rtx x
)
5189 switch (standard_sse_constant_p (x
))
5192 if (get_attr_mode (insn
) == MODE_V4SF
)
5193 return "xorps\t%0, %0";
5194 else if (get_attr_mode (insn
) == MODE_V2DF
)
5195 return "xorpd\t%0, %0";
5197 return "pxor\t%0, %0";
5199 return "pcmpeqd\t%0, %0";
5204 /* Returns 1 if OP contains a symbol reference */
5207 symbolic_reference_mentioned_p (rtx op
)
5212 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5215 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5216 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5222 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5223 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5227 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5234 /* Return 1 if it is appropriate to emit `ret' instructions in the
5235 body of a function. Do this only if the epilogue is simple, needing a
5236 couple of insns. Prior to reloading, we can't tell how many registers
5237 must be saved, so return 0 then. Return 0 if there is no frame
5238 marker to de-allocate. */
5241 ix86_can_use_return_insn_p (void)
5243 struct ix86_frame frame
;
5245 if (! reload_completed
|| frame_pointer_needed
)
5248 /* Don't allow more than 32 pop, since that's all we can do
5249 with one instruction. */
5250 if (current_function_pops_args
5251 && current_function_args_size
>= 32768)
5254 ix86_compute_frame_layout (&frame
);
5255 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5258 /* Value should be nonzero if functions must have frame pointers.
5259 Zero means the frame pointer need not be set up (and parms may
5260 be accessed via the stack pointer) in functions that seem suitable. */
5263 ix86_frame_pointer_required (void)
5265 /* If we accessed previous frames, then the generated code expects
5266 to be able to access the saved ebp value in our frame. */
5267 if (cfun
->machine
->accesses_prev_frame
)
5270 /* Several x86 os'es need a frame pointer for other reasons,
5271 usually pertaining to setjmp. */
5272 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5275 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5276 the frame pointer by default. Turn it back on now if we've not
5277 got a leaf function. */
5278 if (TARGET_OMIT_LEAF_FRAME_POINTER
5279 && (!current_function_is_leaf
5280 || ix86_current_function_calls_tls_descriptor
))
5283 if (current_function_profile
)
5289 /* Record that the current function accesses previous call frames. */
5292 ix86_setup_frame_addresses (void)
5294 cfun
->machine
->accesses_prev_frame
= 1;
5297 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5298 # define USE_HIDDEN_LINKONCE 1
5300 # define USE_HIDDEN_LINKONCE 0
5303 static int pic_labels_used
;
5305 /* Fills in the label name that should be used for a pc thunk for
5306 the given register. */
5309 get_pc_thunk_name (char name
[32], unsigned int regno
)
5311 gcc_assert (!TARGET_64BIT
);
5313 if (USE_HIDDEN_LINKONCE
)
5314 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5316 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5320 /* This function generates code for -fpic that loads %ebx with
5321 the return address of the caller and then returns. */
5324 ix86_file_end (void)
5329 for (regno
= 0; regno
< 8; ++regno
)
5333 if (! ((pic_labels_used
>> regno
) & 1))
5336 get_pc_thunk_name (name
, regno
);
5341 switch_to_section (darwin_sections
[text_coal_section
]);
5342 fputs ("\t.weak_definition\t", asm_out_file
);
5343 assemble_name (asm_out_file
, name
);
5344 fputs ("\n\t.private_extern\t", asm_out_file
);
5345 assemble_name (asm_out_file
, name
);
5346 fputs ("\n", asm_out_file
);
5347 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5351 if (USE_HIDDEN_LINKONCE
)
5355 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5357 TREE_PUBLIC (decl
) = 1;
5358 TREE_STATIC (decl
) = 1;
5359 DECL_ONE_ONLY (decl
) = 1;
5361 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5362 switch_to_section (get_named_section (decl
, NULL
, 0));
5364 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5365 fputs ("\t.hidden\t", asm_out_file
);
5366 assemble_name (asm_out_file
, name
);
5367 fputc ('\n', asm_out_file
);
5368 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5372 switch_to_section (text_section
);
5373 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5376 xops
[0] = gen_rtx_REG (SImode
, regno
);
5377 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5378 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5379 output_asm_insn ("ret", xops
);
5382 if (NEED_INDICATE_EXEC_STACK
)
5383 file_end_indicate_exec_stack ();
5386 /* Emit code for the SET_GOT patterns. */
5389 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5395 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5397 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5398 xops
[2] = gen_rtx_MEM (Pmode
,
5399 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5400 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5402 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5403 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5404 an unadorned address. */
5405 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5406 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5407 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5411 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5413 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5415 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5418 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5420 output_asm_insn ("call\t%a2", xops
);
5423 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5424 is what will be referenced by the Mach-O PIC subsystem. */
5426 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5429 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5430 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5433 output_asm_insn ("pop{l}\t%0", xops
);
5438 get_pc_thunk_name (name
, REGNO (dest
));
5439 pic_labels_used
|= 1 << REGNO (dest
);
5441 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5442 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5443 output_asm_insn ("call\t%X2", xops
);
5444 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5445 is what will be referenced by the Mach-O PIC subsystem. */
5448 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5450 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5451 CODE_LABEL_NUMBER (label
));
5458 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5459 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5461 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5466 /* Generate an "push" pattern for input ARG. */
5471 return gen_rtx_SET (VOIDmode
,
5473 gen_rtx_PRE_DEC (Pmode
,
5474 stack_pointer_rtx
)),
5478 /* Return >= 0 if there is an unused call-clobbered register available
5479 for the entire function. */
5482 ix86_select_alt_pic_regnum (void)
5484 if (current_function_is_leaf
&& !current_function_profile
5485 && !ix86_current_function_calls_tls_descriptor
)
5488 for (i
= 2; i
>= 0; --i
)
5489 if (!regs_ever_live
[i
])
5493 return INVALID_REGNUM
;
5496 /* Return 1 if we need to save REGNO. */
5498 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5500 if (pic_offset_table_rtx
5501 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5502 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5503 || current_function_profile
5504 || current_function_calls_eh_return
5505 || current_function_uses_const_pool
))
5507 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5512 if (current_function_calls_eh_return
&& maybe_eh_return
)
5517 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5518 if (test
== INVALID_REGNUM
)
5525 if (cfun
->machine
->force_align_arg_pointer
5526 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5529 return (regs_ever_live
[regno
]
5530 && !call_used_regs
[regno
]
5531 && !fixed_regs
[regno
]
5532 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5535 /* Return number of registers to be saved on the stack. */
5538 ix86_nsaved_regs (void)
5543 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5544 if (ix86_save_reg (regno
, true))
5549 /* Return the offset between two registers, one to be eliminated, and the other
5550 its replacement, at the start of a routine. */
5553 ix86_initial_elimination_offset (int from
, int to
)
5555 struct ix86_frame frame
;
5556 ix86_compute_frame_layout (&frame
);
5558 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5559 return frame
.hard_frame_pointer_offset
;
5560 else if (from
== FRAME_POINTER_REGNUM
5561 && to
== HARD_FRAME_POINTER_REGNUM
)
5562 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5565 gcc_assert (to
== STACK_POINTER_REGNUM
);
5567 if (from
== ARG_POINTER_REGNUM
)
5568 return frame
.stack_pointer_offset
;
5570 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5571 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5575 /* Fill structure ix86_frame about frame of currently computed function. */
5578 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5580 HOST_WIDE_INT total_size
;
5581 unsigned int stack_alignment_needed
;
5582 HOST_WIDE_INT offset
;
5583 unsigned int preferred_alignment
;
5584 HOST_WIDE_INT size
= get_frame_size ();
5586 frame
->nregs
= ix86_nsaved_regs ();
5589 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5590 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5592 /* During reload iteration the amount of registers saved can change.
5593 Recompute the value as needed. Do not recompute when amount of registers
5594 didn't change as reload does multiple calls to the function and does not
5595 expect the decision to change within single iteration. */
5597 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5599 int count
= frame
->nregs
;
5601 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5602 /* The fast prologue uses move instead of push to save registers. This
5603 is significantly longer, but also executes faster as modern hardware
5604 can execute the moves in parallel, but can't do that for push/pop.
5606 Be careful about choosing what prologue to emit: When function takes
5607 many instructions to execute we may use slow version as well as in
5608 case function is known to be outside hot spot (this is known with
5609 feedback only). Weight the size of function by number of registers
5610 to save as it is cheap to use one or two push instructions but very
5611 slow to use many of them. */
5613 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5614 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5615 || (flag_branch_probabilities
5616 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5617 cfun
->machine
->use_fast_prologue_epilogue
= false;
5619 cfun
->machine
->use_fast_prologue_epilogue
5620 = !expensive_function_p (count
);
5622 if (TARGET_PROLOGUE_USING_MOVE
5623 && cfun
->machine
->use_fast_prologue_epilogue
)
5624 frame
->save_regs_using_mov
= true;
5626 frame
->save_regs_using_mov
= false;
5629 /* Skip return address and saved base pointer. */
5630 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5632 frame
->hard_frame_pointer_offset
= offset
;
5634 /* Do some sanity checking of stack_alignment_needed and
5635 preferred_alignment, since i386 port is the only using those features
5636 that may break easily. */
5638 gcc_assert (!size
|| stack_alignment_needed
);
5639 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5640 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5641 gcc_assert (stack_alignment_needed
5642 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5644 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5645 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5647 /* Register save area */
5648 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5651 if (ix86_save_varrargs_registers
)
5653 offset
+= X86_64_VARARGS_SIZE
;
5654 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5657 frame
->va_arg_size
= 0;
5659 /* Align start of frame for local function. */
5660 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5661 & -stack_alignment_needed
) - offset
;
5663 offset
+= frame
->padding1
;
5665 /* Frame pointer points here. */
5666 frame
->frame_pointer_offset
= offset
;
5670 /* Add outgoing arguments area. Can be skipped if we eliminated
5671 all the function calls as dead code.
5672 Skipping is however impossible when function calls alloca. Alloca
5673 expander assumes that last current_function_outgoing_args_size
5674 of stack frame are unused. */
5675 if (ACCUMULATE_OUTGOING_ARGS
5676 && (!current_function_is_leaf
|| current_function_calls_alloca
5677 || ix86_current_function_calls_tls_descriptor
))
5679 offset
+= current_function_outgoing_args_size
;
5680 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5683 frame
->outgoing_arguments_size
= 0;
5685 /* Align stack boundary. Only needed if we're calling another function
5687 if (!current_function_is_leaf
|| current_function_calls_alloca
5688 || ix86_current_function_calls_tls_descriptor
)
5689 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5690 & -preferred_alignment
) - offset
;
5692 frame
->padding2
= 0;
5694 offset
+= frame
->padding2
;
5696 /* We've reached end of stack frame. */
5697 frame
->stack_pointer_offset
= offset
;
5699 /* Size prologue needs to allocate. */
5700 frame
->to_allocate
=
5701 (size
+ frame
->padding1
+ frame
->padding2
5702 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5704 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5705 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5706 frame
->save_regs_using_mov
= false;
5708 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5709 && current_function_is_leaf
5710 && !ix86_current_function_calls_tls_descriptor
)
5712 frame
->red_zone_size
= frame
->to_allocate
;
5713 if (frame
->save_regs_using_mov
)
5714 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5715 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5716 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5719 frame
->red_zone_size
= 0;
5720 frame
->to_allocate
-= frame
->red_zone_size
;
5721 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5723 fprintf (stderr
, "\n");
5724 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5725 fprintf (stderr
, "size: %ld\n", (long)size
);
5726 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5727 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5728 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5729 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5730 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5731 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5732 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5733 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5734 (long)frame
->hard_frame_pointer_offset
);
5735 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5736 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5737 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5738 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5742 /* Emit code to save registers in the prologue. */
5745 ix86_emit_save_regs (void)
5750 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5751 if (ix86_save_reg (regno
, true))
5753 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5754 RTX_FRAME_RELATED_P (insn
) = 1;
5758 /* Emit code to save registers using MOV insns. First register
5759 is restored from POINTER + OFFSET. */
5761 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5766 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5767 if (ix86_save_reg (regno
, true))
5769 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5771 gen_rtx_REG (Pmode
, regno
));
5772 RTX_FRAME_RELATED_P (insn
) = 1;
5773 offset
+= UNITS_PER_WORD
;
5777 /* Expand prologue or epilogue stack adjustment.
5778 The pattern exist to put a dependency on all ebp-based memory accesses.
5779 STYLE should be negative if instructions should be marked as frame related,
5780 zero if %r11 register is live and cannot be freely used and positive
5784 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5789 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5790 else if (x86_64_immediate_operand (offset
, DImode
))
5791 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5795 /* r11 is used by indirect sibcall return as well, set before the
5796 epilogue and used after the epilogue. ATM indirect sibcall
5797 shouldn't be used together with huge frame sizes in one
5798 function because of the frame_size check in sibcall.c. */
5800 r11
= gen_rtx_REG (DImode
, R11_REG
);
5801 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5803 RTX_FRAME_RELATED_P (insn
) = 1;
5804 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5808 RTX_FRAME_RELATED_P (insn
) = 1;
5811 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5814 ix86_internal_arg_pointer (void)
5816 bool has_force_align_arg_pointer
=
5817 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5818 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5819 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5820 && DECL_NAME (current_function_decl
)
5821 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5822 && DECL_FILE_SCOPE_P (current_function_decl
))
5823 || ix86_force_align_arg_pointer
5824 || has_force_align_arg_pointer
)
5826 /* Nested functions can't realign the stack due to a register
5828 if (DECL_CONTEXT (current_function_decl
)
5829 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5831 if (ix86_force_align_arg_pointer
)
5832 warning (0, "-mstackrealign ignored for nested functions");
5833 if (has_force_align_arg_pointer
)
5834 error ("%s not supported for nested functions",
5835 ix86_force_align_arg_pointer_string
);
5836 return virtual_incoming_args_rtx
;
5838 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5839 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5842 return virtual_incoming_args_rtx
;
5845 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5846 This is called from dwarf2out.c to emit call frame instructions
5847 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5849 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5851 rtx unspec
= SET_SRC (pattern
);
5852 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5856 case UNSPEC_REG_SAVE
:
5857 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5858 SET_DEST (pattern
));
5860 case UNSPEC_DEF_CFA
:
5861 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5862 INTVAL (XVECEXP (unspec
, 0, 0)));
5869 /* Expand the prologue into a bunch of separate insns. */
5872 ix86_expand_prologue (void)
5876 struct ix86_frame frame
;
5877 HOST_WIDE_INT allocate
;
5879 ix86_compute_frame_layout (&frame
);
5881 if (cfun
->machine
->force_align_arg_pointer
)
5885 /* Grab the argument pointer. */
5886 x
= plus_constant (stack_pointer_rtx
, 4);
5887 y
= cfun
->machine
->force_align_arg_pointer
;
5888 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5889 RTX_FRAME_RELATED_P (insn
) = 1;
5891 /* The unwind info consists of two parts: install the fafp as the cfa,
5892 and record the fafp as the "save register" of the stack pointer.
5893 The later is there in order that the unwinder can see where it
5894 should restore the stack pointer across the and insn. */
5895 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5896 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5897 RTX_FRAME_RELATED_P (x
) = 1;
5898 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5900 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5901 RTX_FRAME_RELATED_P (y
) = 1;
5902 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5903 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5904 REG_NOTES (insn
) = x
;
5906 /* Align the stack. */
5907 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5910 /* And here we cheat like madmen with the unwind info. We force the
5911 cfa register back to sp+4, which is exactly what it was at the
5912 start of the function. Re-pushing the return address results in
5913 the return at the same spot relative to the cfa, and thus is
5914 correct wrt the unwind info. */
5915 x
= cfun
->machine
->force_align_arg_pointer
;
5916 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5917 insn
= emit_insn (gen_push (x
));
5918 RTX_FRAME_RELATED_P (insn
) = 1;
5921 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5922 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5923 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5924 REG_NOTES (insn
) = x
;
5927 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5928 slower on all targets. Also sdb doesn't like it. */
5930 if (frame_pointer_needed
)
5932 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5933 RTX_FRAME_RELATED_P (insn
) = 1;
5935 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5936 RTX_FRAME_RELATED_P (insn
) = 1;
5939 allocate
= frame
.to_allocate
;
5941 if (!frame
.save_regs_using_mov
)
5942 ix86_emit_save_regs ();
5944 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5946 /* When using red zone we may start register saving before allocating
5947 the stack frame saving one cycle of the prologue. */
5948 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5949 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5950 : stack_pointer_rtx
,
5951 -frame
.nregs
* UNITS_PER_WORD
);
5955 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5956 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5957 GEN_INT (-allocate
), -1);
5960 /* Only valid for Win32. */
5961 rtx eax
= gen_rtx_REG (Pmode
, 0);
5965 gcc_assert (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
);
5967 if (TARGET_64BIT_MS_ABI
)
5970 eax_live
= ix86_eax_live_at_start_p ();
5974 emit_insn (gen_push (eax
));
5975 allocate
-= UNITS_PER_WORD
;
5978 emit_move_insn (eax
, GEN_INT (allocate
));
5981 insn
= gen_allocate_stack_worker_64 (eax
);
5983 insn
= gen_allocate_stack_worker_32 (eax
);
5984 insn
= emit_insn (insn
);
5985 RTX_FRAME_RELATED_P (insn
) = 1;
5986 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5987 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5988 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5989 t
, REG_NOTES (insn
));
5993 if (frame_pointer_needed
)
5994 t
= plus_constant (hard_frame_pointer_rtx
,
5997 - frame
.nregs
* UNITS_PER_WORD
);
5999 t
= plus_constant (stack_pointer_rtx
, allocate
);
6000 emit_move_insn (eax
, gen_rtx_MEM (Pmode
, t
));
6004 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
6006 if (!frame_pointer_needed
|| !frame
.to_allocate
)
6007 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
6009 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
6010 -frame
.nregs
* UNITS_PER_WORD
);
6013 pic_reg_used
= false;
6014 if (pic_offset_table_rtx
6015 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
6016 || current_function_profile
))
6018 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
6020 if (alt_pic_reg_used
!= INVALID_REGNUM
)
6021 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
6023 pic_reg_used
= true;
6030 if (ix86_cmodel
== CM_LARGE_PIC
)
6032 rtx tmp_reg
= gen_rtx_REG (DImode
,
6033 FIRST_REX_INT_REG
+ 3 /* R11 */);
6034 rtx label
= gen_label_rtx ();
6036 LABEL_PRESERVE_P (label
) = 1;
6037 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
6038 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
6039 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6040 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
6041 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6042 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
6043 pic_offset_table_rtx
, tmp_reg
));
6046 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
6049 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
6051 /* Even with accurate pre-reload life analysis, we can wind up
6052 deleting all references to the pic register after reload.
6053 Consider if cross-jumping unifies two sides of a branch
6054 controlled by a comparison vs the only read from a global.
6055 In which case, allow the set_got to be deleted, though we're
6056 too late to do anything about the ebx save in the prologue. */
6057 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6060 /* Prevent function calls from be scheduled before the call to mcount.
6061 In the pic_reg_used case, make sure that the got load isn't deleted. */
6062 if (current_function_profile
)
6063 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
6066 /* Emit code to restore saved registers using MOV insns. First register
6067 is restored from POINTER + OFFSET. */
6069 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6070 int maybe_eh_return
)
6073 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
6075 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6076 if (ix86_save_reg (regno
, maybe_eh_return
))
6078 /* Ensure that adjust_address won't be forced to produce pointer
6079 out of range allowed by x86-64 instruction set. */
6080 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
6084 r11
= gen_rtx_REG (DImode
, R11_REG
);
6085 emit_move_insn (r11
, GEN_INT (offset
));
6086 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6087 base_address
= gen_rtx_MEM (Pmode
, r11
);
6090 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6091 adjust_address (base_address
, Pmode
, offset
));
6092 offset
+= UNITS_PER_WORD
;
6096 /* Restore function stack, frame, and registers. */
6099 ix86_expand_epilogue (int style
)
6102 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6103 struct ix86_frame frame
;
6104 HOST_WIDE_INT offset
;
6106 ix86_compute_frame_layout (&frame
);
6108 /* Calculate start of saved registers relative to ebp. Special care
6109 must be taken for the normal return case of a function using
6110 eh_return: the eax and edx registers are marked as saved, but not
6111 restored along this path. */
6112 offset
= frame
.nregs
;
6113 if (current_function_calls_eh_return
&& style
!= 2)
6115 offset
*= -UNITS_PER_WORD
;
6117 /* If we're only restoring one register and sp is not valid then
6118 using a move instruction to restore the register since it's
6119 less work than reloading sp and popping the register.
6121 The default code result in stack adjustment using add/lea instruction,
6122 while this code results in LEAVE instruction (or discrete equivalent),
6123 so it is profitable in some other cases as well. Especially when there
6124 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6125 and there is exactly one register to pop. This heuristic may need some
6126 tuning in future. */
6127 if ((!sp_valid
&& frame
.nregs
<= 1)
6128 || (TARGET_EPILOGUE_USING_MOVE
6129 && cfun
->machine
->use_fast_prologue_epilogue
6130 && (frame
.nregs
> 1 || frame
.to_allocate
))
6131 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6132 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6133 && cfun
->machine
->use_fast_prologue_epilogue
6134 && frame
.nregs
== 1)
6135 || current_function_calls_eh_return
)
6137 /* Restore registers. We can use ebp or esp to address the memory
6138 locations. If both are available, default to ebp, since offsets
6139 are known to be small. Only exception is esp pointing directly to the
6140 end of block of saved registers, where we may simplify addressing
6143 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6144 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6145 frame
.to_allocate
, style
== 2);
6147 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6148 offset
, style
== 2);
6150 /* eh_return epilogues need %ecx added to the stack pointer. */
6153 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6155 if (frame_pointer_needed
)
6157 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6158 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6159 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6161 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6162 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6164 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6169 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6170 tmp
= plus_constant (tmp
, (frame
.to_allocate
6171 + frame
.nregs
* UNITS_PER_WORD
));
6172 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6175 else if (!frame_pointer_needed
)
6176 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6177 GEN_INT (frame
.to_allocate
6178 + frame
.nregs
* UNITS_PER_WORD
),
6180 /* If not an i386, mov & pop is faster than "leave". */
6181 else if (TARGET_USE_LEAVE
|| optimize_size
6182 || !cfun
->machine
->use_fast_prologue_epilogue
)
6183 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6186 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6187 hard_frame_pointer_rtx
,
6190 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6192 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6197 /* First step is to deallocate the stack frame so that we can
6198 pop the registers. */
6201 gcc_assert (frame_pointer_needed
);
6202 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6203 hard_frame_pointer_rtx
,
6204 GEN_INT (offset
), style
);
6206 else if (frame
.to_allocate
)
6207 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6208 GEN_INT (frame
.to_allocate
), style
);
6210 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6211 if (ix86_save_reg (regno
, false))
6214 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6216 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6218 if (frame_pointer_needed
)
6220 /* Leave results in shorter dependency chains on CPUs that are
6221 able to grok it fast. */
6222 if (TARGET_USE_LEAVE
)
6223 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6224 else if (TARGET_64BIT
)
6225 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6227 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6231 if (cfun
->machine
->force_align_arg_pointer
)
6233 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6234 cfun
->machine
->force_align_arg_pointer
,
6238 /* Sibcall epilogues don't want a return instruction. */
6242 if (current_function_pops_args
&& current_function_args_size
)
6244 rtx popc
= GEN_INT (current_function_pops_args
);
6246 /* i386 can only pop 64K bytes. If asked to pop more, pop
6247 return address, do explicit add, and jump indirectly to the
6250 if (current_function_pops_args
>= 65536)
6252 rtx ecx
= gen_rtx_REG (SImode
, 2);
6254 /* There is no "pascal" calling convention in any 64bit ABI. */
6255 gcc_assert (!TARGET_64BIT
);
6257 emit_insn (gen_popsi1 (ecx
));
6258 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6259 emit_jump_insn (gen_return_indirect_internal (ecx
));
6262 emit_jump_insn (gen_return_pop_internal (popc
));
6265 emit_jump_insn (gen_return_internal ());
6268 /* Reset from the function's potential modifications. */
6271 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6272 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6274 if (pic_offset_table_rtx
)
6275 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6277 /* Mach-O doesn't support labels at the end of objects, so if
6278 it looks like we might want one, insert a NOP. */
6280 rtx insn
= get_last_insn ();
6283 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
6284 insn
= PREV_INSN (insn
);
6288 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
6289 fputs ("\tnop\n", file
);
6295 /* Extract the parts of an RTL expression that is a valid memory address
6296 for an instruction. Return 0 if the structure of the address is
6297 grossly off. Return -1 if the address contains ASHIFT, so it is not
6298 strictly valid, but still used for computing length of lea instruction. */
6301 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6303 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6304 rtx base_reg
, index_reg
;
6305 HOST_WIDE_INT scale
= 1;
6306 rtx scale_rtx
= NULL_RTX
;
6308 enum ix86_address_seg seg
= SEG_DEFAULT
;
6310 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6312 else if (GET_CODE (addr
) == PLUS
)
6322 addends
[n
++] = XEXP (op
, 1);
6325 while (GET_CODE (op
) == PLUS
);
6330 for (i
= n
; i
>= 0; --i
)
6333 switch (GET_CODE (op
))
6338 index
= XEXP (op
, 0);
6339 scale_rtx
= XEXP (op
, 1);
6343 if (XINT (op
, 1) == UNSPEC_TP
6344 && TARGET_TLS_DIRECT_SEG_REFS
6345 && seg
== SEG_DEFAULT
)
6346 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6375 else if (GET_CODE (addr
) == MULT
)
6377 index
= XEXP (addr
, 0); /* index*scale */
6378 scale_rtx
= XEXP (addr
, 1);
6380 else if (GET_CODE (addr
) == ASHIFT
)
6384 /* We're called for lea too, which implements ashift on occasion. */
6385 index
= XEXP (addr
, 0);
6386 tmp
= XEXP (addr
, 1);
6387 if (!CONST_INT_P (tmp
))
6389 scale
= INTVAL (tmp
);
6390 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6396 disp
= addr
; /* displacement */
6398 /* Extract the integral value of scale. */
6401 if (!CONST_INT_P (scale_rtx
))
6403 scale
= INTVAL (scale_rtx
);
6406 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6407 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6409 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6410 if (base_reg
&& index_reg
&& scale
== 1
6411 && (index_reg
== arg_pointer_rtx
6412 || index_reg
== frame_pointer_rtx
6413 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6416 tmp
= base
, base
= index
, index
= tmp
;
6417 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6420 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6421 if ((base_reg
== hard_frame_pointer_rtx
6422 || base_reg
== frame_pointer_rtx
6423 || base_reg
== arg_pointer_rtx
) && !disp
)
6426 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6427 Avoid this by transforming to [%esi+0]. */
6428 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6429 && base_reg
&& !index_reg
&& !disp
6431 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6434 /* Special case: encode reg+reg instead of reg*2. */
6435 if (!base
&& index
&& scale
&& scale
== 2)
6436 base
= index
, base_reg
= index_reg
, scale
= 1;
6438 /* Special case: scaling cannot be encoded without base or displacement. */
6439 if (!base
&& !disp
&& index
&& scale
!= 1)
6451 /* Return cost of the memory address x.
6452 For i386, it is better to use a complex address than let gcc copy
6453 the address into a reg and make a new pseudo. But not if the address
6454 requires to two regs - that would mean more pseudos with longer
6457 ix86_address_cost (rtx x
)
6459 struct ix86_address parts
;
6461 int ok
= ix86_decompose_address (x
, &parts
);
6465 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6466 parts
.base
= SUBREG_REG (parts
.base
);
6467 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6468 parts
.index
= SUBREG_REG (parts
.index
);
6470 /* More complex memory references are better. */
6471 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6473 if (parts
.seg
!= SEG_DEFAULT
)
6476 /* Attempt to minimize number of registers in the address. */
6478 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6480 && (!REG_P (parts
.index
)
6481 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6485 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6487 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6488 && parts
.base
!= parts
.index
)
6491 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6492 since it's predecode logic can't detect the length of instructions
6493 and it degenerates to vector decoded. Increase cost of such
6494 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6495 to split such addresses or even refuse such addresses at all.
6497 Following addressing modes are affected:
6502 The first and last case may be avoidable by explicitly coding the zero in
6503 memory address, but I don't have AMD-K6 machine handy to check this
6507 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6508 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6509 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6515 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6516 this is used for to form addresses to local data when -fPIC is in
6520 darwin_local_data_pic (rtx disp
)
6522 if (GET_CODE (disp
) == MINUS
)
6524 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6525 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6526 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6528 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6529 if (! strcmp (sym_name
, "<pic base>"))
6537 /* Determine if a given RTX is a valid constant. We already know this
6538 satisfies CONSTANT_P. */
6541 legitimate_constant_p (rtx x
)
6543 switch (GET_CODE (x
))
6548 if (GET_CODE (x
) == PLUS
)
6550 if (!CONST_INT_P (XEXP (x
, 1)))
6555 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6558 /* Only some unspecs are valid as "constants". */
6559 if (GET_CODE (x
) == UNSPEC
)
6560 switch (XINT (x
, 1))
6565 return TARGET_64BIT
;
6568 x
= XVECEXP (x
, 0, 0);
6569 return (GET_CODE (x
) == SYMBOL_REF
6570 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6572 x
= XVECEXP (x
, 0, 0);
6573 return (GET_CODE (x
) == SYMBOL_REF
6574 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6579 /* We must have drilled down to a symbol. */
6580 if (GET_CODE (x
) == LABEL_REF
)
6582 if (GET_CODE (x
) != SYMBOL_REF
)
6587 /* TLS symbols are never valid. */
6588 if (SYMBOL_REF_TLS_MODEL (x
))
6591 /* DLLIMPORT symbols are never valid. */
6592 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6593 && SYMBOL_REF_DLLIMPORT_P (x
))
6598 if (GET_MODE (x
) == TImode
6599 && x
!= CONST0_RTX (TImode
)
6605 if (x
== CONST0_RTX (GET_MODE (x
)))
6613 /* Otherwise we handle everything else in the move patterns. */
6617 /* Determine if it's legal to put X into the constant pool. This
6618 is not possible for the address of thread-local symbols, which
6619 is checked above. */
6622 ix86_cannot_force_const_mem (rtx x
)
6624 /* We can always put integral constants and vectors in memory. */
6625 switch (GET_CODE (x
))
6635 return !legitimate_constant_p (x
);
6638 /* Determine if a given RTX is a valid constant address. */
6641 constant_address_p (rtx x
)
6643 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6646 /* Nonzero if the constant value X is a legitimate general operand
6647 when generating PIC code. It is given that flag_pic is on and
6648 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6651 legitimate_pic_operand_p (rtx x
)
6655 switch (GET_CODE (x
))
6658 inner
= XEXP (x
, 0);
6659 if (GET_CODE (inner
) == PLUS
6660 && CONST_INT_P (XEXP (inner
, 1)))
6661 inner
= XEXP (inner
, 0);
6663 /* Only some unspecs are valid as "constants". */
6664 if (GET_CODE (inner
) == UNSPEC
)
6665 switch (XINT (inner
, 1))
6670 return TARGET_64BIT
;
6672 x
= XVECEXP (inner
, 0, 0);
6673 return (GET_CODE (x
) == SYMBOL_REF
6674 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6682 return legitimate_pic_address_disp_p (x
);
6689 /* Determine if a given CONST RTX is a valid memory displacement
6693 legitimate_pic_address_disp_p (rtx disp
)
6697 /* In 64bit mode we can allow direct addresses of symbols and labels
6698 when they are not dynamic symbols. */
6701 rtx op0
= disp
, op1
;
6703 switch (GET_CODE (disp
))
6709 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6711 op0
= XEXP (XEXP (disp
, 0), 0);
6712 op1
= XEXP (XEXP (disp
, 0), 1);
6713 if (!CONST_INT_P (op1
)
6714 || INTVAL (op1
) >= 16*1024*1024
6715 || INTVAL (op1
) < -16*1024*1024)
6717 if (GET_CODE (op0
) == LABEL_REF
)
6719 if (GET_CODE (op0
) != SYMBOL_REF
)
6724 /* TLS references should always be enclosed in UNSPEC. */
6725 if (SYMBOL_REF_TLS_MODEL (op0
))
6727 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
6728 && ix86_cmodel
!= CM_LARGE_PIC
)
6736 if (GET_CODE (disp
) != CONST
)
6738 disp
= XEXP (disp
, 0);
6742 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6743 of GOT tables. We should not need these anyway. */
6744 if (GET_CODE (disp
) != UNSPEC
6745 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6746 && XINT (disp
, 1) != UNSPEC_GOTOFF
6747 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
6750 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6751 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6757 if (GET_CODE (disp
) == PLUS
)
6759 if (!CONST_INT_P (XEXP (disp
, 1)))
6761 disp
= XEXP (disp
, 0);
6765 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6768 if (GET_CODE (disp
) != UNSPEC
)
6771 switch (XINT (disp
, 1))
6776 /* We need to check for both symbols and labels because VxWorks loads
6777 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6779 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6780 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
6782 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6783 While ABI specify also 32bit relocation but we don't produce it in
6784 small PIC model at all. */
6785 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6786 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6788 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
6790 case UNSPEC_GOTTPOFF
:
6791 case UNSPEC_GOTNTPOFF
:
6792 case UNSPEC_INDNTPOFF
:
6795 disp
= XVECEXP (disp
, 0, 0);
6796 return (GET_CODE (disp
) == SYMBOL_REF
6797 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6799 disp
= XVECEXP (disp
, 0, 0);
6800 return (GET_CODE (disp
) == SYMBOL_REF
6801 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6803 disp
= XVECEXP (disp
, 0, 0);
6804 return (GET_CODE (disp
) == SYMBOL_REF
6805 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6811 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6812 memory address for an instruction. The MODE argument is the machine mode
6813 for the MEM expression that wants to use this address.
6815 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6816 convert common non-canonical forms to canonical form so that they will
6820 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
6821 rtx addr
, int strict
)
6823 struct ix86_address parts
;
6824 rtx base
, index
, disp
;
6825 HOST_WIDE_INT scale
;
6826 const char *reason
= NULL
;
6827 rtx reason_rtx
= NULL_RTX
;
6829 if (ix86_decompose_address (addr
, &parts
) <= 0)
6831 reason
= "decomposition failed";
6836 index
= parts
.index
;
6838 scale
= parts
.scale
;
6840 /* Validate base register.
6842 Don't allow SUBREG's that span more than a word here. It can lead to spill
6843 failures when the base is one word out of a two word structure, which is
6844 represented internally as a DImode int. */
6853 else if (GET_CODE (base
) == SUBREG
6854 && REG_P (SUBREG_REG (base
))
6855 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6857 reg
= SUBREG_REG (base
);
6860 reason
= "base is not a register";
6864 if (GET_MODE (base
) != Pmode
)
6866 reason
= "base is not in Pmode";
6870 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6871 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6873 reason
= "base is not valid";
6878 /* Validate index register.
6880 Don't allow SUBREG's that span more than a word here -- same as above. */
6889 else if (GET_CODE (index
) == SUBREG
6890 && REG_P (SUBREG_REG (index
))
6891 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6893 reg
= SUBREG_REG (index
);
6896 reason
= "index is not a register";
6900 if (GET_MODE (index
) != Pmode
)
6902 reason
= "index is not in Pmode";
6906 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6907 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6909 reason
= "index is not valid";
6914 /* Validate scale factor. */
6917 reason_rtx
= GEN_INT (scale
);
6920 reason
= "scale without index";
6924 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6926 reason
= "scale is not a valid multiplier";
6931 /* Validate displacement. */
6936 if (GET_CODE (disp
) == CONST
6937 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6938 switch (XINT (XEXP (disp
, 0), 1))
6940 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6941 used. While ABI specify also 32bit relocations, we don't produce
6942 them at all and use IP relative instead. */
6945 gcc_assert (flag_pic
);
6947 goto is_legitimate_pic
;
6948 reason
= "64bit address unspec";
6951 case UNSPEC_GOTPCREL
:
6952 gcc_assert (flag_pic
);
6953 goto is_legitimate_pic
;
6955 case UNSPEC_GOTTPOFF
:
6956 case UNSPEC_GOTNTPOFF
:
6957 case UNSPEC_INDNTPOFF
:
6963 reason
= "invalid address unspec";
6967 else if (SYMBOLIC_CONST (disp
)
6971 && MACHOPIC_INDIRECT
6972 && !machopic_operand_p (disp
)
6978 if (TARGET_64BIT
&& (index
|| base
))
6980 /* foo@dtpoff(%rX) is ok. */
6981 if (GET_CODE (disp
) != CONST
6982 || GET_CODE (XEXP (disp
, 0)) != PLUS
6983 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6984 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
6985 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6986 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6988 reason
= "non-constant pic memory reference";
6992 else if (! legitimate_pic_address_disp_p (disp
))
6994 reason
= "displacement is an invalid pic construct";
6998 /* This code used to verify that a symbolic pic displacement
6999 includes the pic_offset_table_rtx register.
7001 While this is good idea, unfortunately these constructs may
7002 be created by "adds using lea" optimization for incorrect
7011 This code is nonsensical, but results in addressing
7012 GOT table with pic_offset_table_rtx base. We can't
7013 just refuse it easily, since it gets matched by
7014 "addsi3" pattern, that later gets split to lea in the
7015 case output register differs from input. While this
7016 can be handled by separate addsi pattern for this case
7017 that never results in lea, this seems to be easier and
7018 correct fix for crash to disable this test. */
7020 else if (GET_CODE (disp
) != LABEL_REF
7021 && !CONST_INT_P (disp
)
7022 && (GET_CODE (disp
) != CONST
7023 || !legitimate_constant_p (disp
))
7024 && (GET_CODE (disp
) != SYMBOL_REF
7025 || !legitimate_constant_p (disp
)))
7027 reason
= "displacement is not constant";
7030 else if (TARGET_64BIT
7031 && !x86_64_immediate_operand (disp
, VOIDmode
))
7033 reason
= "displacement is out of range";
7038 /* Everything looks valid. */
7045 /* Return a unique alias set for the GOT. */
7047 static HOST_WIDE_INT
7048 ix86_GOT_alias_set (void)
7050 static HOST_WIDE_INT set
= -1;
7052 set
= new_alias_set ();
7056 /* Return a legitimate reference for ORIG (an address) using the
7057 register REG. If REG is 0, a new pseudo is generated.
7059 There are two types of references that must be handled:
7061 1. Global data references must load the address from the GOT, via
7062 the PIC reg. An insn is emitted to do this load, and the reg is
7065 2. Static data references, constant pool addresses, and code labels
7066 compute the address as an offset from the GOT, whose base is in
7067 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7068 differentiate them from global data objects. The returned
7069 address is the PIC reg + an unspec constant.
7071 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7072 reg also appears in the address. */
7075 legitimize_pic_address (rtx orig
, rtx reg
)
7082 if (TARGET_MACHO
&& !TARGET_64BIT
)
7085 reg
= gen_reg_rtx (Pmode
);
7086 /* Use the generic Mach-O PIC machinery. */
7087 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7091 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7093 else if (TARGET_64BIT
7094 && ix86_cmodel
!= CM_SMALL_PIC
7095 && gotoff_operand (addr
, Pmode
))
7098 /* This symbol may be referenced via a displacement from the PIC
7099 base address (@GOTOFF). */
7101 if (reload_in_progress
)
7102 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7103 if (GET_CODE (addr
) == CONST
)
7104 addr
= XEXP (addr
, 0);
7105 if (GET_CODE (addr
) == PLUS
)
7107 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7109 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7112 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7113 new = gen_rtx_CONST (Pmode
, new);
7115 tmpreg
= gen_reg_rtx (Pmode
);
7118 emit_move_insn (tmpreg
, new);
7122 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7123 tmpreg
, 1, OPTAB_DIRECT
);
7126 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7128 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7130 /* This symbol may be referenced via a displacement from the PIC
7131 base address (@GOTOFF). */
7133 if (reload_in_progress
)
7134 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7135 if (GET_CODE (addr
) == CONST
)
7136 addr
= XEXP (addr
, 0);
7137 if (GET_CODE (addr
) == PLUS
)
7139 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7141 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7144 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7145 new = gen_rtx_CONST (Pmode
, new);
7146 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7150 emit_move_insn (reg
, new);
7154 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7155 /* We can't use @GOTOFF for text labels on VxWorks;
7156 see gotoff_operand. */
7157 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7159 /* Given that we've already handled dllimport variables separately
7160 in legitimize_address, and all other variables should satisfy
7161 legitimate_pic_address_disp_p, we should never arrive here. */
7162 gcc_assert (!TARGET_64BIT_MS_ABI
);
7164 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7166 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7167 new = gen_rtx_CONST (Pmode
, new);
7168 new = gen_const_mem (Pmode
, new);
7169 set_mem_alias_set (new, ix86_GOT_alias_set ());
7172 reg
= gen_reg_rtx (Pmode
);
7173 /* Use directly gen_movsi, otherwise the address is loaded
7174 into register for CSE. We don't want to CSE this addresses,
7175 instead we CSE addresses from the GOT table, so skip this. */
7176 emit_insn (gen_movsi (reg
, new));
7181 /* This symbol must be referenced via a load from the
7182 Global Offset Table (@GOT). */
7184 if (reload_in_progress
)
7185 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7186 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7187 new = gen_rtx_CONST (Pmode
, new);
7189 new = force_reg (Pmode
, new);
7190 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7191 new = gen_const_mem (Pmode
, new);
7192 set_mem_alias_set (new, ix86_GOT_alias_set ());
7195 reg
= gen_reg_rtx (Pmode
);
7196 emit_move_insn (reg
, new);
7202 if (CONST_INT_P (addr
)
7203 && !x86_64_immediate_operand (addr
, VOIDmode
))
7207 emit_move_insn (reg
, addr
);
7211 new = force_reg (Pmode
, addr
);
7213 else if (GET_CODE (addr
) == CONST
)
7215 addr
= XEXP (addr
, 0);
7217 /* We must match stuff we generate before. Assume the only
7218 unspecs that can get here are ours. Not that we could do
7219 anything with them anyway.... */
7220 if (GET_CODE (addr
) == UNSPEC
7221 || (GET_CODE (addr
) == PLUS
7222 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7224 gcc_assert (GET_CODE (addr
) == PLUS
);
7226 if (GET_CODE (addr
) == PLUS
)
7228 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7230 /* Check first to see if this is a constant offset from a @GOTOFF
7231 symbol reference. */
7232 if (gotoff_operand (op0
, Pmode
)
7233 && CONST_INT_P (op1
))
7237 if (reload_in_progress
)
7238 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7239 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7241 new = gen_rtx_PLUS (Pmode
, new, op1
);
7242 new = gen_rtx_CONST (Pmode
, new);
7243 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7247 emit_move_insn (reg
, new);
7253 if (INTVAL (op1
) < -16*1024*1024
7254 || INTVAL (op1
) >= 16*1024*1024)
7256 if (!x86_64_immediate_operand (op1
, Pmode
))
7257 op1
= force_reg (Pmode
, op1
);
7258 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7264 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7265 new = legitimize_pic_address (XEXP (addr
, 1),
7266 base
== reg
? NULL_RTX
: reg
);
7268 if (CONST_INT_P (new))
7269 new = plus_constant (base
, INTVAL (new));
7272 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7274 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7275 new = XEXP (new, 1);
7277 new = gen_rtx_PLUS (Pmode
, base
, new);
7285 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7288 get_thread_pointer (int to_reg
)
7292 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7296 reg
= gen_reg_rtx (Pmode
);
7297 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7298 insn
= emit_insn (insn
);
7303 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7304 false if we expect this to be used for a memory address and true if
7305 we expect to load the address into a register. */
7308 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7310 rtx dest
, base
, off
, pic
, tp
;
7315 case TLS_MODEL_GLOBAL_DYNAMIC
:
7316 dest
= gen_reg_rtx (Pmode
);
7317 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7319 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7321 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7324 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7325 insns
= get_insns ();
7328 CONST_OR_PURE_CALL_P (insns
) = 1;
7329 emit_libcall_block (insns
, dest
, rax
, x
);
7331 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7332 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7334 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7336 if (TARGET_GNU2_TLS
)
7338 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7340 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7344 case TLS_MODEL_LOCAL_DYNAMIC
:
7345 base
= gen_reg_rtx (Pmode
);
7346 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7348 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7350 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7353 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7354 insns
= get_insns ();
7357 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7358 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7359 CONST_OR_PURE_CALL_P (insns
) = 1;
7360 emit_libcall_block (insns
, base
, rax
, note
);
7362 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7363 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7365 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7367 if (TARGET_GNU2_TLS
)
7369 rtx x
= ix86_tls_module_base ();
7371 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7372 gen_rtx_MINUS (Pmode
, x
, tp
));
7375 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7376 off
= gen_rtx_CONST (Pmode
, off
);
7378 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7380 if (TARGET_GNU2_TLS
)
7382 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7384 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7389 case TLS_MODEL_INITIAL_EXEC
:
7393 type
= UNSPEC_GOTNTPOFF
;
7397 if (reload_in_progress
)
7398 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7399 pic
= pic_offset_table_rtx
;
7400 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7402 else if (!TARGET_ANY_GNU_TLS
)
7404 pic
= gen_reg_rtx (Pmode
);
7405 emit_insn (gen_set_got (pic
));
7406 type
= UNSPEC_GOTTPOFF
;
7411 type
= UNSPEC_INDNTPOFF
;
7414 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7415 off
= gen_rtx_CONST (Pmode
, off
);
7417 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7418 off
= gen_const_mem (Pmode
, off
);
7419 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7421 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7423 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7424 off
= force_reg (Pmode
, off
);
7425 return gen_rtx_PLUS (Pmode
, base
, off
);
7429 base
= get_thread_pointer (true);
7430 dest
= gen_reg_rtx (Pmode
);
7431 emit_insn (gen_subsi3 (dest
, base
, off
));
7435 case TLS_MODEL_LOCAL_EXEC
:
7436 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7437 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7438 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7439 off
= gen_rtx_CONST (Pmode
, off
);
7441 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7443 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7444 return gen_rtx_PLUS (Pmode
, base
, off
);
7448 base
= get_thread_pointer (true);
7449 dest
= gen_reg_rtx (Pmode
);
7450 emit_insn (gen_subsi3 (dest
, base
, off
));
7461 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7464 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
7465 htab_t dllimport_map
;
7468 get_dllimport_decl (tree decl
)
7470 struct tree_map
*h
, in
;
7474 size_t namelen
, prefixlen
;
7480 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
7482 in
.hash
= htab_hash_pointer (decl
);
7483 in
.base
.from
= decl
;
7484 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
7489 *loc
= h
= ggc_alloc (sizeof (struct tree_map
));
7491 h
->base
.from
= decl
;
7492 h
->to
= to
= build_decl (VAR_DECL
, NULL
, ptr_type_node
);
7493 DECL_ARTIFICIAL (to
) = 1;
7494 DECL_IGNORED_P (to
) = 1;
7495 DECL_EXTERNAL (to
) = 1;
7496 TREE_READONLY (to
) = 1;
7498 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
7499 name
= targetm
.strip_name_encoding (name
);
7500 if (name
[0] == FASTCALL_PREFIX
)
7506 prefix
= "*__imp__";
7508 namelen
= strlen (name
);
7509 prefixlen
= strlen (prefix
);
7510 imp_name
= alloca (namelen
+ prefixlen
+ 1);
7511 memcpy (imp_name
, prefix
, prefixlen
);
7512 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
7514 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
7515 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
7516 SET_SYMBOL_REF_DECL (rtl
, to
);
7517 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
7519 rtl
= gen_const_mem (Pmode
, rtl
);
7520 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
7522 SET_DECL_RTL (to
, rtl
);
7527 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7528 true if we require the result be a register. */
7531 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
7536 gcc_assert (SYMBOL_REF_DECL (symbol
));
7537 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
7539 x
= DECL_RTL (imp_decl
);
7541 x
= force_reg (Pmode
, x
);
7545 /* Try machine-dependent ways of modifying an illegitimate address
7546 to be legitimate. If we find one, return the new, valid address.
7547 This macro is used in only one place: `memory_address' in explow.c.
7549 OLDX is the address as it was before break_out_memory_refs was called.
7550 In some cases it is useful to look at this to decide what needs to be done.
7552 MODE and WIN are passed so that this macro can use
7553 GO_IF_LEGITIMATE_ADDRESS.
7555 It is always safe for this macro to do nothing. It exists to recognize
7556 opportunities to optimize the output.
7558 For the 80386, we handle X+REG by loading X into a register R and
7559 using R+REG. R will go in a general reg and indexing will be used.
7560 However, if REG is a broken-out memory address or multiplication,
7561 nothing needs to be done because REG can certainly go in a general reg.
7563 When -fpic is used, special handling is needed for symbolic references.
7564 See comments by legitimize_pic_address in i386.c for details. */
7567 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7572 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7574 return legitimize_tls_address (x
, log
, false);
7575 if (GET_CODE (x
) == CONST
7576 && GET_CODE (XEXP (x
, 0)) == PLUS
7577 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7578 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7580 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7581 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7584 if (flag_pic
&& SYMBOLIC_CONST (x
))
7585 return legitimize_pic_address (x
, 0);
7587 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
7589 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
7590 return legitimize_dllimport_symbol (x
, true);
7591 if (GET_CODE (x
) == CONST
7592 && GET_CODE (XEXP (x
, 0)) == PLUS
7593 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7594 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
7596 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
7597 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7601 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7602 if (GET_CODE (x
) == ASHIFT
7603 && CONST_INT_P (XEXP (x
, 1))
7604 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7607 log
= INTVAL (XEXP (x
, 1));
7608 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7609 GEN_INT (1 << log
));
7612 if (GET_CODE (x
) == PLUS
)
7614 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7616 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7617 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7618 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7621 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7622 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7623 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7624 GEN_INT (1 << log
));
7627 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7628 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7629 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7632 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7633 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7634 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7635 GEN_INT (1 << log
));
7638 /* Put multiply first if it isn't already. */
7639 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7641 rtx tmp
= XEXP (x
, 0);
7642 XEXP (x
, 0) = XEXP (x
, 1);
7647 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7648 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7649 created by virtual register instantiation, register elimination, and
7650 similar optimizations. */
7651 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7654 x
= gen_rtx_PLUS (Pmode
,
7655 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7656 XEXP (XEXP (x
, 1), 0)),
7657 XEXP (XEXP (x
, 1), 1));
7661 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7662 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7663 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7664 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7665 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7666 && CONSTANT_P (XEXP (x
, 1)))
7669 rtx other
= NULL_RTX
;
7671 if (CONST_INT_P (XEXP (x
, 1)))
7673 constant
= XEXP (x
, 1);
7674 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7676 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7678 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7679 other
= XEXP (x
, 1);
7687 x
= gen_rtx_PLUS (Pmode
,
7688 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7689 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7690 plus_constant (other
, INTVAL (constant
)));
7694 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7697 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7700 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7703 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7706 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7710 && REG_P (XEXP (x
, 1))
7711 && REG_P (XEXP (x
, 0)))
7714 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7717 x
= legitimize_pic_address (x
, 0);
7720 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7723 if (REG_P (XEXP (x
, 0)))
7725 rtx temp
= gen_reg_rtx (Pmode
);
7726 rtx val
= force_operand (XEXP (x
, 1), temp
);
7728 emit_move_insn (temp
, val
);
7734 else if (REG_P (XEXP (x
, 1)))
7736 rtx temp
= gen_reg_rtx (Pmode
);
7737 rtx val
= force_operand (XEXP (x
, 0), temp
);
7739 emit_move_insn (temp
, val
);
7749 /* Print an integer constant expression in assembler syntax. Addition
7750 and subtraction are the only arithmetic that may appear in these
7751 expressions. FILE is the stdio stream to write to, X is the rtx, and
7752 CODE is the operand print code from the output string. */
7755 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7759 switch (GET_CODE (x
))
7762 gcc_assert (flag_pic
);
7767 if (! TARGET_MACHO
|| TARGET_64BIT
)
7768 output_addr_const (file
, x
);
7771 const char *name
= XSTR (x
, 0);
7773 /* Mark the decl as referenced so that cgraph will
7774 output the function. */
7775 if (SYMBOL_REF_DECL (x
))
7776 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7779 if (MACHOPIC_INDIRECT
7780 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7781 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7783 assemble_name (file
, name
);
7785 if (!TARGET_MACHO
&& !TARGET_64BIT_MS_ABI
7786 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7787 fputs ("@PLT", file
);
7794 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7795 assemble_name (asm_out_file
, buf
);
7799 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7803 /* This used to output parentheses around the expression,
7804 but that does not work on the 386 (either ATT or BSD assembler). */
7805 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7809 if (GET_MODE (x
) == VOIDmode
)
7811 /* We can use %d if the number is <32 bits and positive. */
7812 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7813 fprintf (file
, "0x%lx%08lx",
7814 (unsigned long) CONST_DOUBLE_HIGH (x
),
7815 (unsigned long) CONST_DOUBLE_LOW (x
));
7817 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7820 /* We can't handle floating point constants;
7821 PRINT_OPERAND must handle them. */
7822 output_operand_lossage ("floating constant misused");
7826 /* Some assemblers need integer constants to appear first. */
7827 if (CONST_INT_P (XEXP (x
, 0)))
7829 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7831 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7835 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7836 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7838 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7844 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7845 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7847 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7849 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7853 gcc_assert (XVECLEN (x
, 0) == 1);
7854 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7855 switch (XINT (x
, 1))
7858 fputs ("@GOT", file
);
7861 fputs ("@GOTOFF", file
);
7864 fputs ("@PLTOFF", file
);
7866 case UNSPEC_GOTPCREL
:
7867 fputs ("@GOTPCREL(%rip)", file
);
7869 case UNSPEC_GOTTPOFF
:
7870 /* FIXME: This might be @TPOFF in Sun ld too. */
7871 fputs ("@GOTTPOFF", file
);
7874 fputs ("@TPOFF", file
);
7878 fputs ("@TPOFF", file
);
7880 fputs ("@NTPOFF", file
);
7883 fputs ("@DTPOFF", file
);
7885 case UNSPEC_GOTNTPOFF
:
7887 fputs ("@GOTTPOFF(%rip)", file
);
7889 fputs ("@GOTNTPOFF", file
);
7891 case UNSPEC_INDNTPOFF
:
7892 fputs ("@INDNTPOFF", file
);
7895 output_operand_lossage ("invalid UNSPEC as operand");
7901 output_operand_lossage ("invalid expression as operand");
7905 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7906 We need to emit DTP-relative relocations. */
7908 static void ATTRIBUTE_UNUSED
7909 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7911 fputs (ASM_LONG
, file
);
7912 output_addr_const (file
, x
);
7913 fputs ("@DTPOFF", file
);
7919 fputs (", 0", file
);
7926 /* In the name of slightly smaller debug output, and to cater to
7927 general assembler lossage, recognize PIC+GOTOFF and turn it back
7928 into a direct symbol reference.
7930 On Darwin, this is necessary to avoid a crash, because Darwin
7931 has a different PIC label for each routine but the DWARF debugging
7932 information is not associated with any particular routine, so it's
7933 necessary to remove references to the PIC label from RTL stored by
7934 the DWARF output code. */
7937 ix86_delegitimize_address (rtx orig_x
)
7940 /* reg_addend is NULL or a multiple of some register. */
7941 rtx reg_addend
= NULL_RTX
;
7942 /* const_addend is NULL or a const_int. */
7943 rtx const_addend
= NULL_RTX
;
7944 /* This is the result, or NULL. */
7945 rtx result
= NULL_RTX
;
7952 if (GET_CODE (x
) != CONST
7953 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7954 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7957 return XVECEXP (XEXP (x
, 0), 0, 0);
7960 if (GET_CODE (x
) != PLUS
7961 || GET_CODE (XEXP (x
, 1)) != CONST
)
7964 if (REG_P (XEXP (x
, 0))
7965 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7966 /* %ebx + GOT/GOTOFF */
7968 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7970 /* %ebx + %reg * scale + GOT/GOTOFF */
7971 reg_addend
= XEXP (x
, 0);
7972 if (REG_P (XEXP (reg_addend
, 0))
7973 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7974 reg_addend
= XEXP (reg_addend
, 1);
7975 else if (REG_P (XEXP (reg_addend
, 1))
7976 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7977 reg_addend
= XEXP (reg_addend
, 0);
7980 if (!REG_P (reg_addend
)
7981 && GET_CODE (reg_addend
) != MULT
7982 && GET_CODE (reg_addend
) != ASHIFT
)
7988 x
= XEXP (XEXP (x
, 1), 0);
7989 if (GET_CODE (x
) == PLUS
7990 && CONST_INT_P (XEXP (x
, 1)))
7992 const_addend
= XEXP (x
, 1);
7996 if (GET_CODE (x
) == UNSPEC
7997 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7998 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7999 result
= XVECEXP (x
, 0, 0);
8001 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
8003 result
= XEXP (x
, 0);
8009 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
8011 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
8015 /* If X is a machine specific address (i.e. a symbol or label being
8016 referenced as a displacement from the GOT implemented using an
8017 UNSPEC), then return the base term. Otherwise return X. */
8020 ix86_find_base_term (rtx x
)
8026 if (GET_CODE (x
) != CONST
)
8029 if (GET_CODE (term
) == PLUS
8030 && (CONST_INT_P (XEXP (term
, 1))
8031 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
8032 term
= XEXP (term
, 0);
8033 if (GET_CODE (term
) != UNSPEC
8034 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
8037 term
= XVECEXP (term
, 0, 0);
8039 if (GET_CODE (term
) != SYMBOL_REF
8040 && GET_CODE (term
) != LABEL_REF
)
8046 term
= ix86_delegitimize_address (x
);
8048 if (GET_CODE (term
) != SYMBOL_REF
8049 && GET_CODE (term
) != LABEL_REF
)
8056 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
8061 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
8063 enum rtx_code second_code
, bypass_code
;
8064 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
8065 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
8066 code
= ix86_fp_compare_code_to_integer (code
);
8070 code
= reverse_condition (code
);
8081 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
8085 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8086 Those same assemblers have the same but opposite lossage on cmov. */
8087 gcc_assert (mode
== CCmode
);
8088 suffix
= fp
? "nbe" : "a";
8108 gcc_assert (mode
== CCmode
);
8130 gcc_assert (mode
== CCmode
);
8131 suffix
= fp
? "nb" : "ae";
8134 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8138 gcc_assert (mode
== CCmode
);
8142 suffix
= fp
? "u" : "p";
8145 suffix
= fp
? "nu" : "np";
8150 fputs (suffix
, file
);
8153 /* Print the name of register X to FILE based on its machine mode and number.
8154 If CODE is 'w', pretend the mode is HImode.
8155 If CODE is 'b', pretend the mode is QImode.
8156 If CODE is 'k', pretend the mode is SImode.
8157 If CODE is 'q', pretend the mode is DImode.
8158 If CODE is 'h', pretend the reg is the 'high' byte register.
8159 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8162 print_reg (rtx x
, int code
, FILE *file
)
8164 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
8165 && REGNO (x
) != FRAME_POINTER_REGNUM
8166 && REGNO (x
) != FLAGS_REG
8167 && REGNO (x
) != FPSR_REG
8168 && REGNO (x
) != FPCR_REG
);
8170 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8173 if (code
== 'w' || MMX_REG_P (x
))
8175 else if (code
== 'b')
8177 else if (code
== 'k')
8179 else if (code
== 'q')
8181 else if (code
== 'y')
8183 else if (code
== 'h')
8186 code
= GET_MODE_SIZE (GET_MODE (x
));
8188 /* Irritatingly, AMD extended registers use different naming convention
8189 from the normal registers. */
8190 if (REX_INT_REG_P (x
))
8192 gcc_assert (TARGET_64BIT
);
8196 error ("extended registers have no high halves");
8199 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8202 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8205 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8208 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8211 error ("unsupported operand size for extended register");
8219 if (STACK_TOP_P (x
))
8221 fputs ("st(0)", file
);
8228 if (! ANY_FP_REG_P (x
))
8229 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8234 fputs (hi_reg_name
[REGNO (x
)], file
);
8237 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8239 fputs (qi_reg_name
[REGNO (x
)], file
);
8242 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8244 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8251 /* Locate some local-dynamic symbol still in use by this function
8252 so that we can print its name in some tls_local_dynamic_base
8256 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8260 if (GET_CODE (x
) == SYMBOL_REF
8261 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8263 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8271 get_some_local_dynamic_name (void)
8275 if (cfun
->machine
->some_ld_name
)
8276 return cfun
->machine
->some_ld_name
;
8278 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8280 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8281 return cfun
->machine
->some_ld_name
;
8287 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8288 C -- print opcode suffix for set/cmov insn.
8289 c -- like C, but print reversed condition
8290 F,f -- likewise, but for floating-point.
8291 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8293 R -- print the prefix for register names.
8294 z -- print the opcode suffix for the size of the current operand.
8295 * -- print a star (in certain assembler syntax)
8296 A -- print an absolute memory reference.
8297 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8298 s -- print a shift double count, followed by the assemblers argument
8300 b -- print the QImode name of the register for the indicated operand.
8301 %b0 would print %al if operands[0] is reg 0.
8302 w -- likewise, print the HImode name of the register.
8303 k -- likewise, print the SImode name of the register.
8304 q -- likewise, print the DImode name of the register.
8305 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8306 y -- print "st(0)" instead of "st" as a register.
8307 D -- print condition for SSE cmp instruction.
8308 P -- if PIC, print an @PLT suffix.
8309 X -- don't print any sort of PIC '@' suffix for a symbol.
8310 & -- print some in-use local-dynamic symbol name.
8311 H -- print a memory address offset by 8; used for sse high-parts
8315 print_operand (FILE *file
, rtx x
, int code
)
8322 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8327 assemble_name (file
, get_some_local_dynamic_name ());
8331 switch (ASSEMBLER_DIALECT
)
8338 /* Intel syntax. For absolute addresses, registers should not
8339 be surrounded by braces. */
8343 PRINT_OPERAND (file
, x
, 0);
8353 PRINT_OPERAND (file
, x
, 0);
8358 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8363 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8368 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8373 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8378 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8383 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8388 /* 387 opcodes don't get size suffixes if the operands are
8390 if (STACK_REG_P (x
))
8393 /* Likewise if using Intel opcodes. */
8394 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8397 /* This is the size of op from size of operand. */
8398 switch (GET_MODE_SIZE (GET_MODE (x
)))
8407 #ifdef HAVE_GAS_FILDS_FISTS
8417 if (GET_MODE (x
) == SFmode
)
8432 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8434 #ifdef GAS_MNEMONICS
8460 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8462 PRINT_OPERAND (file
, x
, 0);
8468 /* Little bit of braindamage here. The SSE compare instructions
8469 does use completely different names for the comparisons that the
8470 fp conditional moves. */
8471 switch (GET_CODE (x
))
8486 fputs ("unord", file
);
8490 fputs ("neq", file
);
8494 fputs ("nlt", file
);
8498 fputs ("nle", file
);
8501 fputs ("ord", file
);
8508 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8509 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8511 switch (GET_MODE (x
))
8513 case HImode
: putc ('w', file
); break;
8515 case SFmode
: putc ('l', file
); break;
8517 case DFmode
: putc ('q', file
); break;
8518 default: gcc_unreachable ();
8525 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8528 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8529 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8532 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8535 /* Like above, but reverse condition */
8537 /* Check to see if argument to %c is really a constant
8538 and not a condition code which needs to be reversed. */
8539 if (!COMPARISON_P (x
))
8541 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8544 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8547 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8548 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8551 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8555 /* It doesn't actually matter what mode we use here, as we're
8556 only going to use this for printing. */
8557 x
= adjust_address_nv (x
, DImode
, 8);
8564 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8567 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8570 int pred_val
= INTVAL (XEXP (x
, 0));
8572 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8573 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8575 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8576 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8578 /* Emit hints only in the case default branch prediction
8579 heuristics would fail. */
8580 if (taken
!= cputaken
)
8582 /* We use 3e (DS) prefix for taken branches and
8583 2e (CS) prefix for not taken branches. */
8585 fputs ("ds ; ", file
);
8587 fputs ("cs ; ", file
);
8594 output_operand_lossage ("invalid operand code '%c'", code
);
8599 print_reg (x
, code
, file
);
8603 /* No `byte ptr' prefix for call instructions. */
8604 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8607 switch (GET_MODE_SIZE (GET_MODE (x
)))
8609 case 1: size
= "BYTE"; break;
8610 case 2: size
= "WORD"; break;
8611 case 4: size
= "DWORD"; break;
8612 case 8: size
= "QWORD"; break;
8613 case 12: size
= "XWORD"; break;
8614 case 16: size
= "XMMWORD"; break;
8619 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8622 else if (code
== 'w')
8624 else if (code
== 'k')
8628 fputs (" PTR ", file
);
8632 /* Avoid (%rip) for call operands. */
8633 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8634 && !CONST_INT_P (x
))
8635 output_addr_const (file
, x
);
8636 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8637 output_operand_lossage ("invalid constraints for operand");
8642 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8647 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8648 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8650 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8652 fprintf (file
, "0x%08lx", l
);
8655 /* These float cases don't actually occur as immediate operands. */
8656 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8660 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8661 fprintf (file
, "%s", dstr
);
8664 else if (GET_CODE (x
) == CONST_DOUBLE
8665 && GET_MODE (x
) == XFmode
)
8669 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8670 fprintf (file
, "%s", dstr
);
8675 /* We have patterns that allow zero sets of memory, for instance.
8676 In 64-bit mode, we should probably support all 8-byte vectors,
8677 since we can in fact encode that into an immediate. */
8678 if (GET_CODE (x
) == CONST_VECTOR
)
8680 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8686 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8688 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8691 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8692 || GET_CODE (x
) == LABEL_REF
)
8694 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8697 fputs ("OFFSET FLAT:", file
);
8700 if (CONST_INT_P (x
))
8701 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8703 output_pic_addr_const (file
, x
, code
);
8705 output_addr_const (file
, x
);
8709 /* Print a memory operand whose address is ADDR. */
8712 print_operand_address (FILE *file
, rtx addr
)
8714 struct ix86_address parts
;
8715 rtx base
, index
, disp
;
8717 int ok
= ix86_decompose_address (addr
, &parts
);
8722 index
= parts
.index
;
8724 scale
= parts
.scale
;
8732 if (USER_LABEL_PREFIX
[0] == 0)
8734 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8740 if (!base
&& !index
)
8742 /* Displacement only requires special attention. */
8744 if (CONST_INT_P (disp
))
8746 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8748 if (USER_LABEL_PREFIX
[0] == 0)
8750 fputs ("ds:", file
);
8752 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8755 output_pic_addr_const (file
, disp
, 0);
8757 output_addr_const (file
, disp
);
8759 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8762 if (GET_CODE (disp
) == CONST
8763 && GET_CODE (XEXP (disp
, 0)) == PLUS
8764 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8765 disp
= XEXP (XEXP (disp
, 0), 0);
8766 if (GET_CODE (disp
) == LABEL_REF
8767 || (GET_CODE (disp
) == SYMBOL_REF
8768 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8769 fputs ("(%rip)", file
);
8774 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8779 output_pic_addr_const (file
, disp
, 0);
8780 else if (GET_CODE (disp
) == LABEL_REF
)
8781 output_asm_label (disp
);
8783 output_addr_const (file
, disp
);
8788 print_reg (base
, 0, file
);
8792 print_reg (index
, 0, file
);
8794 fprintf (file
, ",%d", scale
);
8800 rtx offset
= NULL_RTX
;
8804 /* Pull out the offset of a symbol; print any symbol itself. */
8805 if (GET_CODE (disp
) == CONST
8806 && GET_CODE (XEXP (disp
, 0)) == PLUS
8807 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8809 offset
= XEXP (XEXP (disp
, 0), 1);
8810 disp
= gen_rtx_CONST (VOIDmode
,
8811 XEXP (XEXP (disp
, 0), 0));
8815 output_pic_addr_const (file
, disp
, 0);
8816 else if (GET_CODE (disp
) == LABEL_REF
)
8817 output_asm_label (disp
);
8818 else if (CONST_INT_P (disp
))
8821 output_addr_const (file
, disp
);
8827 print_reg (base
, 0, file
);
8830 if (INTVAL (offset
) >= 0)
8832 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8836 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8843 print_reg (index
, 0, file
);
8845 fprintf (file
, "*%d", scale
);
8853 output_addr_const_extra (FILE *file
, rtx x
)
8857 if (GET_CODE (x
) != UNSPEC
)
8860 op
= XVECEXP (x
, 0, 0);
8861 switch (XINT (x
, 1))
8863 case UNSPEC_GOTTPOFF
:
8864 output_addr_const (file
, op
);
8865 /* FIXME: This might be @TPOFF in Sun ld. */
8866 fputs ("@GOTTPOFF", file
);
8869 output_addr_const (file
, op
);
8870 fputs ("@TPOFF", file
);
8873 output_addr_const (file
, op
);
8875 fputs ("@TPOFF", file
);
8877 fputs ("@NTPOFF", file
);
8880 output_addr_const (file
, op
);
8881 fputs ("@DTPOFF", file
);
8883 case UNSPEC_GOTNTPOFF
:
8884 output_addr_const (file
, op
);
8886 fputs ("@GOTTPOFF(%rip)", file
);
8888 fputs ("@GOTNTPOFF", file
);
8890 case UNSPEC_INDNTPOFF
:
8891 output_addr_const (file
, op
);
8892 fputs ("@INDNTPOFF", file
);
8902 /* Split one or more DImode RTL references into pairs of SImode
8903 references. The RTL can be REG, offsettable MEM, integer constant, or
8904 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8905 split and "num" is its length. lo_half and hi_half are output arrays
8906 that parallel "operands". */
8909 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8913 rtx op
= operands
[num
];
8915 /* simplify_subreg refuse to split volatile memory addresses,
8916 but we still have to handle it. */
8919 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8920 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8924 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8925 GET_MODE (op
) == VOIDmode
8926 ? DImode
: GET_MODE (op
), 0);
8927 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8928 GET_MODE (op
) == VOIDmode
8929 ? DImode
: GET_MODE (op
), 4);
8933 /* Split one or more TImode RTL references into pairs of DImode
8934 references. The RTL can be REG, offsettable MEM, integer constant, or
8935 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8936 split and "num" is its length. lo_half and hi_half are output arrays
8937 that parallel "operands". */
8940 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8944 rtx op
= operands
[num
];
8946 /* simplify_subreg refuse to split volatile memory addresses, but we
8947 still have to handle it. */
8950 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8951 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8955 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8956 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8961 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8962 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8963 is the expression of the binary operation. The output may either be
8964 emitted here, or returned to the caller, like all output_* functions.
8966 There is no guarantee that the operands are the same mode, as they
8967 might be within FLOAT or FLOAT_EXTEND expressions. */
8969 #ifndef SYSV386_COMPAT
8970 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8971 wants to fix the assemblers because that causes incompatibility
8972 with gcc. No-one wants to fix gcc because that causes
8973 incompatibility with assemblers... You can use the option of
8974 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8975 #define SYSV386_COMPAT 1
8979 output_387_binary_op (rtx insn
, rtx
*operands
)
8981 static char buf
[30];
8984 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8986 #ifdef ENABLE_CHECKING
8987 /* Even if we do not want to check the inputs, this documents input
8988 constraints. Which helps in understanding the following code. */
8989 if (STACK_REG_P (operands
[0])
8990 && ((REG_P (operands
[1])
8991 && REGNO (operands
[0]) == REGNO (operands
[1])
8992 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8993 || (REG_P (operands
[2])
8994 && REGNO (operands
[0]) == REGNO (operands
[2])
8995 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8996 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8999 gcc_assert (is_sse
);
9002 switch (GET_CODE (operands
[3]))
9005 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9006 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9014 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9015 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9023 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9024 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9032 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9033 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9047 if (GET_MODE (operands
[0]) == SFmode
)
9048 strcat (buf
, "ss\t{%2, %0|%0, %2}");
9050 strcat (buf
, "sd\t{%2, %0|%0, %2}");
9055 switch (GET_CODE (operands
[3]))
9059 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
9061 rtx temp
= operands
[2];
9062 operands
[2] = operands
[1];
9066 /* know operands[0] == operands[1]. */
9068 if (MEM_P (operands
[2]))
9074 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9076 if (STACK_TOP_P (operands
[0]))
9077 /* How is it that we are storing to a dead operand[2]?
9078 Well, presumably operands[1] is dead too. We can't
9079 store the result to st(0) as st(0) gets popped on this
9080 instruction. Instead store to operands[2] (which I
9081 think has to be st(1)). st(1) will be popped later.
9082 gcc <= 2.8.1 didn't have this check and generated
9083 assembly code that the Unixware assembler rejected. */
9084 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9086 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9090 if (STACK_TOP_P (operands
[0]))
9091 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9093 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9098 if (MEM_P (operands
[1]))
9104 if (MEM_P (operands
[2]))
9110 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9113 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9114 derived assemblers, confusingly reverse the direction of
9115 the operation for fsub{r} and fdiv{r} when the
9116 destination register is not st(0). The Intel assembler
9117 doesn't have this brain damage. Read !SYSV386_COMPAT to
9118 figure out what the hardware really does. */
9119 if (STACK_TOP_P (operands
[0]))
9120 p
= "{p\t%0, %2|rp\t%2, %0}";
9122 p
= "{rp\t%2, %0|p\t%0, %2}";
9124 if (STACK_TOP_P (operands
[0]))
9125 /* As above for fmul/fadd, we can't store to st(0). */
9126 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9128 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9133 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9136 if (STACK_TOP_P (operands
[0]))
9137 p
= "{rp\t%0, %1|p\t%1, %0}";
9139 p
= "{p\t%1, %0|rp\t%0, %1}";
9141 if (STACK_TOP_P (operands
[0]))
9142 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9144 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9149 if (STACK_TOP_P (operands
[0]))
9151 if (STACK_TOP_P (operands
[1]))
9152 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9154 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9157 else if (STACK_TOP_P (operands
[1]))
9160 p
= "{\t%1, %0|r\t%0, %1}";
9162 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9168 p
= "{r\t%2, %0|\t%0, %2}";
9170 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9183 /* Return needed mode for entity in optimize_mode_switching pass. */
9186 ix86_mode_needed (int entity
, rtx insn
)
9188 enum attr_i387_cw mode
;
9190 /* The mode UNINITIALIZED is used to store control word after a
9191 function call or ASM pattern. The mode ANY specify that function
9192 has no requirements on the control word and make no changes in the
9193 bits we are interested in. */
9196 || (NONJUMP_INSN_P (insn
)
9197 && (asm_noperands (PATTERN (insn
)) >= 0
9198 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9199 return I387_CW_UNINITIALIZED
;
9201 if (recog_memoized (insn
) < 0)
9204 mode
= get_attr_i387_cw (insn
);
9209 if (mode
== I387_CW_TRUNC
)
9214 if (mode
== I387_CW_FLOOR
)
9219 if (mode
== I387_CW_CEIL
)
9224 if (mode
== I387_CW_MASK_PM
)
9235 /* Output code to initialize control word copies used by trunc?f?i and
9236 rounding patterns. CURRENT_MODE is set to current control word,
9237 while NEW_MODE is set to new control word. */
9240 emit_i387_cw_initialization (int mode
)
9242 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9247 rtx reg
= gen_reg_rtx (HImode
);
9249 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9250 emit_move_insn (reg
, copy_rtx (stored_mode
));
9252 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9257 /* round toward zero (truncate) */
9258 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9259 slot
= SLOT_CW_TRUNC
;
9263 /* round down toward -oo */
9264 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9265 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9266 slot
= SLOT_CW_FLOOR
;
9270 /* round up toward +oo */
9271 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9272 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9273 slot
= SLOT_CW_CEIL
;
9276 case I387_CW_MASK_PM
:
9277 /* mask precision exception for nearbyint() */
9278 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9279 slot
= SLOT_CW_MASK_PM
;
9291 /* round toward zero (truncate) */
9292 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9293 slot
= SLOT_CW_TRUNC
;
9297 /* round down toward -oo */
9298 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9299 slot
= SLOT_CW_FLOOR
;
9303 /* round up toward +oo */
9304 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9305 slot
= SLOT_CW_CEIL
;
9308 case I387_CW_MASK_PM
:
9309 /* mask precision exception for nearbyint() */
9310 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9311 slot
= SLOT_CW_MASK_PM
;
9319 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9321 new_mode
= assign_386_stack_local (HImode
, slot
);
9322 emit_move_insn (new_mode
, reg
);
9325 /* Output code for INSN to convert a float to a signed int. OPERANDS
9326 are the insn operands. The output may be [HSD]Imode and the input
9327 operand may be [SDX]Fmode. */
9330 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9332 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9333 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9334 int round_mode
= get_attr_i387_cw (insn
);
9336 /* Jump through a hoop or two for DImode, since the hardware has no
9337 non-popping instruction. We used to do this a different way, but
9338 that was somewhat fragile and broke with post-reload splitters. */
9339 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9340 output_asm_insn ("fld\t%y1", operands
);
9342 gcc_assert (STACK_TOP_P (operands
[1]));
9343 gcc_assert (MEM_P (operands
[0]));
9344 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
9347 output_asm_insn ("fisttp%z0\t%0", operands
);
9350 if (round_mode
!= I387_CW_ANY
)
9351 output_asm_insn ("fldcw\t%3", operands
);
9352 if (stack_top_dies
|| dimode_p
)
9353 output_asm_insn ("fistp%z0\t%0", operands
);
9355 output_asm_insn ("fist%z0\t%0", operands
);
9356 if (round_mode
!= I387_CW_ANY
)
9357 output_asm_insn ("fldcw\t%2", operands
);
9363 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9364 have the values zero or one, indicates the ffreep insn's operand
9365 from the OPERANDS array. */
9368 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9370 if (TARGET_USE_FFREEP
)
9371 #if HAVE_AS_IX86_FFREEP
9372 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9375 static char retval
[] = ".word\t0xc_df";
9376 int regno
= REGNO (operands
[opno
]);
9378 gcc_assert (FP_REGNO_P (regno
));
9380 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9385 return opno
? "fstp\t%y1" : "fstp\t%y0";
9389 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9390 should be used. UNORDERED_P is true when fucom should be used. */
9393 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9396 rtx cmp_op0
, cmp_op1
;
9397 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9401 cmp_op0
= operands
[0];
9402 cmp_op1
= operands
[1];
9406 cmp_op0
= operands
[1];
9407 cmp_op1
= operands
[2];
9412 if (GET_MODE (operands
[0]) == SFmode
)
9414 return "ucomiss\t{%1, %0|%0, %1}";
9416 return "comiss\t{%1, %0|%0, %1}";
9419 return "ucomisd\t{%1, %0|%0, %1}";
9421 return "comisd\t{%1, %0|%0, %1}";
9424 gcc_assert (STACK_TOP_P (cmp_op0
));
9426 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9428 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9432 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9433 return output_387_ffreep (operands
, 1);
9436 return "ftst\n\tfnstsw\t%0";
9439 if (STACK_REG_P (cmp_op1
)
9441 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9442 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9444 /* If both the top of the 387 stack dies, and the other operand
9445 is also a stack register that dies, then this must be a
9446 `fcompp' float compare */
9450 /* There is no double popping fcomi variant. Fortunately,
9451 eflags is immune from the fstp's cc clobbering. */
9453 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9455 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9456 return output_387_ffreep (operands
, 0);
9461 return "fucompp\n\tfnstsw\t%0";
9463 return "fcompp\n\tfnstsw\t%0";
9468 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9470 static const char * const alt
[16] =
9472 "fcom%z2\t%y2\n\tfnstsw\t%0",
9473 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9474 "fucom%z2\t%y2\n\tfnstsw\t%0",
9475 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9477 "ficom%z2\t%y2\n\tfnstsw\t%0",
9478 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9482 "fcomi\t{%y1, %0|%0, %y1}",
9483 "fcomip\t{%y1, %0|%0, %y1}",
9484 "fucomi\t{%y1, %0|%0, %y1}",
9485 "fucomip\t{%y1, %0|%0, %y1}",
9496 mask
= eflags_p
<< 3;
9497 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9498 mask
|= unordered_p
<< 1;
9499 mask
|= stack_top_dies
;
9501 gcc_assert (mask
< 16);
9510 ix86_output_addr_vec_elt (FILE *file
, int value
)
9512 const char *directive
= ASM_LONG
;
9516 directive
= ASM_QUAD
;
9518 gcc_assert (!TARGET_64BIT
);
9521 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9525 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9527 const char *directive
= ASM_LONG
;
9530 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9531 directive
= ASM_QUAD
;
9533 gcc_assert (!TARGET_64BIT
);
9535 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9536 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9537 fprintf (file
, "%s%s%d-%s%d\n",
9538 directive
, LPREFIX
, value
, LPREFIX
, rel
);
9539 else if (HAVE_AS_GOTOFF_IN_DATA
)
9540 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9542 else if (TARGET_MACHO
)
9544 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9545 machopic_output_function_base_name (file
);
9546 fprintf(file
, "\n");
9550 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9551 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9554 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9558 ix86_expand_clear (rtx dest
)
9562 /* We play register width games, which are only valid after reload. */
9563 gcc_assert (reload_completed
);
9565 /* Avoid HImode and its attendant prefix byte. */
9566 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9567 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9568 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9570 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9571 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9573 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9574 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9580 /* X is an unchanging MEM. If it is a constant pool reference, return
9581 the constant pool rtx, else NULL. */
9584 maybe_get_pool_constant (rtx x
)
9586 x
= ix86_delegitimize_address (XEXP (x
, 0));
9588 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9589 return get_pool_constant (x
);
9595 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9597 int strict
= (reload_in_progress
|| reload_completed
);
9599 enum tls_model model
;
9604 if (GET_CODE (op1
) == SYMBOL_REF
)
9606 model
= SYMBOL_REF_TLS_MODEL (op1
);
9609 op1
= legitimize_tls_address (op1
, model
, true);
9610 op1
= force_operand (op1
, op0
);
9614 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9615 && SYMBOL_REF_DLLIMPORT_P (op1
))
9616 op1
= legitimize_dllimport_symbol (op1
, false);
9618 else if (GET_CODE (op1
) == CONST
9619 && GET_CODE (XEXP (op1
, 0)) == PLUS
9620 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9622 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9623 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
9626 model
= SYMBOL_REF_TLS_MODEL (symbol
);
9628 tmp
= legitimize_tls_address (symbol
, model
, true);
9629 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9630 && SYMBOL_REF_DLLIMPORT_P (symbol
))
9631 tmp
= legitimize_dllimport_symbol (symbol
, true);
9635 tmp
= force_operand (tmp
, NULL
);
9636 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
9637 op0
, 1, OPTAB_DIRECT
);
9643 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9645 if (TARGET_MACHO
&& !TARGET_64BIT
)
9650 rtx temp
= ((reload_in_progress
9651 || ((op0
&& REG_P (op0
))
9653 ? op0
: gen_reg_rtx (Pmode
));
9654 op1
= machopic_indirect_data_reference (op1
, temp
);
9655 op1
= machopic_legitimize_pic_address (op1
, mode
,
9656 temp
== op1
? 0 : temp
);
9658 else if (MACHOPIC_INDIRECT
)
9659 op1
= machopic_indirect_data_reference (op1
, 0);
9667 op1
= force_reg (Pmode
, op1
);
9668 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
9670 rtx reg
= no_new_pseudos
? op0
: NULL_RTX
;
9671 op1
= legitimize_pic_address (op1
, reg
);
9680 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9681 || !push_operand (op0
, mode
))
9683 op1
= force_reg (mode
, op1
);
9685 if (push_operand (op0
, mode
)
9686 && ! general_no_elim_operand (op1
, mode
))
9687 op1
= copy_to_mode_reg (mode
, op1
);
9689 /* Force large constants in 64bit compilation into register
9690 to get them CSEed. */
9691 if (TARGET_64BIT
&& mode
== DImode
9692 && immediate_operand (op1
, mode
)
9693 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9694 && !register_operand (op0
, mode
)
9695 && optimize
&& !reload_completed
&& !reload_in_progress
)
9696 op1
= copy_to_mode_reg (mode
, op1
);
9698 if (FLOAT_MODE_P (mode
))
9700 /* If we are loading a floating point constant to a register,
9701 force the value to memory now, since we'll get better code
9702 out the back end. */
9706 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9708 op1
= validize_mem (force_const_mem (mode
, op1
));
9709 if (!register_operand (op0
, mode
))
9711 rtx temp
= gen_reg_rtx (mode
);
9712 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9713 emit_move_insn (op0
, temp
);
9720 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9724 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9726 rtx op0
= operands
[0], op1
= operands
[1];
9728 /* Force constants other than zero into memory. We do not know how
9729 the instructions used to build constants modify the upper 64 bits
9730 of the register, once we have that information we may be able
9731 to handle some of them more efficiently. */
9732 if ((reload_in_progress
| reload_completed
) == 0
9733 && register_operand (op0
, mode
)
9735 && standard_sse_constant_p (op1
) <= 0)
9736 op1
= validize_mem (force_const_mem (mode
, op1
));
9738 /* Make operand1 a register if it isn't already. */
9740 && !register_operand (op0
, mode
)
9741 && !register_operand (op1
, mode
))
9743 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9747 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9750 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9751 straight to ix86_expand_vector_move. */
9752 /* Code generation for scalar reg-reg moves of single and double precision data:
9753 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9757 if (x86_sse_partial_reg_dependency == true)
9762 Code generation for scalar loads of double precision data:
9763 if (x86_sse_split_regs == true)
9764 movlpd mem, reg (gas syntax)
9768 Code generation for unaligned packed loads of single precision data
9769 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9770 if (x86_sse_unaligned_move_optimal)
9773 if (x86_sse_partial_reg_dependency == true)
9785 Code generation for unaligned packed loads of double precision data
9786 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9787 if (x86_sse_unaligned_move_optimal)
9790 if (x86_sse_split_regs == true)
9803 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9812 /* If we're optimizing for size, movups is the smallest. */
9815 op0
= gen_lowpart (V4SFmode
, op0
);
9816 op1
= gen_lowpart (V4SFmode
, op1
);
9817 emit_insn (gen_sse_movups (op0
, op1
));
9821 /* ??? If we have typed data, then it would appear that using
9822 movdqu is the only way to get unaligned data loaded with
9824 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9826 op0
= gen_lowpart (V16QImode
, op0
);
9827 op1
= gen_lowpart (V16QImode
, op1
);
9828 emit_insn (gen_sse2_movdqu (op0
, op1
));
9832 if (TARGET_SSE2
&& mode
== V2DFmode
)
9836 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9838 op0
= gen_lowpart (V2DFmode
, op0
);
9839 op1
= gen_lowpart (V2DFmode
, op1
);
9840 emit_insn (gen_sse2_movupd (op0
, op1
));
9844 /* When SSE registers are split into halves, we can avoid
9845 writing to the top half twice. */
9846 if (TARGET_SSE_SPLIT_REGS
)
9848 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9853 /* ??? Not sure about the best option for the Intel chips.
9854 The following would seem to satisfy; the register is
9855 entirely cleared, breaking the dependency chain. We
9856 then store to the upper half, with a dependency depth
9857 of one. A rumor has it that Intel recommends two movsd
9858 followed by an unpacklpd, but this is unconfirmed. And
9859 given that the dependency depth of the unpacklpd would
9860 still be one, I'm not sure why this would be better. */
9861 zero
= CONST0_RTX (V2DFmode
);
9864 m
= adjust_address (op1
, DFmode
, 0);
9865 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9866 m
= adjust_address (op1
, DFmode
, 8);
9867 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9871 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9873 op0
= gen_lowpart (V4SFmode
, op0
);
9874 op1
= gen_lowpart (V4SFmode
, op1
);
9875 emit_insn (gen_sse_movups (op0
, op1
));
9879 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9880 emit_move_insn (op0
, CONST0_RTX (mode
));
9882 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9884 if (mode
!= V4SFmode
)
9885 op0
= gen_lowpart (V4SFmode
, op0
);
9886 m
= adjust_address (op1
, V2SFmode
, 0);
9887 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9888 m
= adjust_address (op1
, V2SFmode
, 8);
9889 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9892 else if (MEM_P (op0
))
9894 /* If we're optimizing for size, movups is the smallest. */
9897 op0
= gen_lowpart (V4SFmode
, op0
);
9898 op1
= gen_lowpart (V4SFmode
, op1
);
9899 emit_insn (gen_sse_movups (op0
, op1
));
9903 /* ??? Similar to above, only less clear because of quote
9904 typeless stores unquote. */
9905 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9906 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9908 op0
= gen_lowpart (V16QImode
, op0
);
9909 op1
= gen_lowpart (V16QImode
, op1
);
9910 emit_insn (gen_sse2_movdqu (op0
, op1
));
9914 if (TARGET_SSE2
&& mode
== V2DFmode
)
9916 m
= adjust_address (op0
, DFmode
, 0);
9917 emit_insn (gen_sse2_storelpd (m
, op1
));
9918 m
= adjust_address (op0
, DFmode
, 8);
9919 emit_insn (gen_sse2_storehpd (m
, op1
));
9923 if (mode
!= V4SFmode
)
9924 op1
= gen_lowpart (V4SFmode
, op1
);
9925 m
= adjust_address (op0
, V2SFmode
, 0);
9926 emit_insn (gen_sse_storelps (m
, op1
));
9927 m
= adjust_address (op0
, V2SFmode
, 8);
9928 emit_insn (gen_sse_storehps (m
, op1
));
9935 /* Expand a push in MODE. This is some mode for which we do not support
9936 proper push instructions, at least from the registers that we expect
9937 the value to live in. */
9940 ix86_expand_push (enum machine_mode mode
, rtx x
)
/* Reserve the slot by hand: sp = sp + (-GET_MODE_SIZE (mode)).  */
9944 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9945 GEN_INT (-GET_MODE_SIZE (mode
)),
9946 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
/* expand_simple_binop may have placed the result in a scratch; copy it
   back into the stack pointer if so.  */
9947 if (tmp
!= stack_pointer_rtx
)
9948 emit_move_insn (stack_pointer_rtx
, tmp
);
/* Store X into the freshly reserved slot at the new stack top.  */
9950 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9951 emit_move_insn (tmp
, x
);
9954 /* Helper function of ix86_fixup_binary_operands to canonicalize
9955 operand order. Returns true if the operands should be swapped. */
/* NOTE(review): the per-test return statements were lost in extraction;
   the surviving conditions encode the documented priority order.  */
9958 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9961 rtx dst
= operands
[0];
9962 rtx src1
= operands
[1];
9963 rtx src2
= operands
[2];
9965 /* If the operation is not commutative, we can't do anything. */
9966 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9969 /* Highest priority is that src1 should match dst. */
9970 if (rtx_equal_p (dst
, src1
))
9972 if (rtx_equal_p (dst
, src2
))
9975 /* Next highest priority is that immediate constants come second. */
9976 if (immediate_operand (src2
, mode
))
9978 if (immediate_operand (src1
, mode
))
9981 /* Lowest priority is that memory references should come second. */
9991 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9992 destination to use for the operation. If different from the true
9993 destination in operands[0], a copy operation will be required. */
9996 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9999 rtx dst
= operands
[0];
10000 rtx src1
= operands
[1];
10001 rtx src2
= operands
[2];
10003 /* Canonicalize operand order. */
10004 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10011 /* Both source operands cannot be in memory. */
10012 if (MEM_P (src1
) && MEM_P (src2
))
10014 /* Optimization: Only read from memory once. */
10015 if (rtx_equal_p (src1
, src2
))
10017 src2
= force_reg (mode
, src2
);
10021 src2
= force_reg (mode
, src2
);
10024 /* If the destination is memory, and we do not have matching source
10025 operands, do things in registers. */
10026 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10027 dst
= gen_reg_rtx (mode
);
10029 /* Source 1 cannot be a constant. */
10030 if (CONSTANT_P (src1
))
10031 src1
= force_reg (mode
, src1
);
10033 /* Source 1 cannot be a non-matching memory. */
10034 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10035 src1
= force_reg (mode
, src1
);
/* Write the (possibly register-forced) sources back; the caller emits a
   final copy when the returned DST differs from operands[0].  */
10037 operands
[1] = src1
;
10038 operands
[2] = src2
;
10042 /* Similarly, but assume that the destination has already been
10043 set up properly. */
10046 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
10047 enum machine_mode mode
, rtx operands
[])
/* Run the full fixup; since the destination is already legitimate, the
   fixup must not have substituted a scratch for operands[0].  */
10049 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10050 gcc_assert (dst
== operands
[0]);
10053 /* Attempt to expand a binary operator. Make the expansion closer to the
10054 actual machine, then just general_operand, which will allow 3 separate
10055 memory references (one output, two input) in a single insn. */
10058 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
10061 rtx src1
, src2
, dst
, op
, clob
;
/* Canonicalize the operands first; DST may come back as a scratch
   register distinct from operands[0].  */
10063 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10064 src1
= operands
[1];
10065 src2
= operands
[2];
10067 /* Emit the instruction. */
10069 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
10070 if (reload_in_progress
)
10072 /* Reload doesn't know about the flags register, and doesn't know that
10073 it doesn't want to clobber it. We can only do this with PLUS. */
10074 gcc_assert (code
== PLUS
);
/* Outside reload, record that the arithmetic insn overwrites the flags
   register by pairing the SET with a CLOBBER in one PARALLEL.  */
10079 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10080 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10083 /* Fix up the destination if needed. */
10084 if (dst
!= operands
[0])
10085 emit_move_insn (operands
[0], dst
);
10088 /* Return TRUE or FALSE depending on whether the binary operator meets the
10089 appropriate constraints. */
/* NOTE(review): the return lines were dropped in extraction; each
   surviving test below corresponds to one accept/reject decision.  */
10092 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
10095 rtx dst
= operands
[0];
10096 rtx src1
= operands
[1];
10097 rtx src2
= operands
[2];
10099 /* Both source operands cannot be in memory. */
10100 if (MEM_P (src1
) && MEM_P (src2
))
10103 /* Canonicalize operand order for commutative operators. */
10104 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10111 /* If the destination is memory, we must have a matching source operand. */
10112 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10115 /* Source 1 cannot be a constant. */
10116 if (CONSTANT_P (src1
))
10119 /* Source 1 cannot be a non-matching memory. */
10120 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10126 /* Attempt to expand a unary operator. Make the expansion closer to the
10127 actual machine, then just general_operand, which will allow 2 separate
10128 memory references (one output, one input) in a single insn. */
10131 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10134 int matching_memory
;
10135 rtx src
, dst
, op
, clob
;
10140 /* If the destination is memory, and we do not have matching source
10141 operands, do things in registers. */
10142 matching_memory
= 0;
10145 if (rtx_equal_p (dst
, src
))
10146 matching_memory
= 1;
10148 dst
= gen_reg_rtx (mode
);
10151 /* When source operand is memory, destination must match. */
10152 if (MEM_P (src
) && !matching_memory
)
10153 src
= force_reg (mode
, src
);
10155 /* Emit the instruction. */
10157 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10158 if (reload_in_progress
|| code
== NOT
)
10160 /* Reload doesn't know about the flags register, and doesn't know that
10161 it doesn't want to clobber it. */
10162 gcc_assert (code
== NOT
);
10167 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10168 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10171 /* Fix up the destination if needed. */
10172 if (dst
!= operands
[0])
10173 emit_move_insn (operands
[0], dst
);
10176 /* Return TRUE or FALSE depending on whether the unary operator meets the
10177 appropriate constraints. */
10180 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10181 enum machine_mode mode ATTRIBUTE_UNUSED
,
10182 rtx operands
[2] ATTRIBUTE_UNUSED
)
10184 /* If one of operands is memory, source and destination must match. */
/* The single test: reject a memory operand on either side unless the
   destination and source are the same rtx.  */
10185 if ((MEM_P (operands
[0])
10186 || MEM_P (operands
[1]))
10187 && ! rtx_equal_p (operands
[0], operands
[1]))
10192 /* Post-reload splitter for converting an SF or DFmode value in an
10193 SSE register into an unsigned SImode. */
10196 ix86_split_convert_uns_si_sse (rtx operands
[])
10198 enum machine_mode vecmode
;
10199 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10201 large
= operands
[1];
10202 zero_or_two31
= operands
[2];
10203 input
= operands
[3];
10204 two31
= operands
[4];
10205 vecmode
= GET_MODE (large
);
10206 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10208 /* Load up the value into the low element. We must ensure that the other
10209 elements are valid floats -- zero is the easiest such value. */
10212 if (vecmode
== V4SFmode
)
10213 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10215 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10219 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10220 emit_move_insn (value
, CONST0_RTX (vecmode
));
10221 if (vecmode
== V4SFmode
)
10222 emit_insn (gen_sse_movss (value
, value
, input
));
10224 emit_insn (gen_sse2_movsd (value
, value
, input
));
10227 emit_move_insn (large
, two31
);
10228 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10230 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10231 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10233 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10234 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10236 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10237 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10239 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10240 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10242 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10243 if (vecmode
== V4SFmode
)
10244 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10246 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10249 emit_insn (gen_xorv4si3 (value
, value
, large
));
10252 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10253 Expects the 64-bit DImode to be supplied in a pair of integral
10254 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10255 -mfpmath=sse, !optimize_size only. */
10258 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10260 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10261 rtx int_xmm
, fp_xmm
;
10262 rtx biases
, exponents
;
10265 int_xmm
= gen_reg_rtx (V4SImode
);
10266 if (TARGET_INTER_UNIT_MOVES
)
10267 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10268 else if (TARGET_SSE_SPLIT_REGS
)
10270 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10271 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10275 x
= gen_reg_rtx (V2DImode
);
10276 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10277 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10280 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10281 gen_rtvec (4, GEN_INT (0x43300000UL
),
10282 GEN_INT (0x45300000UL
),
10283 const0_rtx
, const0_rtx
));
10284 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10286 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10287 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10289 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10290 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10291 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10292 (0x1.0p84 + double(fp_value_hi_xmm)).
10293 Note these exponents differ by 32. */
10295 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10297 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10298 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10299 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10300 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10301 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10302 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10303 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10304 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10305 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10307 /* Add the upper and lower DFmode values together. */
10309 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10312 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10313 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10314 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10317 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10320 /* Convert an unsigned SImode value into a DFmode. Only currently used
10321 for SSE, but applicable anywhere. */
10324 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10326 REAL_VALUE_TYPE TWO31r
;
/* Bias the input by INT_MIN (adding -2147483648 flips the sign bit), so
   the unsigned value becomes representable as a signed int.  */
10329 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10330 NULL
, 1, OPTAB_DIRECT
);
/* Convert the biased value with the ordinary signed int->double insn.  */
10332 fp
= gen_reg_rtx (DFmode
);
10333 emit_insn (gen_floatsidf2 (fp
, x
));
/* Then add 2**31 back in floating point to undo the bias.  */
10335 real_ldexp (&TWO31r
, &dconst1
, 31);
10336 x
= const_double_from_real_value (TWO31r
, DFmode
);
10338 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10340 emit_move_insn (target
, x
);
10343 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10344 32-bit mode; otherwise we have a direct convert instruction. */
/* Computes (double) hi_part * 2**32 + (double) (unsigned) lo_part.  */
10347 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10349 REAL_VALUE_TYPE TWO32r
;
10350 rtx fp_lo
, fp_hi
, x
;
10352 fp_lo
= gen_reg_rtx (DFmode
);
10353 fp_hi
= gen_reg_rtx (DFmode
);
/* Signed convert of the high 32 bits.  */
10355 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
/* Scale the high part by 2**32.  */
10357 real_ldexp (&TWO32r
, &dconst1
, 32);
10358 x
= const_double_from_real_value (TWO32r
, DFmode
);
10359 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
/* Unsigned convert of the low 32 bits.  */
10361 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
/* Sum the two halves into TARGET.  */
10363 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10366 emit_move_insn (target
, x
);
10369 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10370 For x86_32, -mfpmath=sse, !optimize_size only. */
10372 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10374 REAL_VALUE_TYPE ONE16r
;
10375 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10377 real_ldexp (&ONE16r
, &dconst1
, 16);
10378 x
= const_double_from_real_value (ONE16r
, SFmode
);
10379 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10380 NULL
, 0, OPTAB_DIRECT
);
10381 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10382 NULL
, 0, OPTAB_DIRECT
);
10383 fp_hi
= gen_reg_rtx (SFmode
);
10384 fp_lo
= gen_reg_rtx (SFmode
);
10385 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10386 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10387 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10389 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10391 if (!rtx_equal_p (target
, fp_hi
))
10392 emit_move_insn (target
, fp_hi
);
10395 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10396 then replicate the value for all elements of the vector
/* V4SFmode case: {value,value,value,value} when VECT, else {value,0,0,0}.  */
10400 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10407 v
= gen_rtvec (4, value
, value
, value
, value
);
10409 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10410 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10411 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
/* V2DFmode case: {value,value} when VECT, else {value,0}.  */
10415 v
= gen_rtvec (2, value
, value
);
10417 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10418 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
/* Any other mode is a caller error.  */
10421 gcc_unreachable ();
10425 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10426 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10427 true, then replicate the mask for all elements of the vector register.
10428 If INVERT is true, then create a mask excluding the sign bit. */
10431 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10433 enum machine_mode vec_mode
;
10434 HOST_WIDE_INT hi
, lo
;
10439 /* Find the sign bit, sign extended to 2*HWI. */
10440 if (mode
== SFmode
)
10441 lo
= 0x80000000, hi
= lo
< 0;
10442 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10443 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10445 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10448 lo
= ~lo
, hi
= ~hi
;
10450 /* Force this value into the low part of a fp vector constant. */
10451 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10452 mask
= gen_lowpart (mode
, mask
);
10454 v
= ix86_build_const_vector (mode
, vect
, mask
);
10455 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10456 return force_reg (vec_mode
, v
);
10459 /* Generate code for floating point ABS or NEG. */
10462 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10465 rtx mask
, set
, use
, clob
, dst
, src
;
10466 bool matching_memory
;
10467 bool use_sse
= false;
10468 bool vector_mode
= VECTOR_MODE_P (mode
);
10469 enum machine_mode elt_mode
= mode
;
10473 elt_mode
= GET_MODE_INNER (mode
);
10476 else if (TARGET_SSE_MATH
)
10477 use_sse
= SSE_FLOAT_MODE_P (mode
);
10479 /* NEG and ABS performed with SSE use bitwise mask operations.
10480 Create the appropriate mask now. */
10482 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10489 /* If the destination is memory, and we don't have matching source
10490 operands or we're using the x87, do things in registers. */
10491 matching_memory
= false;
10494 if (use_sse
&& rtx_equal_p (dst
, src
))
10495 matching_memory
= true;
10497 dst
= gen_reg_rtx (mode
);
10499 if (MEM_P (src
) && !matching_memory
)
10500 src
= force_reg (mode
, src
);
10504 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10505 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10510 set
= gen_rtx_fmt_e (code
, mode
, src
);
10511 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10514 use
= gen_rtx_USE (VOIDmode
, mask
);
10515 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10516 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10517 gen_rtvec (3, set
, use
, clob
)));
10523 if (dst
!= operands
[0])
10524 emit_move_insn (operands
[0], dst
);
10527 /* Expand a copysign operation. Special case operand 0 being a constant. */
10530 ix86_expand_copysign (rtx operands
[])
10532 enum machine_mode mode
, vmode
;
10533 rtx dest
, op0
, op1
, mask
, nmask
;
10535 dest
= operands
[0];
10539 mode
= GET_MODE (dest
);
10540 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10542 if (GET_CODE (op0
) == CONST_DOUBLE
)
10546 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10547 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10549 if (op0
== CONST0_RTX (mode
))
10550 op0
= CONST0_RTX (vmode
);
10553 if (mode
== SFmode
)
10554 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10555 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10557 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10558 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10561 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10563 if (mode
== SFmode
)
10564 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10566 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10570 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10571 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10573 if (mode
== SFmode
)
10574 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10576 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10580 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10581 be a constant, and so has already been expanded into a vector constant. */
10584 ix86_split_copysign_const (rtx operands
[])
10586 enum machine_mode mode
, vmode
;
10587 rtx dest
, op0
, op1
, mask
, x
;
10589 dest
= operands
[0];
10592 mask
= operands
[3];
10594 mode
= GET_MODE (dest
);
10595 vmode
= GET_MODE (mask
);
10597 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10598 x
= gen_rtx_AND (vmode
, dest
, mask
);
10599 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10601 if (op0
!= CONST0_RTX (vmode
))
10603 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10604 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10608 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10609 so we have to do two masks. */
10612 ix86_split_copysign_var (rtx operands
[])
10614 enum machine_mode mode
, vmode
;
10615 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10617 dest
= operands
[0];
10618 scratch
= operands
[1];
10621 nmask
= operands
[4];
10622 mask
= operands
[5];
10624 mode
= GET_MODE (dest
);
10625 vmode
= GET_MODE (mask
);
10627 if (rtx_equal_p (op0
, op1
))
10629 /* Shouldn't happen often (it's useless, obviously), but when it does
10630 we'd generate incorrect code if we continue below. */
10631 emit_move_insn (dest
, op0
);
10635 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10637 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10639 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10640 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10643 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10644 x
= gen_rtx_NOT (vmode
, dest
);
10645 x
= gen_rtx_AND (vmode
, x
, op0
);
10646 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10650 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10652 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10654 else /* alternative 2,4 */
10656 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10657 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10658 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10660 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10662 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10664 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10665 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10667 else /* alternative 3,4 */
10669 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10671 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10672 x
= gen_rtx_AND (vmode
, dest
, op0
);
10674 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10677 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10678 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10681 /* Return TRUE or FALSE depending on whether the first SET in INSN
10682 has source and destination with matching CC modes, and that the
10683 CC mode is at least as constrained as REQ_MODE. */
10686 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10689 enum machine_mode set_mode
;
/* Dig the first SET out of a PARALLEL pattern if necessary; it must be a
   COMPARE feeding a flags destination.  */
10691 set
= PATTERN (insn
)
10692 if (GET_CODE (set
) == PARALLEL
)
10693 set
= XVECEXP (set
, 0, 0);
10694 gcc_assert (GET_CODE (set
) == SET
);
10695 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10697 set_mode
= GET_MODE (SET_DEST (set
));
/* NOTE(review): the branch bodies were lost in extraction; the surviving
   conditions dispatch on how strict the SET's CC mode must be relative to
   REQ_MODE (plain CCmode is only considered for compares against zero).  */
10701 if (req_mode
!= CCNOmode
10702 && (req_mode
!= CCmode
10703 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10707 if (req_mode
== CCGCmode
)
10711 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10715 if (req_mode
== CCZmode
)
10722 gcc_unreachable ();
/* Finally the COMPARE itself must carry exactly the destination's mode.  */
10725 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10728 /* Generate insn patterns to do an integer compare of OPERANDS. */
10731 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10733 enum machine_mode cmpmode
;
/* Choose the CC mode this comparison needs and materialize the hard
   flags register in that mode.  */
10736 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10737 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10739 /* This is very simple, but making the interface the same as in the
10740 FP case makes the rest of the code easier. */
10741 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10742 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10744 /* Return the test that should be put into the flags user, i.e.
10745 the bcc, scc, or cmov instruction. */
10746 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10749 /* Figure out whether to use ordered or unordered fp comparisons.
10750 Return the appropriate mode to use. */
10753 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10755 /* ??? In order to make all comparisons reversible, we do all comparisons
10756 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10757 all forms trapping and nontrapping comparisons, we can make inequality
10758 comparisons trapping again, since it results in better code when using
10759 FCOM based compares. */
/* Under -mieee-fp pick CCFPUmode (the non-trapping variant), otherwise
   the plain CCFPmode.  */
10760 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10764 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10766 enum machine_mode mode
= GET_MODE (op0
);
10768 if (SCALAR_FLOAT_MODE_P (mode
))
10770 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
10771 return ix86_fp_compare_mode (code
);
10776 /* Only zero flag is needed. */
10777 case EQ
: /* ZF=0 */
10778 case NE
: /* ZF!=0 */
10780 /* Codes needing carry flag. */
10781 case GEU
: /* CF=0 */
10782 case GTU
: /* CF=0 & ZF=0 */
10783 case LTU
: /* CF=1 */
10784 case LEU
: /* CF=1 | ZF=1 */
10786 /* Codes possibly doable only with sign flag when
10787 comparing against zero. */
10788 case GE
: /* SF=OF or SF=0 */
10789 case LT
: /* SF<>OF or SF=1 */
10790 if (op1
== const0_rtx
)
10793 /* For other cases Carry flag is not required. */
10795 /* Codes doable only with sign flag when comparing
10796 against zero, but we miss jump instruction for it
10797 so we need to use relational tests against overflow
10798 that thus needs to be zero. */
10799 case GT
: /* ZF=0 & SF=OF */
10800 case LE
: /* ZF=1 | SF<>OF */
10801 if (op1
== const0_rtx
)
10805 /* strcmp pattern do (use flags) and combine may ask us for proper
10810 gcc_unreachable ();
10814 /* Return the fixed registers used for condition codes. */
10817 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10824 /* If two condition code modes are compatible, return a condition code
10825 mode which is compatible with both. Otherwise, return
10828 static enum machine_mode
10829 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10834 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10837 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10838 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10844 gcc_unreachable ();
10866 /* These are only compatible with themselves, which we already
10872 /* Split comparison code CODE into comparisons we can do using branch
10873 instructions. BYPASS_CODE is comparison code for branch that will
10874 branch around FIRST_CODE and SECOND_CODE. If some of branches
10875 is not required, set value to UNKNOWN.
10876 We never require more than two branches. */
10879 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10880 enum rtx_code
*first_code
,
10881 enum rtx_code
*second_code
)
10883 *first_code
= code
;
10884 *bypass_code
= UNKNOWN
;
10885 *second_code
= UNKNOWN
;
10887 /* The fcomi comparison sets flags as follows:
10897 case GT
: /* GTU - CF=0 & ZF=0 */
10898 case GE
: /* GEU - CF=0 */
10899 case ORDERED
: /* PF=0 */
10900 case UNORDERED
: /* PF=1 */
10901 case UNEQ
: /* EQ - ZF=1 */
10902 case UNLT
: /* LTU - CF=1 */
10903 case UNLE
: /* LEU - CF=1 | ZF=1 */
10904 case LTGT
: /* EQ - ZF=0 */
10906 case LT
: /* LTU - CF=1 - fails on unordered */
10907 *first_code
= UNLT
;
10908 *bypass_code
= UNORDERED
;
10910 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10911 *first_code
= UNLE
;
10912 *bypass_code
= UNORDERED
;
10914 case EQ
: /* EQ - ZF=1 - fails on unordered */
10915 *first_code
= UNEQ
;
10916 *bypass_code
= UNORDERED
;
10918 case NE
: /* NE - ZF=0 - fails on unordered */
10919 *first_code
= LTGT
;
10920 *second_code
= UNORDERED
;
10922 case UNGE
: /* GEU - CF=0 - fails on unordered */
10924 *second_code
= UNORDERED
;
10926 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10928 *second_code
= UNORDERED
;
10931 gcc_unreachable ();
10933 if (!TARGET_IEEE_FP
)
10935 *second_code
= UNKNOWN
;
10936 *bypass_code
= UNKNOWN
;
10940 /* Return cost of comparison done fcom + arithmetics operations on AX.
10941 All following functions do use number of instructions as a cost metrics.
10942 In future this should be tweaked to compute bytes for optimize_size and
10943 take into account performance of various instructions on various CPUs. */
10945 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10947 if (!TARGET_IEEE_FP
)
10949 /* The cost of code output by ix86_expand_fp_compare. */
10973 gcc_unreachable ();
10977 /* Return cost of comparison done using fcomi operation.
10978 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10980 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10982 enum rtx_code bypass_code
, first_code
, second_code
;
10983 /* Return arbitrarily high cost when instruction is not supported - this
10984 prevents gcc from using it. */
10987 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10988 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10991 /* Return cost of comparison done using sahf operation.
10992 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10994 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10996 enum rtx_code bypass_code
, first_code
, second_code
;
10997 /* Return arbitrarily high cost when instruction is not preferred - this
10998 avoids gcc from using it. */
10999 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
11001 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11002 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
11005 /* Compute cost of the comparison done using any method.
11006 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11008 ix86_fp_comparison_cost (enum rtx_code code
)
11010 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
11013 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
11014 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
11016 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
11017 if (min
> sahf_cost
)
11019 if (min
> fcomi_cost
)
11024 /* Return true if we should use an FCOMI instruction for this
11028 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
11030 enum rtx_code swapped_code
= swap_condition (code
);
11032 return ((ix86_fp_comparison_cost (code
)
11033 == ix86_fp_comparison_fcomi_cost (code
))
11034 || (ix86_fp_comparison_cost (swapped_code
)
11035 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
11038 /* Swap, force into registers, or otherwise massage the two operands
11039 to a fp comparison. The operands are updated in place; the new
11040 comparison code is returned. */
11042 static enum rtx_code
11043 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
11045 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
11046 rtx op0
= *pop0
, op1
= *pop1
;
11047 enum machine_mode op_mode
= GET_MODE (op0
);
11048 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
11050 /* All of the unordered compare instructions only work on registers.
11051 The same is true of the fcomi compare instructions. The XFmode
11052 compare instructions require registers except when comparing
11053 against zero or when converting operand 1 from fixed point to
11057 && (fpcmp_mode
== CCFPUmode
11058 || (op_mode
== XFmode
11059 && ! (standard_80387_constant_p (op0
) == 1
11060 || standard_80387_constant_p (op1
) == 1)
11061 && GET_CODE (op1
) != FLOAT
)
11062 || ix86_use_fcomi_compare (code
)))
11064 op0
= force_reg (op_mode
, op0
);
11065 op1
= force_reg (op_mode
, op1
);
11069 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11070 things around if they appear profitable, otherwise force op0
11071 into a register. */
11073 if (standard_80387_constant_p (op0
) == 0
11075 && ! (standard_80387_constant_p (op1
) == 0
11079 tmp
= op0
, op0
= op1
, op1
= tmp
;
11080 code
= swap_condition (code
);
11084 op0
= force_reg (op_mode
, op0
);
11086 if (CONSTANT_P (op1
))
11088 int tmp
= standard_80387_constant_p (op1
);
11090 op1
= validize_mem (force_const_mem (op_mode
, op1
));
11094 op1
= force_reg (op_mode
, op1
);
11097 op1
= force_reg (op_mode
, op1
);
11101 /* Try to rearrange the comparison to make it cheaper. */
11102 if (ix86_fp_comparison_cost (code
)
11103 > ix86_fp_comparison_cost (swap_condition (code
))
11104 && (REG_P (op1
) || !no_new_pseudos
))
11107 tmp
= op0
, op0
= op1
, op1
= tmp
;
11108 code
= swap_condition (code
);
11110 op0
= force_reg (op_mode
, op0
);
11118 /* Convert comparison codes we use to represent FP comparison to integer
11119 code that will result in proper branch. Return UNKNOWN if no such code
11123 ix86_fp_compare_code_to_integer (enum rtx_code code
)
11152 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11155 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11156 rtx
*second_test
, rtx
*bypass_test
)
11158 enum machine_mode fpcmp_mode
, intcmp_mode
;
11160 int cost
= ix86_fp_comparison_cost (code
);
11161 enum rtx_code bypass_code
, first_code
, second_code
;
11163 fpcmp_mode
= ix86_fp_compare_mode (code
);
11164 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11167 *second_test
= NULL_RTX
;
11169 *bypass_test
= NULL_RTX
;
11171 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11173 /* Do fcomi/sahf based test when profitable. */
11174 if ((TARGET_CMOVE
|| TARGET_SAHF
)
11175 && (bypass_code
== UNKNOWN
|| bypass_test
)
11176 && (second_code
== UNKNOWN
|| second_test
)
11177 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
11181 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11182 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11188 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11189 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11191 scratch
= gen_reg_rtx (HImode
);
11192 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11193 emit_insn (gen_x86_sahf_1 (scratch
));
11196 /* The FP codes work out to act like unsigned. */
11197 intcmp_mode
= fpcmp_mode
;
11199 if (bypass_code
!= UNKNOWN
)
11200 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11201 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11203 if (second_code
!= UNKNOWN
)
11204 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11205 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11210 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11211 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11212 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11214 scratch
= gen_reg_rtx (HImode
);
11215 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11217 /* In the unordered case, we have to check C2 for NaN's, which
11218 doesn't happen to work out to anything nice combination-wise.
11219 So do some bit twiddling on the value we've got in AH to come
11220 up with an appropriate set of condition codes. */
11222 intcmp_mode
= CCNOmode
;
11227 if (code
== GT
|| !TARGET_IEEE_FP
)
11229 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11234 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11235 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11236 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11237 intcmp_mode
= CCmode
;
11243 if (code
== LT
&& TARGET_IEEE_FP
)
11245 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11246 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11247 intcmp_mode
= CCmode
;
11252 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11258 if (code
== GE
|| !TARGET_IEEE_FP
)
11260 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11265 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11266 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11273 if (code
== LE
&& TARGET_IEEE_FP
)
11275 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11276 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11277 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11278 intcmp_mode
= CCmode
;
11283 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11289 if (code
== EQ
&& TARGET_IEEE_FP
)
11291 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11292 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11293 intcmp_mode
= CCmode
;
11298 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11305 if (code
== NE
&& TARGET_IEEE_FP
)
11307 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11308 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11314 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11320 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11324 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11329 gcc_unreachable ();
11333 /* Return the test that should be put into the flags user, i.e.
11334 the bcc, scc, or cmov instruction. */
11335 return gen_rtx_fmt_ee (code
, VOIDmode
,
11336 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11341 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11344 op0
= ix86_compare_op0
;
11345 op1
= ix86_compare_op1
;
11348 *second_test
= NULL_RTX
;
11350 *bypass_test
= NULL_RTX
;
11352 if (ix86_compare_emitted
)
11354 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11355 ix86_compare_emitted
= NULL_RTX
;
11357 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11359 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
11360 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11361 second_test
, bypass_test
);
11364 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11369 /* Return true if the CODE will result in nontrivial jump sequence. */
11371 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11373 enum rtx_code bypass_code
, first_code
, second_code
;
11376 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11377 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11381 ix86_expand_branch (enum rtx_code code
, rtx label
)
11385 /* If we have emitted a compare insn, go straight to simple.
11386 ix86_expand_compare won't emit anything if ix86_compare_emitted
11388 if (ix86_compare_emitted
)
11391 switch (GET_MODE (ix86_compare_op0
))
11397 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11398 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11399 gen_rtx_LABEL_REF (VOIDmode
, label
),
11401 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11410 enum rtx_code bypass_code
, first_code
, second_code
;
11412 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11413 &ix86_compare_op1
);
11415 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11417 /* Check whether we will use the natural sequence with one jump. If
11418 so, we can expand jump early. Otherwise delay expansion by
11419 creating compound insn to not confuse optimizers. */
11420 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11423 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11424 gen_rtx_LABEL_REF (VOIDmode
, label
),
11425 pc_rtx
, NULL_RTX
, NULL_RTX
);
11429 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11430 ix86_compare_op0
, ix86_compare_op1
);
11431 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11432 gen_rtx_LABEL_REF (VOIDmode
, label
),
11434 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11436 use_fcomi
= ix86_use_fcomi_compare (code
);
11437 vec
= rtvec_alloc (3 + !use_fcomi
);
11438 RTVEC_ELT (vec
, 0) = tmp
;
11440 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11442 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11445 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11447 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11456 /* Expand DImode branch into multiple compare+branch. */
11458 rtx lo
[2], hi
[2], label2
;
11459 enum rtx_code code1
, code2
, code3
;
11460 enum machine_mode submode
;
11462 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11464 tmp
= ix86_compare_op0
;
11465 ix86_compare_op0
= ix86_compare_op1
;
11466 ix86_compare_op1
= tmp
;
11467 code
= swap_condition (code
);
11469 if (GET_MODE (ix86_compare_op0
) == DImode
)
11471 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11472 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11477 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11478 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11482 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11483 avoid two branches. This costs one extra insn, so disable when
11484 optimizing for size. */
11486 if ((code
== EQ
|| code
== NE
)
11488 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11493 if (hi
[1] != const0_rtx
)
11494 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11495 NULL_RTX
, 0, OPTAB_WIDEN
);
11498 if (lo
[1] != const0_rtx
)
11499 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11500 NULL_RTX
, 0, OPTAB_WIDEN
);
11502 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11503 NULL_RTX
, 0, OPTAB_WIDEN
);
11505 ix86_compare_op0
= tmp
;
11506 ix86_compare_op1
= const0_rtx
;
11507 ix86_expand_branch (code
, label
);
11511 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11512 op1 is a constant and the low word is zero, then we can just
11513 examine the high word. */
11515 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11518 case LT
: case LTU
: case GE
: case GEU
:
11519 ix86_compare_op0
= hi
[0];
11520 ix86_compare_op1
= hi
[1];
11521 ix86_expand_branch (code
, label
);
11527 /* Otherwise, we need two or three jumps. */
11529 label2
= gen_label_rtx ();
11532 code2
= swap_condition (code
);
11533 code3
= unsigned_condition (code
);
11537 case LT
: case GT
: case LTU
: case GTU
:
11540 case LE
: code1
= LT
; code2
= GT
; break;
11541 case GE
: code1
= GT
; code2
= LT
; break;
11542 case LEU
: code1
= LTU
; code2
= GTU
; break;
11543 case GEU
: code1
= GTU
; code2
= LTU
; break;
11545 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11546 case NE
: code2
= UNKNOWN
; break;
11549 gcc_unreachable ();
11554 * if (hi(a) < hi(b)) goto true;
11555 * if (hi(a) > hi(b)) goto false;
11556 * if (lo(a) < lo(b)) goto true;
11560 ix86_compare_op0
= hi
[0];
11561 ix86_compare_op1
= hi
[1];
11563 if (code1
!= UNKNOWN
)
11564 ix86_expand_branch (code1
, label
);
11565 if (code2
!= UNKNOWN
)
11566 ix86_expand_branch (code2
, label2
);
11568 ix86_compare_op0
= lo
[0];
11569 ix86_compare_op1
= lo
[1];
11570 ix86_expand_branch (code3
, label
);
11572 if (code2
!= UNKNOWN
)
11573 emit_label (label2
);
11578 gcc_unreachable ();
11582 /* Split branch based on floating point condition. */
11584 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11585 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11587 rtx second
, bypass
;
11588 rtx label
= NULL_RTX
;
11590 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11593 if (target2
!= pc_rtx
)
11596 code
= reverse_condition_maybe_unordered (code
);
11601 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11602 tmp
, &second
, &bypass
);
11604 /* Remove pushed operand from stack. */
11606 ix86_free_from_memory (GET_MODE (pushed
));
11608 if (split_branch_probability
>= 0)
11610 /* Distribute the probabilities across the jumps.
11611 Assume the BYPASS and SECOND to be always test
11613 probability
= split_branch_probability
;
11615 /* Value of 1 is low enough to make no need for probability
11616 to be updated. Later we may run some experiments and see
11617 if unordered values are more frequent in practice. */
11619 bypass_probability
= 1;
11621 second_probability
= 1;
11623 if (bypass
!= NULL_RTX
)
11625 label
= gen_label_rtx ();
11626 i
= emit_jump_insn (gen_rtx_SET
11628 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11630 gen_rtx_LABEL_REF (VOIDmode
,
11633 if (bypass_probability
>= 0)
11635 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11636 GEN_INT (bypass_probability
),
11639 i
= emit_jump_insn (gen_rtx_SET
11641 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11642 condition
, target1
, target2
)));
11643 if (probability
>= 0)
11645 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11646 GEN_INT (probability
),
11648 if (second
!= NULL_RTX
)
11650 i
= emit_jump_insn (gen_rtx_SET
11652 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11654 if (second_probability
>= 0)
11656 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11657 GEN_INT (second_probability
),
11660 if (label
!= NULL_RTX
)
11661 emit_label (label
);
11665 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11667 rtx ret
, tmp
, tmpreg
, equiv
;
11668 rtx second_test
, bypass_test
;
11670 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11671 return 0; /* FAIL */
11673 gcc_assert (GET_MODE (dest
) == QImode
);
11675 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11676 PUT_MODE (ret
, QImode
);
11681 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11682 if (bypass_test
|| second_test
)
11684 rtx test
= second_test
;
11686 rtx tmp2
= gen_reg_rtx (QImode
);
11689 gcc_assert (!second_test
);
11690 test
= bypass_test
;
11692 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11694 PUT_MODE (test
, QImode
);
11695 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11698 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11700 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11703 /* Attach a REG_EQUAL note describing the comparison result. */
11704 if (ix86_compare_op0
&& ix86_compare_op1
)
11706 equiv
= simplify_gen_relational (code
, QImode
,
11707 GET_MODE (ix86_compare_op0
),
11708 ix86_compare_op0
, ix86_compare_op1
);
11709 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11712 return 1; /* DONE */
11715 /* Expand comparison setting or clearing carry flag. Return true when
11716 successful and set pop for the operation. */
11718 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11720 enum machine_mode mode
=
11721 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11723 /* Do not handle DImode compares that go through special path.
11724 Also we can't deal with FP compares yet. This is possible to add. */
11725 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11728 if (SCALAR_FLOAT_MODE_P (mode
))
11730 rtx second_test
= NULL
, bypass_test
= NULL
;
11731 rtx compare_op
, compare_seq
;
11733 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
11735 /* Shortcut: following common codes never translate
11736 into carry flag compares. */
11737 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11738 || code
== ORDERED
|| code
== UNORDERED
)
11741 /* These comparisons require zero flag; swap operands so they won't. */
11742 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11743 && !TARGET_IEEE_FP
)
11748 code
= swap_condition (code
);
11751 /* Try to expand the comparison and verify that we end up with carry flag
11752 based comparison. This is fails to be true only when we decide to expand
11753 comparison using arithmetic that is not too common scenario. */
11755 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11756 &second_test
, &bypass_test
);
11757 compare_seq
= get_insns ();
11760 if (second_test
|| bypass_test
)
11762 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11763 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11764 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11766 code
= GET_CODE (compare_op
);
11767 if (code
!= LTU
&& code
!= GEU
)
11769 emit_insn (compare_seq
);
11773 if (!INTEGRAL_MODE_P (mode
))
11781 /* Convert a==0 into (unsigned)a<1. */
11784 if (op1
!= const0_rtx
)
11787 code
= (code
== EQ
? LTU
: GEU
);
11790 /* Convert a>b into b<a or a>=b-1. */
11793 if (CONST_INT_P (op1
))
11795 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11796 /* Bail out on overflow. We still can swap operands but that
11797 would force loading of the constant into register. */
11798 if (op1
== const0_rtx
11799 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11801 code
= (code
== GTU
? GEU
: LTU
);
11808 code
= (code
== GTU
? LTU
: GEU
);
11812 /* Convert a>=0 into (unsigned)a<0x80000000. */
11815 if (mode
== DImode
|| op1
!= const0_rtx
)
11817 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11818 code
= (code
== LT
? GEU
: LTU
);
11822 if (mode
== DImode
|| op1
!= constm1_rtx
)
11824 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11825 code
= (code
== LE
? GEU
: LTU
);
11831 /* Swapping operands may cause constant to appear as first operand. */
11832 if (!nonimmediate_operand (op0
, VOIDmode
))
11834 if (no_new_pseudos
)
11836 op0
= force_reg (mode
, op0
);
11838 ix86_compare_op0
= op0
;
11839 ix86_compare_op1
= op1
;
11840 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11841 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11846 ix86_expand_int_movcc (rtx operands
[])
11848 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11849 rtx compare_seq
, compare_op
;
11850 rtx second_test
, bypass_test
;
11851 enum machine_mode mode
= GET_MODE (operands
[0]);
11852 bool sign_bit_compare_p
= false;;
11855 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11856 compare_seq
= get_insns ();
11859 compare_code
= GET_CODE (compare_op
);
11861 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11862 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11863 sign_bit_compare_p
= true;
11865 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11866 HImode insns, we'd be swallowed in word prefix ops. */
11868 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11869 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11870 && CONST_INT_P (operands
[2])
11871 && CONST_INT_P (operands
[3]))
11873 rtx out
= operands
[0];
11874 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11875 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11876 HOST_WIDE_INT diff
;
11879 /* Sign bit compares are better done using shifts than we do by using
11881 if (sign_bit_compare_p
11882 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11883 ix86_compare_op1
, &compare_op
))
11885 /* Detect overlap between destination and compare sources. */
11888 if (!sign_bit_compare_p
)
11890 bool fpcmp
= false;
11892 compare_code
= GET_CODE (compare_op
);
11894 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11895 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11898 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11901 /* To simplify rest of code, restrict to the GEU case. */
11902 if (compare_code
== LTU
)
11904 HOST_WIDE_INT tmp
= ct
;
11907 compare_code
= reverse_condition (compare_code
);
11908 code
= reverse_condition (code
);
11913 PUT_CODE (compare_op
,
11914 reverse_condition_maybe_unordered
11915 (GET_CODE (compare_op
)));
11917 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11921 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11922 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11923 tmp
= gen_reg_rtx (mode
);
11925 if (mode
== DImode
)
11926 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11928 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11932 if (code
== GT
|| code
== GE
)
11933 code
= reverse_condition (code
);
11936 HOST_WIDE_INT tmp
= ct
;
11941 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11942 ix86_compare_op1
, VOIDmode
, 0, -1);
11955 tmp
= expand_simple_binop (mode
, PLUS
,
11957 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11968 tmp
= expand_simple_binop (mode
, IOR
,
11970 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11972 else if (diff
== -1 && ct
)
11982 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11984 tmp
= expand_simple_binop (mode
, PLUS
,
11985 copy_rtx (tmp
), GEN_INT (cf
),
11986 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11994 * andl cf - ct, dest
12004 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12007 tmp
= expand_simple_binop (mode
, AND
,
12009 gen_int_mode (cf
- ct
, mode
),
12010 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12012 tmp
= expand_simple_binop (mode
, PLUS
,
12013 copy_rtx (tmp
), GEN_INT (ct
),
12014 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12017 if (!rtx_equal_p (tmp
, out
))
12018 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
12020 return 1; /* DONE */
12025 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12028 tmp
= ct
, ct
= cf
, cf
= tmp
;
12031 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12033 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12035 /* We may be reversing unordered compare to normal compare, that
12036 is not valid in general (we may convert non-trapping condition
12037 to trapping one), however on i386 we currently emit all
12038 comparisons unordered. */
12039 compare_code
= reverse_condition_maybe_unordered (compare_code
);
12040 code
= reverse_condition_maybe_unordered (code
);
12044 compare_code
= reverse_condition (compare_code
);
12045 code
= reverse_condition (code
);
12049 compare_code
= UNKNOWN
;
12050 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
12051 && CONST_INT_P (ix86_compare_op1
))
12053 if (ix86_compare_op1
== const0_rtx
12054 && (code
== LT
|| code
== GE
))
12055 compare_code
= code
;
12056 else if (ix86_compare_op1
== constm1_rtx
)
12060 else if (code
== GT
)
12065 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12066 if (compare_code
!= UNKNOWN
12067 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
12068 && (cf
== -1 || ct
== -1))
12070 /* If lea code below could be used, only optimize
12071 if it results in a 2 insn sequence. */
12073 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12074 || diff
== 3 || diff
== 5 || diff
== 9)
12075 || (compare_code
== LT
&& ct
== -1)
12076 || (compare_code
== GE
&& cf
== -1))
12079 * notl op1 (if necessary)
12087 code
= reverse_condition (code
);
12090 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12091 ix86_compare_op1
, VOIDmode
, 0, -1);
12093 out
= expand_simple_binop (mode
, IOR
,
12095 out
, 1, OPTAB_DIRECT
);
12096 if (out
!= operands
[0])
12097 emit_move_insn (operands
[0], out
);
12099 return 1; /* DONE */
12104 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12105 || diff
== 3 || diff
== 5 || diff
== 9)
12106 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
12108 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
12114 * lea cf(dest*(ct-cf)),dest
12118 * This also catches the degenerate setcc-only case.
12124 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12125 ix86_compare_op1
, VOIDmode
, 0, 1);
12128 /* On x86_64 the lea instruction operates on Pmode, so we need
12129 to get arithmetics done in proper mode to match. */
12131 tmp
= copy_rtx (out
);
12135 out1
= copy_rtx (out
);
12136 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
12140 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
12146 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
12149 if (!rtx_equal_p (tmp
, out
))
12152 out
= force_operand (tmp
, copy_rtx (out
));
12154 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12156 if (!rtx_equal_p (out
, operands
[0]))
12157 emit_move_insn (operands
[0], copy_rtx (out
));
12159 return 1; /* DONE */
12163 * General case: Jumpful:
12164 * xorl dest,dest cmpl op1, op2
12165 * cmpl op1, op2 movl ct, dest
12166 * setcc dest jcc 1f
12167 * decl dest movl cf, dest
12168 * andl (cf-ct),dest 1:
12171 * Size 20. Size 14.
12173 * This is reasonably steep, but branch mispredict costs are
12174 * high on modern cpus, so consider failing only if optimizing
12178 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12179 && BRANCH_COST
>= 2)
12183 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12188 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12190 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12192 /* We may be reversing unordered compare to normal compare,
12193 that is not valid in general (we may convert non-trapping
12194 condition to trapping one), however on i386 we currently
12195 emit all comparisons unordered. */
12196 code
= reverse_condition_maybe_unordered (code
);
12200 code
= reverse_condition (code
);
12201 if (compare_code
!= UNKNOWN
)
12202 compare_code
= reverse_condition (compare_code
);
12206 if (compare_code
!= UNKNOWN
)
12208 /* notl op1 (if needed)
12213 For x < 0 (resp. x <= -1) there will be no notl,
12214 so if possible swap the constants to get rid of the
12216 True/false will be -1/0 while code below (store flag
12217 followed by decrement) is 0/-1, so the constants need
12218 to be exchanged once more. */
12220 if (compare_code
== GE
|| !cf
)
12222 code
= reverse_condition (code
);
12227 HOST_WIDE_INT tmp
= cf
;
12232 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12233 ix86_compare_op1
, VOIDmode
, 0, -1);
12237 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12238 ix86_compare_op1
, VOIDmode
, 0, 1);
12240 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12241 copy_rtx (out
), 1, OPTAB_DIRECT
);
12244 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12245 gen_int_mode (cf
- ct
, mode
),
12246 copy_rtx (out
), 1, OPTAB_DIRECT
);
12248 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12249 copy_rtx (out
), 1, OPTAB_DIRECT
);
12250 if (!rtx_equal_p (out
, operands
[0]))
12251 emit_move_insn (operands
[0], copy_rtx (out
));
12253 return 1; /* DONE */
12257 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12259 /* Try a few things more with specific constants and a variable. */
12262 rtx var
, orig_out
, out
, tmp
;
12264 if (BRANCH_COST
<= 2)
12265 return 0; /* FAIL */
12267 /* If one of the two operands is an interesting constant, load a
12268 constant with the above and mask it in with a logical operation. */
12270 if (CONST_INT_P (operands
[2]))
12273 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12274 operands
[3] = constm1_rtx
, op
= and_optab
;
12275 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12276 operands
[3] = const0_rtx
, op
= ior_optab
;
12278 return 0; /* FAIL */
12280 else if (CONST_INT_P (operands
[3]))
12283 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12284 operands
[2] = constm1_rtx
, op
= and_optab
;
12285 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12286 operands
[2] = const0_rtx
, op
= ior_optab
;
12288 return 0; /* FAIL */
12291 return 0; /* FAIL */
12293 orig_out
= operands
[0];
12294 tmp
= gen_reg_rtx (mode
);
12297 /* Recurse to get the constant loaded. */
12298 if (ix86_expand_int_movcc (operands
) == 0)
12299 return 0; /* FAIL */
12301 /* Mask in the interesting variable. */
12302 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12304 if (!rtx_equal_p (out
, orig_out
))
12305 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12307 return 1; /* DONE */
12311 * For comparison with above,
12321 if (! nonimmediate_operand (operands
[2], mode
))
12322 operands
[2] = force_reg (mode
, operands
[2]);
12323 if (! nonimmediate_operand (operands
[3], mode
))
12324 operands
[3] = force_reg (mode
, operands
[3]);
12326 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12328 rtx tmp
= gen_reg_rtx (mode
);
12329 emit_move_insn (tmp
, operands
[3]);
12332 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12334 rtx tmp
= gen_reg_rtx (mode
);
12335 emit_move_insn (tmp
, operands
[2]);
12339 if (! register_operand (operands
[2], VOIDmode
)
12341 || ! register_operand (operands
[3], VOIDmode
)))
12342 operands
[2] = force_reg (mode
, operands
[2]);
12345 && ! register_operand (operands
[3], VOIDmode
))
12346 operands
[3] = force_reg (mode
, operands
[3]);
12348 emit_insn (compare_seq
);
12349 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12350 gen_rtx_IF_THEN_ELSE (mode
,
12351 compare_op
, operands
[2],
12354 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12355 gen_rtx_IF_THEN_ELSE (mode
,
12357 copy_rtx (operands
[3]),
12358 copy_rtx (operands
[0]))));
12360 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12361 gen_rtx_IF_THEN_ELSE (mode
,
12363 copy_rtx (operands
[2]),
12364 copy_rtx (operands
[0]))));
12366 return 1; /* DONE */
12369 /* Swap, force into registers, or otherwise massage the two operands
12370 to an sse comparison with a mask result. Thus we differ a bit from
12371 ix86_prepare_fp_compare_args which expects to produce a flags result.
12373 The DEST operand exists to help determine whether to commute commutative
12374 operators. The POP0/POP1 operands are updated in place. The new
12375 comparison code is returned, or UNKNOWN if not implementable. */
12377 static enum rtx_code
12378 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12379 rtx
*pop0
, rtx
*pop1
)
12387 /* We have no LTGT as an operator. We could implement it with
12388 NE & ORDERED, but this requires an extra temporary. It's
12389 not clear that it's worth it. */
12396 /* These are supported directly. */
12403 /* For commutative operators, try to canonicalize the destination
12404 operand to be first in the comparison - this helps reload to
12405 avoid extra moves. */
12406 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12414 /* These are not supported directly. Swap the comparison operands
12415 to transform into something that is supported. */
12419 code
= swap_condition (code
);
12423 gcc_unreachable ();
12429 /* Detect conditional moves that exactly match min/max operational
12430 semantics. Note that this is IEEE safe, as long as we don't
12431 interchange the operands.
12433 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12434 and TRUE if the operation is successful and instructions are emitted. */
12437 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12438 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12440 enum machine_mode mode
;
12446 else if (code
== UNGE
)
12449 if_true
= if_false
;
12455 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12457 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12462 mode
= GET_MODE (dest
);
12464 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12465 but MODE may be a vector mode and thus not appropriate. */
12466 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12468 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12471 if_true
= force_reg (mode
, if_true
);
12472 v
= gen_rtvec (2, if_true
, if_false
);
12473 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12477 code
= is_min
? SMIN
: SMAX
;
12478 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12481 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12485 /* Expand an sse vector comparison. Return the register with the result. */
12488 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12489 rtx op_true
, rtx op_false
)
12491 enum machine_mode mode
= GET_MODE (dest
);
12494 cmp_op0
= force_reg (mode
, cmp_op0
);
12495 if (!nonimmediate_operand (cmp_op1
, mode
))
12496 cmp_op1
= force_reg (mode
, cmp_op1
);
12499 || reg_overlap_mentioned_p (dest
, op_true
)
12500 || reg_overlap_mentioned_p (dest
, op_false
))
12501 dest
= gen_reg_rtx (mode
);
12503 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12504 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12509 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12510 operations. This is used for both scalar and vector conditional moves. */
12513 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12515 enum machine_mode mode
= GET_MODE (dest
);
12518 if (op_false
== CONST0_RTX (mode
))
12520 op_true
= force_reg (mode
, op_true
);
12521 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12522 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12524 else if (op_true
== CONST0_RTX (mode
))
12526 op_false
= force_reg (mode
, op_false
);
12527 x
= gen_rtx_NOT (mode
, cmp
);
12528 x
= gen_rtx_AND (mode
, x
, op_false
);
12529 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12533 op_true
= force_reg (mode
, op_true
);
12534 op_false
= force_reg (mode
, op_false
);
12536 t2
= gen_reg_rtx (mode
);
12538 t3
= gen_reg_rtx (mode
);
12542 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12543 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12545 x
= gen_rtx_NOT (mode
, cmp
);
12546 x
= gen_rtx_AND (mode
, x
, op_false
);
12547 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12549 x
= gen_rtx_IOR (mode
, t3
, t2
);
12550 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12554 /* Expand a floating-point conditional move. Return true if successful. */
12557 ix86_expand_fp_movcc (rtx operands
[])
12559 enum machine_mode mode
= GET_MODE (operands
[0]);
12560 enum rtx_code code
= GET_CODE (operands
[1]);
12561 rtx tmp
, compare_op
, second_test
, bypass_test
;
12563 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12565 enum machine_mode cmode
;
12567 /* Since we've no cmove for sse registers, don't force bad register
12568 allocation just to gain access to it. Deny movcc when the
12569 comparison mode doesn't match the move mode. */
12570 cmode
= GET_MODE (ix86_compare_op0
);
12571 if (cmode
== VOIDmode
)
12572 cmode
= GET_MODE (ix86_compare_op1
);
12576 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12578 &ix86_compare_op1
);
12579 if (code
== UNKNOWN
)
12582 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12583 ix86_compare_op1
, operands
[2],
12587 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12588 ix86_compare_op1
, operands
[2], operands
[3]);
12589 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12593 /* The floating point conditional move instructions don't directly
12594 support conditions resulting from a signed integer comparison. */
12596 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12598 /* The floating point conditional move instructions don't directly
12599 support signed integer comparisons. */
12601 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12603 gcc_assert (!second_test
&& !bypass_test
);
12604 tmp
= gen_reg_rtx (QImode
);
12605 ix86_expand_setcc (code
, tmp
);
12607 ix86_compare_op0
= tmp
;
12608 ix86_compare_op1
= const0_rtx
;
12609 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12611 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12613 tmp
= gen_reg_rtx (mode
);
12614 emit_move_insn (tmp
, operands
[3]);
12617 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12619 tmp
= gen_reg_rtx (mode
);
12620 emit_move_insn (tmp
, operands
[2]);
12624 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12625 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12626 operands
[2], operands
[3])));
12628 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12629 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12630 operands
[3], operands
[0])));
12632 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12633 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12634 operands
[2], operands
[0])));
12639 /* Expand a floating-point vector conditional move; a vcond operation
12640 rather than a movcc operation. */
12643 ix86_expand_fp_vcond (rtx operands
[])
12645 enum rtx_code code
= GET_CODE (operands
[3]);
12648 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12649 &operands
[4], &operands
[5]);
12650 if (code
== UNKNOWN
)
12653 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12654 operands
[5], operands
[1], operands
[2]))
12657 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12658 operands
[1], operands
[2]);
12659 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12663 /* Expand a signed integral vector conditional move. */
12666 ix86_expand_int_vcond (rtx operands
[])
12668 enum machine_mode mode
= GET_MODE (operands
[0]);
12669 enum rtx_code code
= GET_CODE (operands
[3]);
12670 bool negate
= false;
12673 cop0
= operands
[4];
12674 cop1
= operands
[5];
12676 /* Canonicalize the comparison to EQ, GT, GTU. */
12687 code
= reverse_condition (code
);
12693 code
= reverse_condition (code
);
12699 code
= swap_condition (code
);
12700 x
= cop0
, cop0
= cop1
, cop1
= x
;
12704 gcc_unreachable ();
12707 /* Unsigned parallel compare is not supported by the hardware. Play some
12708 tricks to turn this into a signed comparison against 0. */
12711 cop0
= force_reg (mode
, cop0
);
12719 /* Perform a parallel modulo subtraction. */
12720 t1
= gen_reg_rtx (mode
);
12721 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12723 /* Extract the original sign bit of op0. */
12724 mask
= GEN_INT (-0x80000000);
12725 mask
= gen_rtx_CONST_VECTOR (mode
,
12726 gen_rtvec (4, mask
, mask
, mask
, mask
));
12727 mask
= force_reg (mode
, mask
);
12728 t2
= gen_reg_rtx (mode
);
12729 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12731 /* XOR it back into the result of the subtraction. This results
12732 in the sign bit set iff we saw unsigned underflow. */
12733 x
= gen_reg_rtx (mode
);
12734 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12742 /* Perform a parallel unsigned saturating subtraction. */
12743 x
= gen_reg_rtx (mode
);
12744 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12745 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12752 gcc_unreachable ();
12756 cop1
= CONST0_RTX (mode
);
12759 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12760 operands
[1+negate
], operands
[2-negate
]);
12762 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12763 operands
[2-negate
]);
12767 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12768 true if we should do zero extension, else sign extension. HIGH_P is
12769 true if we want the N/2 high elements, else the low elements. */
12772 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12774 enum machine_mode imode
= GET_MODE (operands
[1]);
12775 rtx (*unpack
)(rtx
, rtx
, rtx
);
12782 unpack
= gen_vec_interleave_highv16qi
;
12784 unpack
= gen_vec_interleave_lowv16qi
;
12788 unpack
= gen_vec_interleave_highv8hi
;
12790 unpack
= gen_vec_interleave_lowv8hi
;
12794 unpack
= gen_vec_interleave_highv4si
;
12796 unpack
= gen_vec_interleave_lowv4si
;
12799 gcc_unreachable ();
12802 dest
= gen_lowpart (imode
, operands
[0]);
12805 se
= force_reg (imode
, CONST0_RTX (imode
));
12807 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12808 operands
[1], pc_rtx
, pc_rtx
);
12810 emit_insn (unpack (dest
, operands
[1], se
));
12813 /* Expand conditional increment or decrement using adb/sbb instructions.
12814 The default case using setcc followed by the conditional move can be
12815 done by generic code. */
12817 ix86_expand_int_addcc (rtx operands
[])
12819 enum rtx_code code
= GET_CODE (operands
[1]);
12821 rtx val
= const0_rtx
;
12822 bool fpcmp
= false;
12823 enum machine_mode mode
= GET_MODE (operands
[0]);
12825 if (operands
[3] != const1_rtx
12826 && operands
[3] != constm1_rtx
)
12828 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12829 ix86_compare_op1
, &compare_op
))
12831 code
= GET_CODE (compare_op
);
12833 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12834 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12837 code
= ix86_fp_compare_code_to_integer (code
);
12844 PUT_CODE (compare_op
,
12845 reverse_condition_maybe_unordered
12846 (GET_CODE (compare_op
)));
12848 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12850 PUT_MODE (compare_op
, mode
);
12852 /* Construct either adc or sbb insn. */
12853 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12855 switch (GET_MODE (operands
[0]))
12858 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12861 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12864 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12867 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12870 gcc_unreachable ();
12875 switch (GET_MODE (operands
[0]))
12878 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12881 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12884 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12887 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12890 gcc_unreachable ();
12893 return 1; /* DONE */
12897 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12898 works for floating pointer parameters and nonoffsetable memories.
12899 For pushes, it returns just stack offsets; the values will be saved
12900 in the right order. Maximally three parts are generated. */
12903 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12908 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12910 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12912 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12913 gcc_assert (size
>= 2 && size
<= 3);
12915 /* Optimize constant pool reference to immediates. This is used by fp
12916 moves, that force all constants to memory to allow combining. */
12917 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12919 rtx tmp
= maybe_get_pool_constant (operand
);
12924 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12926 /* The only non-offsetable memories we handle are pushes. */
12927 int ok
= push_operand (operand
, VOIDmode
);
12931 operand
= copy_rtx (operand
);
12932 PUT_MODE (operand
, Pmode
);
12933 parts
[0] = parts
[1] = parts
[2] = operand
;
12937 if (GET_CODE (operand
) == CONST_VECTOR
)
12939 enum machine_mode imode
= int_mode_for_mode (mode
);
12940 /* Caution: if we looked through a constant pool memory above,
12941 the operand may actually have a different mode now. That's
12942 ok, since we want to pun this all the way back to an integer. */
12943 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12944 gcc_assert (operand
!= NULL
);
12950 if (mode
== DImode
)
12951 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12954 if (REG_P (operand
))
12956 gcc_assert (reload_completed
);
12957 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12958 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12960 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12962 else if (offsettable_memref_p (operand
))
12964 operand
= adjust_address (operand
, SImode
, 0);
12965 parts
[0] = operand
;
12966 parts
[1] = adjust_address (operand
, SImode
, 4);
12968 parts
[2] = adjust_address (operand
, SImode
, 8);
12970 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12975 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12979 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12980 parts
[2] = gen_int_mode (l
[2], SImode
);
12983 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12986 gcc_unreachable ();
12988 parts
[1] = gen_int_mode (l
[1], SImode
);
12989 parts
[0] = gen_int_mode (l
[0], SImode
);
12992 gcc_unreachable ();
12997 if (mode
== TImode
)
12998 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12999 if (mode
== XFmode
|| mode
== TFmode
)
13001 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
13002 if (REG_P (operand
))
13004 gcc_assert (reload_completed
);
13005 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
13006 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
13008 else if (offsettable_memref_p (operand
))
13010 operand
= adjust_address (operand
, DImode
, 0);
13011 parts
[0] = operand
;
13012 parts
[1] = adjust_address (operand
, upper_mode
, 8);
13014 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13019 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13020 real_to_target (l
, &r
, mode
);
13022 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13023 if (HOST_BITS_PER_WIDE_INT
>= 64)
13026 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13027 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
13030 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
13032 if (upper_mode
== SImode
)
13033 parts
[1] = gen_int_mode (l
[2], SImode
);
13034 else if (HOST_BITS_PER_WIDE_INT
>= 64)
13037 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13038 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
13041 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
13044 gcc_unreachable ();
13051 /* Emit insns to perform a move or push of DI, DF, and XF values.
13052 Return false when normal moves are needed; true when all required
13053 insns have been emitted. Operands 2-4 contain the input values
13054 int the correct order; operands 5-7 contain the output values. */
13057 ix86_split_long_move (rtx operands
[])
13062 int collisions
= 0;
13063 enum machine_mode mode
= GET_MODE (operands
[0]);
13065 /* The DFmode expanders may ask us to move double.
13066 For 64bit target this is single move. By hiding the fact
13067 here we simplify i386.md splitters. */
13068 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
13070 /* Optimize constant pool reference to immediates. This is used by
13071 fp moves, that force all constants to memory to allow combining. */
13073 if (MEM_P (operands
[1])
13074 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
13075 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
13076 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
13077 if (push_operand (operands
[0], VOIDmode
))
13079 operands
[0] = copy_rtx (operands
[0]);
13080 PUT_MODE (operands
[0], Pmode
);
13083 operands
[0] = gen_lowpart (DImode
, operands
[0]);
13084 operands
[1] = gen_lowpart (DImode
, operands
[1]);
13085 emit_move_insn (operands
[0], operands
[1]);
13089 /* The only non-offsettable memory we handle is push. */
13090 if (push_operand (operands
[0], VOIDmode
))
13093 gcc_assert (!MEM_P (operands
[0])
13094 || offsettable_memref_p (operands
[0]));
13096 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
13097 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
13099 /* When emitting push, take care for source operands on the stack. */
13100 if (push
&& MEM_P (operands
[1])
13101 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
13104 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
13105 XEXP (part
[1][2], 0));
13106 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
13107 XEXP (part
[1][1], 0));
13110 /* We need to do copy in the right order in case an address register
13111 of the source overlaps the destination. */
13112 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
13114 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
13116 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13119 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
13122 /* Collision in the middle part can be handled by reordering. */
13123 if (collisions
== 1 && nparts
== 3
13124 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13127 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
13128 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
13131 /* If there are more collisions, we can't handle it by reordering.
13132 Do an lea to the last part and use only one colliding move. */
13133 else if (collisions
> 1)
13139 base
= part
[0][nparts
- 1];
13141 /* Handle the case when the last part isn't valid for lea.
13142 Happens in 64-bit mode storing the 12-byte XFmode. */
13143 if (GET_MODE (base
) != Pmode
)
13144 base
= gen_rtx_REG (Pmode
, REGNO (base
));
13146 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
13147 part
[1][0] = replace_equiv_address (part
[1][0], base
);
13148 part
[1][1] = replace_equiv_address (part
[1][1],
13149 plus_constant (base
, UNITS_PER_WORD
));
13151 part
[1][2] = replace_equiv_address (part
[1][2],
13152 plus_constant (base
, 8));
13162 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
13163 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
13164 emit_move_insn (part
[0][2], part
[1][2]);
13169 /* In 64bit mode we don't have 32bit push available. In case this is
13170 register, it is OK - we will just use larger counterpart. We also
13171 retype memory - these comes from attempt to avoid REX prefix on
13172 moving of second half of TFmode value. */
13173 if (GET_MODE (part
[1][1]) == SImode
)
13175 switch (GET_CODE (part
[1][1]))
13178 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
13182 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
13186 gcc_unreachable ();
13189 if (GET_MODE (part
[1][0]) == SImode
)
13190 part
[1][0] = part
[1][1];
13193 emit_move_insn (part
[0][1], part
[1][1]);
13194 emit_move_insn (part
[0][0], part
[1][0]);
13198 /* Choose correct order to not overwrite the source before it is copied. */
13199 if ((REG_P (part
[0][0])
13200 && REG_P (part
[1][1])
13201 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
13203 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
13205 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
13209 operands
[2] = part
[0][2];
13210 operands
[3] = part
[0][1];
13211 operands
[4] = part
[0][0];
13212 operands
[5] = part
[1][2];
13213 operands
[6] = part
[1][1];
13214 operands
[7] = part
[1][0];
13218 operands
[2] = part
[0][1];
13219 operands
[3] = part
[0][0];
13220 operands
[5] = part
[1][1];
13221 operands
[6] = part
[1][0];
13228 operands
[2] = part
[0][0];
13229 operands
[3] = part
[0][1];
13230 operands
[4] = part
[0][2];
13231 operands
[5] = part
[1][0];
13232 operands
[6] = part
[1][1];
13233 operands
[7] = part
[1][2];
13237 operands
[2] = part
[0][0];
13238 operands
[3] = part
[0][1];
13239 operands
[5] = part
[1][0];
13240 operands
[6] = part
[1][1];
13244 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13247 if (CONST_INT_P (operands
[5])
13248 && operands
[5] != const0_rtx
13249 && REG_P (operands
[2]))
13251 if (CONST_INT_P (operands
[6])
13252 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
13253 operands
[6] = operands
[2];
13256 && CONST_INT_P (operands
[7])
13257 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
13258 operands
[7] = operands
[2];
13262 && CONST_INT_P (operands
[6])
13263 && operands
[6] != const0_rtx
13264 && REG_P (operands
[3])
13265 && CONST_INT_P (operands
[7])
13266 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
13267 operands
[7] = operands
[3];
13270 emit_move_insn (operands
[2], operands
[5]);
13271 emit_move_insn (operands
[3], operands
[6]);
13273 emit_move_insn (operands
[4], operands
[7]);
13278 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13279 left shift by a constant, either using a single shift or
13280 a sequence of add instructions. */
13283 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
13287 emit_insn ((mode
== DImode
13289 : gen_adddi3
) (operand
, operand
, operand
));
13291 else if (!optimize_size
13292 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
13295 for (i
=0; i
<count
; i
++)
13297 emit_insn ((mode
== DImode
13299 : gen_adddi3
) (operand
, operand
, operand
));
13303 emit_insn ((mode
== DImode
13305 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
13309 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13311 rtx low
[2], high
[2];
13313 const int single_width
= mode
== DImode
? 32 : 64;
13315 if (CONST_INT_P (operands
[2]))
13317 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13318 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13320 if (count
>= single_width
)
13322 emit_move_insn (high
[0], low
[1]);
13323 emit_move_insn (low
[0], const0_rtx
);
13325 if (count
> single_width
)
13326 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13330 if (!rtx_equal_p (operands
[0], operands
[1]))
13331 emit_move_insn (operands
[0], operands
[1]);
13332 emit_insn ((mode
== DImode
13334 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13335 ix86_expand_ashl_const (low
[0], count
, mode
);
13340 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13342 if (operands
[1] == const1_rtx
)
13344 /* Assuming we've chosen a QImode capable registers, then 1 << N
13345 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13346 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13348 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13350 ix86_expand_clear (low
[0]);
13351 ix86_expand_clear (high
[0]);
13352 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13354 d
= gen_lowpart (QImode
, low
[0]);
13355 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13356 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13357 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13359 d
= gen_lowpart (QImode
, high
[0]);
13360 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13361 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13362 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13365 /* Otherwise, we can get the same results by manually performing
13366 a bit extract operation on bit 5/6, and then performing the two
13367 shifts. The two methods of getting 0/1 into low/high are exactly
13368 the same size. Avoiding the shift in the bit extract case helps
13369 pentium4 a bit; no one else seems to care much either way. */
13374 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13375 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13377 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13378 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13380 emit_insn ((mode
== DImode
13382 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13383 emit_insn ((mode
== DImode
13385 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13386 emit_move_insn (low
[0], high
[0]);
13387 emit_insn ((mode
== DImode
13389 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13392 emit_insn ((mode
== DImode
13394 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13395 emit_insn ((mode
== DImode
13397 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13401 if (operands
[1] == constm1_rtx
)
13403 /* For -1 << N, we can avoid the shld instruction, because we
13404 know that we're shifting 0...31/63 ones into a -1. */
13405 emit_move_insn (low
[0], constm1_rtx
);
13407 emit_move_insn (high
[0], low
[0]);
13409 emit_move_insn (high
[0], constm1_rtx
);
13413 if (!rtx_equal_p (operands
[0], operands
[1]))
13414 emit_move_insn (operands
[0], operands
[1]);
13416 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13417 emit_insn ((mode
== DImode
13419 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13422 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13424 if (TARGET_CMOVE
&& scratch
)
13426 ix86_expand_clear (scratch
);
13427 emit_insn ((mode
== DImode
13428 ? gen_x86_shift_adj_1
13429 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13432 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13436 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13438 rtx low
[2], high
[2];
13440 const int single_width
= mode
== DImode
? 32 : 64;
13442 if (CONST_INT_P (operands
[2]))
13444 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13445 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13447 if (count
== single_width
* 2 - 1)
13449 emit_move_insn (high
[0], high
[1]);
13450 emit_insn ((mode
== DImode
13452 : gen_ashrdi3
) (high
[0], high
[0],
13453 GEN_INT (single_width
- 1)));
13454 emit_move_insn (low
[0], high
[0]);
13457 else if (count
>= single_width
)
13459 emit_move_insn (low
[0], high
[1]);
13460 emit_move_insn (high
[0], low
[0]);
13461 emit_insn ((mode
== DImode
13463 : gen_ashrdi3
) (high
[0], high
[0],
13464 GEN_INT (single_width
- 1)));
13465 if (count
> single_width
)
13466 emit_insn ((mode
== DImode
13468 : gen_ashrdi3
) (low
[0], low
[0],
13469 GEN_INT (count
- single_width
)));
13473 if (!rtx_equal_p (operands
[0], operands
[1]))
13474 emit_move_insn (operands
[0], operands
[1]);
13475 emit_insn ((mode
== DImode
13477 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13478 emit_insn ((mode
== DImode
13480 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13485 if (!rtx_equal_p (operands
[0], operands
[1]))
13486 emit_move_insn (operands
[0], operands
[1]);
13488 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13490 emit_insn ((mode
== DImode
13492 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13493 emit_insn ((mode
== DImode
13495 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13497 if (TARGET_CMOVE
&& scratch
)
13499 emit_move_insn (scratch
, high
[0]);
13500 emit_insn ((mode
== DImode
13502 : gen_ashrdi3
) (scratch
, scratch
,
13503 GEN_INT (single_width
- 1)));
13504 emit_insn ((mode
== DImode
13505 ? gen_x86_shift_adj_1
13506 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13510 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13515 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13517 rtx low
[2], high
[2];
13519 const int single_width
= mode
== DImode
? 32 : 64;
13521 if (CONST_INT_P (operands
[2]))
13523 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13524 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13526 if (count
>= single_width
)
13528 emit_move_insn (low
[0], high
[1]);
13529 ix86_expand_clear (high
[0]);
13531 if (count
> single_width
)
13532 emit_insn ((mode
== DImode
13534 : gen_lshrdi3
) (low
[0], low
[0],
13535 GEN_INT (count
- single_width
)));
13539 if (!rtx_equal_p (operands
[0], operands
[1]))
13540 emit_move_insn (operands
[0], operands
[1]);
13541 emit_insn ((mode
== DImode
13543 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13544 emit_insn ((mode
== DImode
13546 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13551 if (!rtx_equal_p (operands
[0], operands
[1]))
13552 emit_move_insn (operands
[0], operands
[1]);
13554 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13556 emit_insn ((mode
== DImode
13558 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13559 emit_insn ((mode
== DImode
13561 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13563 /* Heh. By reversing the arguments, we can reuse this pattern. */
13564 if (TARGET_CMOVE
&& scratch
)
13566 ix86_expand_clear (scratch
);
13567 emit_insn ((mode
== DImode
13568 ? gen_x86_shift_adj_1
13569 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13573 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13577 /* Predict just emitted jump instruction to be taken with probability PROB. */
13579 predict_jump (int prob
)
13581 rtx insn
= get_last_insn ();
13582 gcc_assert (JUMP_P (insn
));
13584 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13589 /* Helper function for the string operations below. Dest VARIABLE whether
13590 it is aligned to VALUE bytes. If true, jump to the label. */
13592 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13594 rtx label
= gen_label_rtx ();
13595 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13596 if (GET_MODE (variable
) == DImode
)
13597 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13599 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13600 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13603 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13605 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13609 /* Adjust COUNTER by the VALUE. */
13611 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13613 if (GET_MODE (countreg
) == DImode
)
13614 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13616 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13619 /* Zero extend possibly SImode EXP to Pmode register. */
13621 ix86_zero_extend_to_Pmode (rtx exp
)
13624 if (GET_MODE (exp
) == VOIDmode
)
13625 return force_reg (Pmode
, exp
);
13626 if (GET_MODE (exp
) == Pmode
)
13627 return copy_to_mode_reg (Pmode
, exp
);
13628 r
= gen_reg_rtx (Pmode
);
13629 emit_insn (gen_zero_extendsidi2 (r
, exp
));
/* NOTE(review): extraction damage -- lines split; declarations of `sc`, the
   scale==1 early-out, and the final return are missing here.  Code text left
   byte-identical.  */
/* Divides COUNTREG by SCALE (a power of two -- the shift uses
   exact_log2 (scale)): constant counts are divided at compile time,
   otherwise a logical right shift is expanded.  */
13633 /* Divide COUNTREG by SCALE. */
13635 scale_counter (rtx countreg
, int scale
)
13638 rtx piece_size_mask
;
13642 if (CONST_INT_P (countreg
))
13643 return GEN_INT (INTVAL (countreg
) / scale
);
13644 gcc_assert (REG_P (countreg
));
13646 piece_size_mask
= GEN_INT (scale
- 1);
13647 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13648 GEN_INT (exact_log2 (scale
)),
13649 NULL
, 1, OPTAB_DIRECT
);
/* NOTE(review): extraction damage -- the return statements for the
   non-CONST_INT and 64-bit branches are missing from this view.  Code text
   left byte-identical.  */
/* Picks the machine mode for a memcpy/memset loop counter: the counter's own
   mode when it has one; for constants, presumably DImode only when the value
   does not fit 32 bits on 64-bit targets, SImode otherwise -- TODO confirm
   the missing returns against upstream.  */
13653 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13654 DImode for constant loop counts. */
13656 static enum machine_mode
13657 counter_mode (rtx count_exp
)
13659 if (GET_MODE (count_exp
) != VOIDmode
)
13660 return GET_MODE (count_exp
);
13661 if (GET_CODE (count_exp
) != CONST_INT
)
13663 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
/* NOTE(review): extraction damage throughout -- lines split mid-expression
   and many lines missing (return type, braces, several if/else arms, the
   EXPECTED_SIZE parameter declaration).  Too much is absent for any rewrite;
   code text left byte-identical with intent notes only.  */
/* Emits an unrolled copy (SRCPTR non-NULL) or set (SRCPTR NULL) loop over
   chunks of MODE, UNROLL chunks per iteration.  Visible structure:
     - mask COUNT down to a whole number of unrolled chunks (`size`);
     - guard jump around the loop when size may be zero (10% predicted);
     - loop body: index `iter` addresses dest/src, moves either through a
       single temporary or through up to 4 tmpreg[] temporaries (copy case),
       or stores VALUE (set case);
     - branch back predicted from EXPECTED_SIZE when known, else 80%;
     - finally advance DESTPTR (and SRCPTR) by the bytes processed.  */
13668 /* When SRCPTR is non-NULL, output simple loop to move memory
13669 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13670 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13671 equivalent loop to set memory by VALUE (supposed to be in MODE).
13673 The size is rounded down to whole number of chunk size moved at once.
13674 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13678 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13679 rtx destptr
, rtx srcptr
, rtx value
,
13680 rtx count
, enum machine_mode mode
, int unroll
,
13683 rtx out_label
, top_label
, iter
, tmp
;
13684 enum machine_mode iter_mode
= counter_mode (count
);
13685 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13686 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13692 top_label
= gen_label_rtx ();
13693 out_label
= gen_label_rtx ();
13694 iter
= gen_reg_rtx (iter_mode
);
13696 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13697 NULL
, 1, OPTAB_DIRECT
);
13698 /* Those two should combine. */
13699 if (piece_size
== const1_rtx
)
13701 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13703 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13705 emit_move_insn (iter
, const0_rtx
);
13707 emit_label (top_label
);
13709 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13710 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13711 destmem
= change_address (destmem
, mode
, x_addr
);
13715 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13716 srcmem
= change_address (srcmem
, mode
, y_addr
);
13718 /* When unrolling for chips that reorder memory reads and writes,
13719 we can save registers by using single temporary.
13720 Also using 4 temporaries is overkill in 32bit mode. */
13721 if (!TARGET_64BIT
&& 0)
13723 for (i
= 0; i
< unroll
; i
++)
13728 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13730 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13732 emit_move_insn (destmem
, srcmem
);
13738 gcc_assert (unroll
<= 4);
13739 for (i
= 0; i
< unroll
; i
++)
13741 tmpreg
[i
] = gen_reg_rtx (mode
);
13745 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13747 emit_move_insn (tmpreg
[i
], srcmem
);
13749 for (i
= 0; i
< unroll
; i
++)
13754 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13756 emit_move_insn (destmem
, tmpreg
[i
]);
13761 for (i
= 0; i
< unroll
; i
++)
13765 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13766 emit_move_insn (destmem
, value
);
13769 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13770 true, OPTAB_LIB_WIDEN
);
13772 emit_move_insn (iter
, tmp
);
13774 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13776 if (expected_size
!= -1)
13778 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13779 if (expected_size
== 0)
13781 else if (expected_size
> REG_BR_PROB_BASE
)
13782 predict_jump (REG_BR_PROB_BASE
- 1);
13784 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13787 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13788 iter
= ix86_zero_extend_to_Pmode (iter
);
13789 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13790 true, OPTAB_LIB_WIDEN
);
13791 if (tmp
!= destptr
)
13792 emit_move_insn (destptr
, tmp
);
13795 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13796 true, OPTAB_LIB_WIDEN
);
13798 emit_move_insn (srcptr
, tmp
);
13800 emit_label (out_label
);
/* NOTE(review): extraction damage -- lines split; the COUNT parameter line,
   braces, the `else` introducing the QImode path, and the trailing argument
   of gen_rep_mov are missing here.  Code text left byte-identical.  */
/* Emits a "rep; mov" of COUNT bytes: normalizes dest/src MEMs to BLKmode,
   scales the byte count down to GET_MODE_SIZE (mode) units, and builds the
   dest/src end-address expressions (ptr + (countreg << log2 size) for wide
   modes, ptr + countreg for QImode) expected by the rep_mov pattern.
   The leading check appears to widen a known 4-byte-multiple QImode copy --
   the body of that `if` is missing; confirm against upstream.  */
13803 /* Output "rep; mov" instruction.
13804 Arguments have same meaning as for previous function */
13806 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13807 rtx destptr
, rtx srcptr
,
13809 enum machine_mode mode
)
13815 /* If the size is known, it is shorter to use rep movs. */
13816 if (mode
== QImode
&& CONST_INT_P (count
)
13817 && !(INTVAL (count
) & 3))
13820 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13821 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13822 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13823 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13824 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13825 if (mode
!= QImode
)
13827 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13828 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13829 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13830 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13831 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13832 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13836 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13837 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13839 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
/* NOTE(review): extraction damage -- lines split; the COUNT parameter line,
   braces and the wide-mode `else` are missing here.  Code text left
   byte-identical.  */
/* Emits a "rep; stos": normalizes the dest MEM to BLKmode, forces VALUE into
   a register of MODE, scales COUNT to MODE-sized units, and builds the dest
   end-address expression (shifted for wide modes, plain add for QImode)
   for the rep_stos pattern.  */
13843 /* Output "rep; stos" instruction.
13844 Arguments have same meaning as for previous function */
13846 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13848 enum machine_mode mode
)
13853 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13854 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13855 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13856 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13857 if (mode
!= QImode
)
13859 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13860 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13861 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13864 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13865 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
/* NOTE(review): extraction damage -- return type ("static void" upstream --
   TODO confirm) and braces are missing; lines split.  Code text left
   byte-identical.  */
/* Emits one string-move of a MODE-sized piece at OFFSET from SRCMEM to
   DESTMEM, deriving properly-offset MEMs for aliasing info.  */
13869 emit_strmov (rtx destmem
, rtx srcmem
,
13870 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13872 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13873 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13874 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
/* NOTE(review): extraction damage throughout -- lines split and many missing
   (return type, braces, the TARGET_64BIT vs SImode+SImode selection around
   the 16- and 8-byte cases, the max_size threshold checks guarding the
   aligntest ladders).  Code text left byte-identical; intent notes only.  */
/* Copies the trailing COUNT & (MAX_SIZE - 1) bytes.  Three visible regimes:
     1) COUNT constant: a straight-line jump-free ladder of emit_strmov
        calls keyed off bits 0x10/0x08/0x04/0x02/0x01 of the residual count;
     2) (fallthrough for large MAX_SIZE): mask COUNT and run a byte loop via
        expand_set_or_movmem_via_loop;
     3) COUNT in a register: a ladder of ix86_expand_aligntest-guarded single
        moves -- using string moves when TARGET_SINGLE_STRINGOP, otherwise
        plain moves addressed through a maintained `offset` register.  */
13877 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13879 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13880 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13883 if (CONST_INT_P (count
))
13885 HOST_WIDE_INT countval
= INTVAL (count
);
13888 if ((countval
& 0x10) && max_size
> 16)
13892 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13893 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13896 gcc_unreachable ();
13899 if ((countval
& 0x08) && max_size
> 8)
13902 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13905 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13906 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
13910 if ((countval
& 0x04) && max_size
> 4)
13912 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13915 if ((countval
& 0x02) && max_size
> 2)
13917 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13920 if ((countval
& 0x01) && max_size
> 1)
13922 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13929 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13930 count
, 1, OPTAB_DIRECT
);
13931 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13932 count
, QImode
, 1, 4);
13936 /* When there are stringops, we can cheaply increase dest and src pointers.
13937 Otherwise we save code size by maintaining offset (zero is readily
13938 available from preceding rep operation) and using x86 addressing modes.
13940 if (TARGET_SINGLE_STRINGOP
)
13944 rtx label
= ix86_expand_aligntest (count
, 4, true);
13945 src
= change_address (srcmem
, SImode
, srcptr
);
13946 dest
= change_address (destmem
, SImode
, destptr
);
13947 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13948 emit_label (label
);
13949 LABEL_NUSES (label
) = 1;
13953 rtx label
= ix86_expand_aligntest (count
, 2, true);
13954 src
= change_address (srcmem
, HImode
, srcptr
);
13955 dest
= change_address (destmem
, HImode
, destptr
);
13956 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13957 emit_label (label
);
13958 LABEL_NUSES (label
) = 1;
13962 rtx label
= ix86_expand_aligntest (count
, 1, true);
13963 src
= change_address (srcmem
, QImode
, srcptr
);
13964 dest
= change_address (destmem
, QImode
, destptr
);
13965 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13966 emit_label (label
);
13967 LABEL_NUSES (label
) = 1;
13972 rtx offset
= force_reg (Pmode
, const0_rtx
);
13977 rtx label
= ix86_expand_aligntest (count
, 4, true);
13978 src
= change_address (srcmem
, SImode
, srcptr
);
13979 dest
= change_address (destmem
, SImode
, destptr
);
13980 emit_move_insn (dest
, src
);
13981 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13982 true, OPTAB_LIB_WIDEN
);
13984 emit_move_insn (offset
, tmp
);
13985 emit_label (label
);
13986 LABEL_NUSES (label
) = 1;
13990 rtx label
= ix86_expand_aligntest (count
, 2, true);
13991 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13992 src
= change_address (srcmem
, HImode
, tmp
);
13993 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13994 dest
= change_address (destmem
, HImode
, tmp
);
13995 emit_move_insn (dest
, src
);
13996 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13997 true, OPTAB_LIB_WIDEN
);
13999 emit_move_insn (offset
, tmp
);
14000 emit_label (label
);
14001 LABEL_NUSES (label
) = 1;
14005 rtx label
= ix86_expand_aligntest (count
, 1, true);
14006 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14007 src
= change_address (srcmem
, QImode
, tmp
);
14008 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14009 dest
= change_address (destmem
, QImode
, tmp
);
14010 emit_move_insn (dest
, src
);
14011 emit_label (label
);
14012 LABEL_NUSES (label
) = 1;
/* NOTE(review): extraction damage -- return type, braces and the trailing
   arguments of expand_set_or_movmem_via_loop are missing; lines split.
   Code text left byte-identical.  */
/* Sets the trailing COUNT & (MAX_SIZE - 1) bytes to VALUE using a byte loop:
   masks COUNT in place, then delegates to expand_set_or_movmem_via_loop
   with a NULL source (set mode) and the QImode low part of VALUE.  */
14017 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14019 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
14020 rtx count
, int max_size
)
14023 expand_simple_binop (counter_mode (count
), AND
, count
,
14024 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
14025 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
14026 gen_lowpart (QImode
, value
), count
, QImode
,
/* NOTE(review): extraction damage throughout -- lines split and many missing
   (return type, braces, the TARGET_64BIT selections, the max_size threshold
   checks before each aligntest ladder, the early dispatch to the loop
   variant).  Code text left byte-identical; intent notes only.  */
/* Stores the trailing COUNT & (MAX_SIZE - 1) bytes of VALUE.  Mirrors
   expand_movmem_epilogue:
     1) COUNT constant: jump-free ladder of gen_strset stores keyed off bits
        0x10/0x08/0x04/0x02/0x01 (wide stores use the pre-promoted VALUE,
        narrow ones gen_lowpart of it);
     2) fallback to expand_setmem_epilogue_via_loop;
     3) COUNT in a register: aligntest-guarded gen_strset ladder for
        16/8/4/2/1-byte residues.  */
14030 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14032 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
14036 if (CONST_INT_P (count
))
14038 HOST_WIDE_INT countval
= INTVAL (count
);
14041 if ((countval
& 0x10) && max_size
> 16)
14045 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14046 emit_insn (gen_strset (destptr
, dest
, value
));
14047 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
14048 emit_insn (gen_strset (destptr
, dest
, value
));
14051 gcc_unreachable ();
14054 if ((countval
& 0x08) && max_size
> 8)
14058 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14059 emit_insn (gen_strset (destptr
, dest
, value
));
14063 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14064 emit_insn (gen_strset (destptr
, dest
, value
));
14065 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
14066 emit_insn (gen_strset (destptr
, dest
, value
));
14070 if ((countval
& 0x04) && max_size
> 4)
14072 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14073 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14076 if ((countval
& 0x02) && max_size
> 2)
14078 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
14079 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14082 if ((countval
& 0x01) && max_size
> 1)
14084 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
14085 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14092 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
14097 rtx label
= ix86_expand_aligntest (count
, 16, true);
14100 dest
= change_address (destmem
, DImode
, destptr
);
14101 emit_insn (gen_strset (destptr
, dest
, value
));
14102 emit_insn (gen_strset (destptr
, dest
, value
));
14106 dest
= change_address (destmem
, SImode
, destptr
);
14107 emit_insn (gen_strset (destptr
, dest
, value
));
14108 emit_insn (gen_strset (destptr
, dest
, value
));
14109 emit_insn (gen_strset (destptr
, dest
, value
));
14110 emit_insn (gen_strset (destptr
, dest
, value
));
14112 emit_label (label
);
14113 LABEL_NUSES (label
) = 1;
14117 rtx label
= ix86_expand_aligntest (count
, 8, true);
14120 dest
= change_address (destmem
, DImode
, destptr
);
14121 emit_insn (gen_strset (destptr
, dest
, value
));
14125 dest
= change_address (destmem
, SImode
, destptr
);
14126 emit_insn (gen_strset (destptr
, dest
, value
));
14127 emit_insn (gen_strset (destptr
, dest
, value
));
14129 emit_label (label
);
14130 LABEL_NUSES (label
) = 1;
14134 rtx label
= ix86_expand_aligntest (count
, 4, true);
14135 dest
= change_address (destmem
, SImode
, destptr
);
14136 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14137 emit_label (label
);
14138 LABEL_NUSES (label
) = 1;
14142 rtx label
= ix86_expand_aligntest (count
, 2, true);
14143 dest
= change_address (destmem
, HImode
, destptr
);
14144 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14145 emit_label (label
);
14146 LABEL_NUSES (label
) = 1;
14150 rtx label
= ix86_expand_aligntest (count
, 1, true);
14151 dest
= change_address (destmem
, QImode
, destptr
);
14152 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14153 emit_label (label
);
14154 LABEL_NUSES (label
) = 1;
/* NOTE(review): extraction damage -- return type and braces missing; lines
   split.  Code text left byte-identical.  */
/* Copies 1, 2 and/or 4 bytes as needed to raise DEST's alignment from ALIGN
   to DESIRED_ALIGNMENT: each step tests the corresponding low address bit of
   DESTPTR, copies one piece of that size, and decrements COUNT to match.
   Supports desired alignment up to 8 (asserted).  */
14158 /* Copy enough from DEST to SRC to align DEST known to be aligned by ALIGN to
14159 DESIRED_ALIGNMENT. */
14161 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
14162 rtx destptr
, rtx srcptr
, rtx count
,
14163 int align
, int desired_alignment
)
14165 if (align
<= 1 && desired_alignment
> 1)
14167 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14168 srcmem
= change_address (srcmem
, QImode
, srcptr
);
14169 destmem
= change_address (destmem
, QImode
, destptr
);
14170 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14171 ix86_adjust_counter (count
, 1);
14172 emit_label (label
);
14173 LABEL_NUSES (label
) = 1;
14175 if (align
<= 2 && desired_alignment
> 2)
14177 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14178 srcmem
= change_address (srcmem
, HImode
, srcptr
);
14179 destmem
= change_address (destmem
, HImode
, destptr
);
14180 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14181 ix86_adjust_counter (count
, 2);
14182 emit_label (label
);
14183 LABEL_NUSES (label
) = 1;
14185 if (align
<= 4 && desired_alignment
> 4)
14187 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14188 srcmem
= change_address (srcmem
, SImode
, srcptr
);
14189 destmem
= change_address (destmem
, SImode
, destptr
);
14190 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14191 ix86_adjust_counter (count
, 4);
14192 emit_label (label
);
14193 LABEL_NUSES (label
) = 1;
14195 gcc_assert (desired_alignment
<= 8);
/* NOTE(review): extraction damage -- return type and braces missing; lines
   split.  Code text left byte-identical.  */
/* Memset counterpart of expand_movmem_prologue: stores 1, 2 and/or 4 bytes
   of VALUE (via gen_lowpart) to raise DEST's alignment from ALIGN to
   DESIRED_ALIGNMENT, adjusting COUNT after each conditional store.  */
14198 /* Set enough from DEST to align DEST known to be aligned by ALIGN to
14199 DESIRED_ALIGNMENT. */
14201 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
14202 int align
, int desired_alignment
)
14204 if (align
<= 1 && desired_alignment
> 1)
14206 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14207 destmem
= change_address (destmem
, QImode
, destptr
);
14208 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
14209 ix86_adjust_counter (count
, 1);
14210 emit_label (label
);
14211 LABEL_NUSES (label
) = 1;
14213 if (align
<= 2 && desired_alignment
> 2)
14215 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14216 destmem
= change_address (destmem
, HImode
, destptr
);
14217 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
14218 ix86_adjust_counter (count
, 2);
14219 emit_label (label
);
14220 LABEL_NUSES (label
) = 1;
14222 if (align
<= 4 && desired_alignment
> 4)
14224 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14225 destmem
= change_address (destmem
, SImode
, destptr
);
14226 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
14227 ix86_adjust_counter (count
, 4);
14228 emit_label (label
);
14229 LABEL_NUSES (label
) = 1;
14231 gcc_assert (desired_alignment
<= 8);
/* NOTE(review): extraction damage -- lines split and several missing (the
   `if (memset)` selector before the algs assignments, braces, loop bodies).
   Also, `NAX_STRINGOP_ALGS` (two occurrences below) looks like a garbling of
   `MAX_STRINGOP_ALGS` -- that is a code token and is left untouched here;
   verify and fix against upstream GCC i386.c.  */
/* Chooses the stringop algorithm for a copy/set of COUNT bytes (or
   EXPECTED_SIZE when COUNT is unknown):
     - an explicit -mstringop-strategy (stringop_alg) wins;
     - under -Os, rep_prefix_4_byte for 4-byte-multiple known counts, else
       rep_prefix_1_byte;
     - tiny expected sizes use loop_1_byte;
     - otherwise scan the per-CPU cost table (algs->size[]) for the first
       entry covering EXPECTED_SIZE, preferring the last non-libcall entry
       when TARGET_INLINE_ALL_STRINGOPS forces inlining;
     - when inlining is forced but the table says libcall, recurse with half
       the largest profitably-inlined size, optionally setting *DYNAMIC_CHECK
       so the caller emits a runtime size test.  */
14234 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14235 static enum stringop_alg
14236 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
14237 int *dynamic_check
)
14239 const struct stringop_algs
* algs
;
14241 *dynamic_check
= -1;
14243 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
14245 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
14246 if (stringop_alg
!= no_stringop
)
14247 return stringop_alg
;
14248 /* rep; movq or rep; movl is the smallest variant. */
14249 else if (optimize_size
)
14251 if (!count
|| (count
& 3))
14252 return rep_prefix_1_byte
;
14254 return rep_prefix_4_byte
;
14256 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14258 else if (expected_size
!= -1 && expected_size
< 4)
14259 return loop_1_byte
;
14260 else if (expected_size
!= -1)
14263 enum stringop_alg alg
= libcall
;
14264 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14266 gcc_assert (algs
->size
[i
].max
);
14267 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
14269 if (algs
->size
[i
].alg
!= libcall
)
14270 alg
= algs
->size
[i
].alg
;
14271 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14272 last non-libcall inline algorithm. */
14273 if (TARGET_INLINE_ALL_STRINGOPS
)
14275 /* When the current size is best to be copied by a libcall,
14276 but we are still forced to inline, run the heuristic below
14277 that will pick code for medium sized blocks. */
14278 if (alg
!= libcall
)
14283 return algs
->size
[i
].alg
;
14286 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
14288 /* When asked to inline the call anyway, try to pick meaningful choice.
14289 We look for maximal size of block that is faster to copy by hand and
14290 take blocks of at most of that size guessing that average size will
14291 be roughly half of the block.
14293 If this turns out to be bad, we might simply specify the preferred
14294 choice in ix86_costs. */
14295 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14296 && algs
->unknown_size
== libcall
)
14299 enum stringop_alg alg
;
14302 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14303 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
14304 max
= algs
->size
[i
].max
;
14307 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
14308 gcc_assert (*dynamic_check
== -1);
14309 gcc_assert (alg
!= libcall
);
14310 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14311 *dynamic_check
= max
;
14314 return algs
->unknown_size
;
/* NOTE(review): extraction damage -- lines split and missing (the switch
   statement head, several case bodies, the EXPECTED_SIZE parameter line,
   closing braces).  Code text left byte-identical.  */
/* Computes the destination alignment worth establishing for algorithm ALG:
   word-size for the unrolled loop, and -- per the comments -- 8 bytes on
   PentiumPro for the rep-prefix variants.  Never lowers below the known
   ALIGN, and skips extra alignment work for tiny expected sizes.  */
14317 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14318 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14320 decide_alignment (int align
,
14321 enum stringop_alg alg
,
14324 int desired_align
= 0;
14328 gcc_unreachable ();
14330 case unrolled_loop
:
14331 desired_align
= GET_MODE_SIZE (Pmode
);
14333 case rep_prefix_8_byte
:
14336 case rep_prefix_4_byte
:
14337 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14338 copying whole cacheline at once. */
14339 if (TARGET_PENTIUMPRO
)
14344 case rep_prefix_1_byte
:
14345 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14346 copying whole cacheline at once. */
14347 if (TARGET_PENTIUMPRO
)
14361 if (desired_align
< align
)
14362 desired_align
= align
;
14363 if (expected_size
!= -1 && expected_size
< 4)
14364 desired_align
= align
;
14365 return desired_align
;
/* NOTE(review): only the comment and signature fragment survive extraction
   here -- the entire function body is missing.  Restore from upstream.  */
14368 /* Return the smallest power of 2 greater than VAL. */
14370 smallest_pow2_greater_than (int val
)
/* NOTE(review): extraction damage throughout -- lines split and many missing
   (return type, local declarations for destreg/srcreg/label/tmp/dynamic_check,
   the switch heads, case bodies and break statements, the final `return
   true;`).  Code text left byte-identical; the original header comment below
   documents the four-step structure.  */
14378 /* Expand string move (memcpy) operation. Use i386 string operations when
14379 profitable. expand_clrmem contains similar code. The code depends upon
14380 architecture, block size and alignment, but always has the same
14383 1) Prologue guard: Conditional that jumps up to epilogues for small
14384 blocks that can be handled by epilogue alone. This is faster but
14385 also needed for correctness, since prologue assume the block is larger
14386 than the desired alignment.
14388 Optional dynamic check for size and libcall for large
14389 blocks is emitted here too, with -minline-stringops-dynamically.
14391 2) Prologue: copy first few bytes in order to get destination aligned
14392 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14393 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14394 We emit either a jump tree on power of two sized blocks, or a byte loop.
14396 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14397 with specified algorithm.
14399 4) Epilogue: code copying tail of the block that is too small to be
14400 handled by main body (or up to size guarded by prologue guard). */
14403 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14404 rtx expected_align_exp
, rtx expected_size_exp
)
14410 rtx jump_around_label
= NULL
;
14411 HOST_WIDE_INT align
= 1;
14412 unsigned HOST_WIDE_INT count
= 0;
14413 HOST_WIDE_INT expected_size
= -1;
14414 int size_needed
= 0, epilogue_size_needed
;
14415 int desired_align
= 0;
14416 enum stringop_alg alg
;
14419 if (CONST_INT_P (align_exp
))
14420 align
= INTVAL (align_exp
);
14421 /* i386 can do misaligned access on reasonably increased cost. */
14422 if (CONST_INT_P (expected_align_exp
)
14423 && INTVAL (expected_align_exp
) > align
)
14424 align
= INTVAL (expected_align_exp
);
14425 if (CONST_INT_P (count_exp
))
14426 count
= expected_size
= INTVAL (count_exp
);
14427 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14428 expected_size
= INTVAL (expected_size_exp
);
14430 /* Step 0: Decide on preferred algorithm, desired alignment and
14431 size of chunks to be copied by main loop. */
14433 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14434 desired_align
= decide_alignment (align
, alg
, expected_size
);
14436 if (!TARGET_ALIGN_STRINGOPS
)
14437 align
= desired_align
;
14439 if (alg
== libcall
)
14441 gcc_assert (alg
!= no_stringop
);
14443 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14444 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14445 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14450 gcc_unreachable ();
14452 size_needed
= GET_MODE_SIZE (Pmode
);
14454 case unrolled_loop
:
14455 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14457 case rep_prefix_8_byte
:
14460 case rep_prefix_4_byte
:
14463 case rep_prefix_1_byte
:
14469 epilogue_size_needed
= size_needed
;
14471 /* Step 1: Prologue guard. */
14473 /* Alignment code needs count to be in register. */
14474 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14476 enum machine_mode mode
= SImode
;
14477 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14479 count_exp
= force_reg (mode
, count_exp
);
14481 gcc_assert (desired_align
>= 1 && align
>= 1);
14483 /* Ensure that alignment prologue won't copy past end of block. */
14484 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14486 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14487 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14488 Make sure it is power of 2. */
14489 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14491 label
= gen_label_rtx ();
14492 emit_cmp_and_jump_insns (count_exp
,
14493 GEN_INT (epilogue_size_needed
),
14494 LTU
, 0, counter_mode (count_exp
), 1, label
);
14495 if (GET_CODE (count_exp
) == CONST_INT
)
14497 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14498 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14500 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14502 /* Emit code to decide on runtime whether library call or inline should be
14504 if (dynamic_check
!= -1)
14506 rtx hot_label
= gen_label_rtx ();
14507 jump_around_label
= gen_label_rtx ();
14508 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14509 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14510 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14511 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14512 emit_jump (jump_around_label
);
14513 emit_label (hot_label
);
14516 /* Step 2: Alignment prologue. */
14518 if (desired_align
> align
)
14520 /* Except for the first move in epilogue, we no longer know
14521 constant offset in aliasing info. It don't seems to worth
14522 the pain to maintain it for the first move, so throw away
14524 src
= change_address (src
, BLKmode
, srcreg
);
14525 dst
= change_address (dst
, BLKmode
, destreg
);
14526 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14529 if (label
&& size_needed
== 1)
14531 emit_label (label
);
14532 LABEL_NUSES (label
) = 1;
14536 /* Step 3: Main loop. */
14542 gcc_unreachable ();
14544 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14545 count_exp
, QImode
, 1, expected_size
);
14548 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14549 count_exp
, Pmode
, 1, expected_size
);
14551 case unrolled_loop
:
14552 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14553 registers for 4 temporaries anyway. */
14554 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14555 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14558 case rep_prefix_8_byte
:
14559 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14562 case rep_prefix_4_byte
:
14563 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14566 case rep_prefix_1_byte
:
14567 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14571 /* Adjust properly the offset of src and dest memory for aliasing. */
14572 if (CONST_INT_P (count_exp
))
14574 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14575 (count
/ size_needed
) * size_needed
);
14576 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14577 (count
/ size_needed
) * size_needed
);
14581 src
= change_address (src
, BLKmode
, srcreg
);
14582 dst
= change_address (dst
, BLKmode
, destreg
);
14585 /* Step 4: Epilogue to copy the remaining bytes. */
14589 /* When the main loop is done, COUNT_EXP might hold original count,
14590 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14591 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14592 bytes. Compensate if needed. */
14594 if (size_needed
< epilogue_size_needed
)
14597 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14598 GEN_INT (size_needed
- 1), count_exp
, 1,
14600 if (tmp
!= count_exp
)
14601 emit_move_insn (count_exp
, tmp
);
14603 emit_label (label
);
14604 LABEL_NUSES (label
) = 1;
14607 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14608 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14609 epilogue_size_needed
);
14610 if (jump_around_label
)
14611 emit_label (jump_around_label
);
/* NOTE(review): extraction damage -- lines split and missing (return type,
   braces, the v |= v << 8 / v << 16 steps for constants, the final
   `return reg;`).  Code text left byte-identical.  */
/* Broadcasts a QImode value 0xXY into every byte of a SImode/DImode
   register (0xXYXYXYXY...).  Constants are folded at compile time; for
   registers it either multiplies by a promoted 0x01010101 (when the cost
   tables say multiply is cheaper) or builds the pattern with shift/IOR
   steps, using the insv patterns to dodge partial-register stalls where
   available.  */
14615 /* Helper function for memcpy. For QImode value 0xXY produce
14616 0xXYXYXYXY of wide specified by MODE. This is essentially
14617 a * 0x10101010, but we can do slightly better than
14618 synth_mult by unwinding the sequence by hand on CPUs with
14621 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14623 enum machine_mode valmode
= GET_MODE (val
);
14625 int nops
= mode
== DImode
? 3 : 2;
14627 gcc_assert (mode
== SImode
|| mode
== DImode
);
14628 if (val
== const0_rtx
)
14629 return copy_to_mode_reg (mode
, const0_rtx
);
14630 if (CONST_INT_P (val
))
14632 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14636 if (mode
== DImode
)
14637 v
|= (v
<< 16) << 16;
14638 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14641 if (valmode
== VOIDmode
)
14643 if (valmode
!= QImode
)
14644 val
= gen_lowpart (QImode
, val
);
14645 if (mode
== QImode
)
14647 if (!TARGET_PARTIAL_REG_STALL
)
14649 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14650 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14651 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14652 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14654 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14655 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14656 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14661 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14663 if (!TARGET_PARTIAL_REG_STALL
)
14664 if (mode
== SImode
)
14665 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14667 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14670 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14671 NULL
, 1, OPTAB_DIRECT
);
14673 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14675 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14676 NULL
, 1, OPTAB_DIRECT
);
14677 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14678 if (mode
== SImode
)
14680 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14681 NULL
, 1, OPTAB_DIRECT
);
14682 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
/* NOTE(review): extraction damage -- return type, braces, the declaration of
   promoted_val and the leading TARGET_64BIT condition of the first `if` are
   missing; lines split.  Code text left byte-identical.  */
/* Promotes VAL to the widest byte-broadcast register the main loop
   (SIZE_NEEDED-byte chunks) or the alignment prologue (ALIGN ->
   DESIRED_ALIGN) will need: DImode, SImode, HImode, or VAL unchanged.  */
14687 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14688 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14689 alignment from ALIGN to DESIRED_ALIGN. */
14691 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14696 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14697 promoted_val
= promote_duplicated_reg (DImode
, val
);
14698 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14699 promoted_val
= promote_duplicated_reg (SImode
, val
);
14700 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14701 promoted_val
= promote_duplicated_reg (HImode
, val
);
14703 promoted_val
= val
;
14705 return promoted_val
;
14708 /* Expand string clear operation (bzero). Use i386 string operations when
14709 profitable. See expand_movmem comment for explanation of individual
14710 steps performed. */
14712 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14713 rtx expected_align_exp
, rtx expected_size_exp
)
14718 rtx jump_around_label
= NULL
;
14719 HOST_WIDE_INT align
= 1;
14720 unsigned HOST_WIDE_INT count
= 0;
14721 HOST_WIDE_INT expected_size
= -1;
14722 int size_needed
= 0, epilogue_size_needed
;
14723 int desired_align
= 0;
14724 enum stringop_alg alg
;
14725 rtx promoted_val
= NULL
;
14726 bool force_loopy_epilogue
= false;
14729 if (CONST_INT_P (align_exp
))
14730 align
= INTVAL (align_exp
);
14731 /* i386 can do misaligned access on reasonably increased cost. */
14732 if (CONST_INT_P (expected_align_exp
)
14733 && INTVAL (expected_align_exp
) > align
)
14734 align
= INTVAL (expected_align_exp
);
14735 if (CONST_INT_P (count_exp
))
14736 count
= expected_size
= INTVAL (count_exp
);
14737 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14738 expected_size
= INTVAL (expected_size_exp
);
14740 /* Step 0: Decide on preferred algorithm, desired alignment and
14741 size of chunks to be copied by main loop. */
14743 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14744 desired_align
= decide_alignment (align
, alg
, expected_size
);
14746 if (!TARGET_ALIGN_STRINGOPS
)
14747 align
= desired_align
;
14749 if (alg
== libcall
)
14751 gcc_assert (alg
!= no_stringop
);
14753 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
14754 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14759 gcc_unreachable ();
14761 size_needed
= GET_MODE_SIZE (Pmode
);
14763 case unrolled_loop
:
14764 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14766 case rep_prefix_8_byte
:
14769 case rep_prefix_4_byte
:
14772 case rep_prefix_1_byte
:
14777 epilogue_size_needed
= size_needed
;
14779 /* Step 1: Prologue guard. */
14781 /* Alignment code needs count to be in register. */
14782 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14784 enum machine_mode mode
= SImode
;
14785 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14787 count_exp
= force_reg (mode
, count_exp
);
14789 /* Do the cheap promotion to allow better CSE across the
14790 main loop and epilogue (ie one load of the big constant in the
14791 front of all code. */
14792 if (CONST_INT_P (val_exp
))
14793 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14794 desired_align
, align
);
14795 /* Ensure that alignment prologue won't copy past end of block. */
14796 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14798 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14799 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14800 Make sure it is power of 2. */
14801 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14803 /* To improve performance of small blocks, we jump around the VAL
14804 promoting mode. This mean that if the promoted VAL is not constant,
14805 we might not use it in the epilogue and have to use byte
14807 if (epilogue_size_needed
> 2 && !promoted_val
)
14808 force_loopy_epilogue
= true;
14809 label
= gen_label_rtx ();
14810 emit_cmp_and_jump_insns (count_exp
,
14811 GEN_INT (epilogue_size_needed
),
14812 LTU
, 0, counter_mode (count_exp
), 1, label
);
14813 if (GET_CODE (count_exp
) == CONST_INT
)
14815 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14816 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14818 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14820 if (dynamic_check
!= -1)
14822 rtx hot_label
= gen_label_rtx ();
14823 jump_around_label
= gen_label_rtx ();
14824 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14825 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
14826 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14827 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14828 emit_jump (jump_around_label
);
14829 emit_label (hot_label
);
14832 /* Step 2: Alignment prologue. */
14834 /* Do the expensive promotion once we branched off the small blocks. */
14836 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14837 desired_align
, align
);
14838 gcc_assert (desired_align
>= 1 && align
>= 1);
14840 if (desired_align
> align
)
14842 /* Except for the first move in epilogue, we no longer know
14843 constant offset in aliasing info. It don't seems to worth
14844 the pain to maintain it for the first move, so throw away
14846 dst
= change_address (dst
, BLKmode
, destreg
);
14847 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14850 if (label
&& size_needed
== 1)
14852 emit_label (label
);
14853 LABEL_NUSES (label
) = 1;
14857 /* Step 3: Main loop. */
14863 gcc_unreachable ();
14865 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14866 count_exp
, QImode
, 1, expected_size
);
14869 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14870 count_exp
, Pmode
, 1, expected_size
);
14872 case unrolled_loop
:
14873 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14874 count_exp
, Pmode
, 4, expected_size
);
14876 case rep_prefix_8_byte
:
14877 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14880 case rep_prefix_4_byte
:
14881 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14884 case rep_prefix_1_byte
:
14885 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14889 /* Adjust properly the offset of src and dest memory for aliasing. */
14890 if (CONST_INT_P (count_exp
))
14891 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14892 (count
/ size_needed
) * size_needed
);
14894 dst
= change_address (dst
, BLKmode
, destreg
);
14896 /* Step 4: Epilogue to copy the remaining bytes. */
14900 /* When the main loop is done, COUNT_EXP might hold original count,
14901 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14902 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14903 bytes. Compensate if needed. */
14905 if (size_needed
< desired_align
- align
)
14908 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14909 GEN_INT (size_needed
- 1), count_exp
, 1,
14911 size_needed
= desired_align
- align
+ 1;
14912 if (tmp
!= count_exp
)
14913 emit_move_insn (count_exp
, tmp
);
14915 emit_label (label
);
14916 LABEL_NUSES (label
) = 1;
14918 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14920 if (force_loopy_epilogue
)
14921 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14924 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14927 if (jump_around_label
)
14928 emit_label (jump_around_label
);
14932 /* Expand the appropriate insns for doing strlen if not just doing
14935 out = result, initialized with the start address
14936 align_rtx = alignment of the address.
14937 scratch = scratch register, initialized with the startaddress when
14938 not aligned, otherwise undefined
14940 This is just the body. It needs the initializations mentioned above and
14941 some address computing at the end. These things are done in i386.md. */
14944 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14948 rtx align_2_label
= NULL_RTX
;
14949 rtx align_3_label
= NULL_RTX
;
14950 rtx align_4_label
= gen_label_rtx ();
14951 rtx end_0_label
= gen_label_rtx ();
14953 rtx tmpreg
= gen_reg_rtx (SImode
);
14954 rtx scratch
= gen_reg_rtx (SImode
);
14958 if (CONST_INT_P (align_rtx
))
14959 align
= INTVAL (align_rtx
);
14961 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14963 /* Is there a known alignment and is it less than 4? */
14966 rtx scratch1
= gen_reg_rtx (Pmode
);
14967 emit_move_insn (scratch1
, out
);
14968 /* Is there a known alignment and is it not 2? */
14971 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14972 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14974 /* Leave just the 3 lower bits. */
14975 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14976 NULL_RTX
, 0, OPTAB_WIDEN
);
14978 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14979 Pmode
, 1, align_4_label
);
14980 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14981 Pmode
, 1, align_2_label
);
14982 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14983 Pmode
, 1, align_3_label
);
14987 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14988 check if is aligned to 4 - byte. */
14990 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14991 NULL_RTX
, 0, OPTAB_WIDEN
);
14993 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14994 Pmode
, 1, align_4_label
);
14997 mem
= change_address (src
, QImode
, out
);
14999 /* Now compare the bytes. */
15001 /* Compare the first n unaligned byte on a byte per byte basis. */
15002 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
15003 QImode
, 1, end_0_label
);
15005 /* Increment the address. */
15007 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15009 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15011 /* Not needed with an alignment of 2 */
15014 emit_label (align_2_label
);
15016 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15020 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15022 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15024 emit_label (align_3_label
);
15027 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15031 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15033 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15036 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15037 align this loop. It gives only huge programs, but does not help to
15039 emit_label (align_4_label
);
15041 mem
= change_address (src
, SImode
, out
);
15042 emit_move_insn (scratch
, mem
);
15044 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
15046 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
15048 /* This formula yields a nonzero result iff one of the bytes is zero.
15049 This saves three branches inside loop and many cycles. */
15051 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
15052 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
15053 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
15054 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
15055 gen_int_mode (0x80808080, SImode
)));
15056 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
15061 rtx reg
= gen_reg_rtx (SImode
);
15062 rtx reg2
= gen_reg_rtx (Pmode
);
15063 emit_move_insn (reg
, tmpreg
);
15064 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
15066 /* If zero is not in the first two bytes, move two bytes forward. */
15067 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15068 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15069 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15070 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
15071 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
15074 /* Emit lea manually to avoid clobbering of flags. */
15075 emit_insn (gen_rtx_SET (SImode
, reg2
,
15076 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
15078 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15079 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15080 emit_insn (gen_rtx_SET (VOIDmode
, out
,
15081 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
15088 rtx end_2_label
= gen_label_rtx ();
15089 /* Is zero in the first two bytes? */
15091 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15092 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15093 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
15094 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
15095 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
15097 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
15098 JUMP_LABEL (tmp
) = end_2_label
;
15100 /* Not in the first two. Move two bytes forward. */
15101 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
15103 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
15105 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
15107 emit_label (end_2_label
);
15111 /* Avoid branch in fixing the byte. */
15112 tmpreg
= gen_lowpart (QImode
, tmpreg
);
15113 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
15114 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
15116 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
15118 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
15120 emit_label (end_0_label
);
15123 /* Expand strlen. */
15126 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
15128 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
15130 /* The generic case of strlen expander is long. Avoid it's
15131 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
15133 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
15134 && !TARGET_INLINE_ALL_STRINGOPS
15136 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
15139 addr
= force_reg (Pmode
, XEXP (src
, 0));
15140 scratch1
= gen_reg_rtx (Pmode
);
15142 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
15145 /* Well it seems that some optimizer does not combine a call like
15146 foo(strlen(bar), strlen(bar));
15147 when the move and the subtraction is done here. It does calculate
15148 the length just once when these instructions are done inside of
15149 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15150 often used and I use one fewer register for the lifetime of
15151 output_strlen_unroll() this is better. */
15153 emit_move_insn (out
, addr
);
15155 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
15157 /* strlensi_unroll_1 returns the address of the zero at the end of
15158 the string, like memchr(), so compute the length by subtracting
15159 the start address. */
15161 emit_insn (gen_subdi3 (out
, out
, addr
));
15163 emit_insn (gen_subsi3 (out
, out
, addr
));
15168 scratch2
= gen_reg_rtx (Pmode
);
15169 scratch3
= gen_reg_rtx (Pmode
);
15170 scratch4
= force_reg (Pmode
, constm1_rtx
);
15172 emit_move_insn (scratch3
, addr
);
15173 eoschar
= force_reg (QImode
, eoschar
);
15175 src
= replace_equiv_address_nv (src
, scratch3
);
15177 /* If .md starts supporting :P, this can be done in .md. */
15178 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
15179 scratch4
), UNSPEC_SCAS
);
15180 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
15183 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
15184 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
15188 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
15189 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
15195 /* For given symbol (function) construct code to compute address of it's PLT
15196 entry in large x86-64 PIC model. */
15198 construct_plt_address (rtx symbol
)
15200 rtx tmp
= gen_reg_rtx (Pmode
);
15201 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
15203 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
15204 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
15206 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
15207 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
15212 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
15213 rtx callarg2 ATTRIBUTE_UNUSED
,
15214 rtx pop
, int sibcall
)
15216 rtx use
= NULL
, call
;
15218 if (pop
== const0_rtx
)
15220 gcc_assert (!TARGET_64BIT
|| !pop
);
15222 if (TARGET_MACHO
&& !TARGET_64BIT
)
15225 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
15226 fnaddr
= machopic_indirect_call_target (fnaddr
);
15231 /* Static functions and indirect calls don't need the pic register. */
15232 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
15233 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15234 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
15235 use_reg (&use
, pic_offset_table_rtx
);
15238 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
15240 rtx al
= gen_rtx_REG (QImode
, 0);
15241 emit_move_insn (al
, callarg2
);
15242 use_reg (&use
, al
);
15245 if (ix86_cmodel
== CM_LARGE_PIC
15246 && GET_CODE (fnaddr
) == MEM
15247 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15248 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
15249 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
15250 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
15252 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15253 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15255 if (sibcall
&& TARGET_64BIT
15256 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
15259 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15260 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
15261 emit_move_insn (fnaddr
, addr
);
15262 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15265 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
15267 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
15270 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
15271 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
15272 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
15275 call
= emit_call_insn (call
);
15277 CALL_INSN_FUNCTION_USAGE (call
) = use
;
15281 /* Clear stack slot assignments remembered from previous functions.
15282 This is called from INIT_EXPANDERS once before RTL is emitted for each
15285 static struct machine_function
*
15286 ix86_init_machine_status (void)
15288 struct machine_function
*f
;
15290 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
15291 f
->use_fast_prologue_epilogue_nregs
= -1;
15292 f
->tls_descriptor_call_expanded_p
= 0;
15297 /* Return a MEM corresponding to a stack slot with mode MODE.
15298 Allocate a new slot if necessary.
15300 The RTL for a function can have several slots available: N is
15301 which slot to use. */
15304 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
15306 struct stack_local_entry
*s
;
15308 gcc_assert (n
< MAX_386_STACK_LOCALS
);
15310 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15311 if (s
->mode
== mode
&& s
->n
== n
)
15312 return copy_rtx (s
->rtl
);
15314 s
= (struct stack_local_entry
*)
15315 ggc_alloc (sizeof (struct stack_local_entry
));
15318 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
15320 s
->next
= ix86_stack_locals
;
15321 ix86_stack_locals
= s
;
15325 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15327 static GTY(()) rtx ix86_tls_symbol
;
15329 ix86_tls_get_addr (void)
15332 if (!ix86_tls_symbol
)
15334 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15335 (TARGET_ANY_GNU_TLS
15337 ? "___tls_get_addr"
15338 : "__tls_get_addr");
15341 return ix86_tls_symbol
;
15344 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15346 static GTY(()) rtx ix86_tls_module_base_symbol
;
15348 ix86_tls_module_base (void)
15351 if (!ix86_tls_module_base_symbol
)
15353 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15354 "_TLS_MODULE_BASE_");
15355 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
15356 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
15359 return ix86_tls_module_base_symbol
;
15362 /* Calculate the length of the memory address in the instruction
15363 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15366 memory_address_length (rtx addr
)
15368 struct ix86_address parts
;
15369 rtx base
, index
, disp
;
15373 if (GET_CODE (addr
) == PRE_DEC
15374 || GET_CODE (addr
) == POST_INC
15375 || GET_CODE (addr
) == PRE_MODIFY
15376 || GET_CODE (addr
) == POST_MODIFY
)
15379 ok
= ix86_decompose_address (addr
, &parts
);
15382 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15383 parts
.base
= SUBREG_REG (parts
.base
);
15384 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15385 parts
.index
= SUBREG_REG (parts
.index
);
15388 index
= parts
.index
;
15393 - esp as the base always wants an index,
15394 - ebp as the base always wants a displacement. */
15396 /* Register Indirect. */
15397 if (base
&& !index
&& !disp
)
15399 /* esp (for its index) and ebp (for its displacement) need
15400 the two-byte modrm form. */
15401 if (addr
== stack_pointer_rtx
15402 || addr
== arg_pointer_rtx
15403 || addr
== frame_pointer_rtx
15404 || addr
== hard_frame_pointer_rtx
)
15408 /* Direct Addressing. */
15409 else if (disp
&& !base
&& !index
)
15414 /* Find the length of the displacement constant. */
15417 if (base
&& satisfies_constraint_K (disp
))
15422 /* ebp always wants a displacement. */
15423 else if (base
== hard_frame_pointer_rtx
)
15426 /* An index requires the two-byte modrm form.... */
15428 /* ...like esp, which always wants an index. */
15429 || base
== stack_pointer_rtx
15430 || base
== arg_pointer_rtx
15431 || base
== frame_pointer_rtx
)
15438 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15439 is set, expect that insn have 8bit immediate alternative. */
15441 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15445 extract_insn_cached (insn
);
15446 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15447 if (CONSTANT_P (recog_data
.operand
[i
]))
15450 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15454 switch (get_attr_mode (insn
))
15465 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15470 fatal_insn ("unknown insn mode", insn
);
15476 /* Compute default value for "length_address" attribute. */
15478 ix86_attr_length_address_default (rtx insn
)
15482 if (get_attr_type (insn
) == TYPE_LEA
)
15484 rtx set
= PATTERN (insn
);
15486 if (GET_CODE (set
) == PARALLEL
)
15487 set
= XVECEXP (set
, 0, 0);
15489 gcc_assert (GET_CODE (set
) == SET
);
15491 return memory_address_length (SET_SRC (set
));
15494 extract_insn_cached (insn
);
15495 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15496 if (MEM_P (recog_data
.operand
[i
]))
15498 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15504 /* Return the maximum number of instructions a cpu can issue. */
15507 ix86_issue_rate (void)
15511 case PROCESSOR_PENTIUM
:
15515 case PROCESSOR_PENTIUMPRO
:
15516 case PROCESSOR_PENTIUM4
:
15517 case PROCESSOR_ATHLON
:
15519 case PROCESSOR_AMDFAM10
:
15520 case PROCESSOR_NOCONA
:
15521 case PROCESSOR_GENERIC32
:
15522 case PROCESSOR_GENERIC64
:
15525 case PROCESSOR_CORE2
:
15533 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15534 by DEP_INSN and nothing set by DEP_INSN. */
15537 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15541 /* Simplify the test for uninteresting insns. */
15542 if (insn_type
!= TYPE_SETCC
15543 && insn_type
!= TYPE_ICMOV
15544 && insn_type
!= TYPE_FCMOV
15545 && insn_type
!= TYPE_IBR
)
15548 if ((set
= single_set (dep_insn
)) != 0)
15550 set
= SET_DEST (set
);
15553 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15554 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15555 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15556 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15558 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15559 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15564 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15567 /* This test is true if the dependent insn reads the flags but
15568 not any other potentially set register. */
15569 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15572 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
15578 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15579 address with operands set by DEP_INSN. */
15582 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15586 if (insn_type
== TYPE_LEA
15589 addr
= PATTERN (insn
);
15591 if (GET_CODE (addr
) == PARALLEL
)
15592 addr
= XVECEXP (addr
, 0, 0);
15594 gcc_assert (GET_CODE (addr
) == SET
);
15596 addr
= SET_SRC (addr
);
15601 extract_insn_cached (insn
);
15602 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15603 if (MEM_P (recog_data
.operand
[i
]))
15605 addr
= XEXP (recog_data
.operand
[i
], 0);
15612 return modified_in_p (addr
, dep_insn
);
15616 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15618 enum attr_type insn_type
, dep_insn_type
;
15619 enum attr_memory memory
;
15621 int dep_insn_code_number
;
15623 /* Anti and output dependencies have zero cost on all CPUs. */
15624 if (REG_NOTE_KIND (link
) != 0)
15627 dep_insn_code_number
= recog_memoized (dep_insn
);
15629 /* If we can't recognize the insns, we can't really do anything. */
15630 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15633 insn_type
= get_attr_type (insn
);
15634 dep_insn_type
= get_attr_type (dep_insn
);
15638 case PROCESSOR_PENTIUM
:
15639 /* Address Generation Interlock adds a cycle of latency. */
15640 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15643 /* ??? Compares pair with jump/setcc. */
15644 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15647 /* Floating point stores require value to be ready one cycle earlier. */
15648 if (insn_type
== TYPE_FMOV
15649 && get_attr_memory (insn
) == MEMORY_STORE
15650 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15654 case PROCESSOR_PENTIUMPRO
:
15655 memory
= get_attr_memory (insn
);
15657 /* INT->FP conversion is expensive. */
15658 if (get_attr_fp_int_src (dep_insn
))
15661 /* There is one cycle extra latency between an FP op and a store. */
15662 if (insn_type
== TYPE_FMOV
15663 && (set
= single_set (dep_insn
)) != NULL_RTX
15664 && (set2
= single_set (insn
)) != NULL_RTX
15665 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15666 && MEM_P (SET_DEST (set2
)))
15669 /* Show ability of reorder buffer to hide latency of load by executing
15670 in parallel with previous instruction in case
15671 previous instruction is not needed to compute the address. */
15672 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15673 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15675 /* Claim moves to take one cycle, as core can issue one load
15676 at time and the next load can start cycle later. */
15677 if (dep_insn_type
== TYPE_IMOV
15678 || dep_insn_type
== TYPE_FMOV
)
15686 memory
= get_attr_memory (insn
);
15688 /* The esp dependency is resolved before the instruction is really
15690 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15691 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15694 /* INT->FP conversion is expensive. */
15695 if (get_attr_fp_int_src (dep_insn
))
15698 /* Show ability of reorder buffer to hide latency of load by executing
15699 in parallel with previous instruction in case
15700 previous instruction is not needed to compute the address. */
15701 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15702 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15704 /* Claim moves to take one cycle, as core can issue one load
15705 at time and the next load can start cycle later. */
15706 if (dep_insn_type
== TYPE_IMOV
15707 || dep_insn_type
== TYPE_FMOV
)
15716 case PROCESSOR_ATHLON
:
15718 case PROCESSOR_AMDFAM10
:
15719 case PROCESSOR_GENERIC32
:
15720 case PROCESSOR_GENERIC64
:
15721 memory
= get_attr_memory (insn
);
15723 /* Show ability of reorder buffer to hide latency of load by executing
15724 in parallel with previous instruction in case
15725 previous instruction is not needed to compute the address. */
15726 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15727 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15729 enum attr_unit unit
= get_attr_unit (insn
);
15732 /* Because of the difference between the length of integer and
15733 floating unit pipeline preparation stages, the memory operands
15734 for floating point are cheaper.
15736 ??? For Athlon it the difference is most probably 2. */
15737 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15740 loadcost
= TARGET_ATHLON
? 2 : 0;
15742 if (cost
>= loadcost
)
15755 /* How many alternative schedules to try. This should be as wide as the
15756 scheduling freedom in the DFA, but no wider. Making this value too
15757 large results extra work for the scheduler. */
15760 ia32_multipass_dfa_lookahead (void)
15762 if (ix86_tune
== PROCESSOR_PENTIUM
)
15765 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15766 || ix86_tune
== PROCESSOR_K6
)
15774 /* Compute the alignment given to a constant that is being placed in memory.
15775 EXP is the constant and ALIGN is the alignment that the object would
15777 The value of this function is used instead of that alignment to align
15781 ix86_constant_alignment (tree exp
, int align
)
15783 if (TREE_CODE (exp
) == REAL_CST
)
15785 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15787 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15790 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15791 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15792 return BITS_PER_WORD
;
15797 /* Compute the alignment for a static variable.
15798 TYPE is the data type, and ALIGN is the alignment that
15799 the object would ordinarily have. The value of this function is used
15800 instead of that alignment to align the object. */
15803 ix86_data_alignment (tree type
, int align
)
15805 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
15807 if (AGGREGATE_TYPE_P (type
)
15808 && TYPE_SIZE (type
)
15809 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15810 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15811 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15812 && align
< max_align
)
15815 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15816 to 16byte boundary. */
15819 if (AGGREGATE_TYPE_P (type
)
15820 && TYPE_SIZE (type
)
15821 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15822 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15823 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15827 if (TREE_CODE (type
) == ARRAY_TYPE
)
15829 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15831 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15834 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15837 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15839 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15842 else if ((TREE_CODE (type
) == RECORD_TYPE
15843 || TREE_CODE (type
) == UNION_TYPE
15844 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15845 && TYPE_FIELDS (type
))
15847 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15849 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15852 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15853 || TREE_CODE (type
) == INTEGER_TYPE
)
15855 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15857 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15864 /* Compute the alignment for a local variable.
15865 TYPE is the data type, and ALIGN is the alignment that
15866 the object would ordinarily have. The value of this macro is used
15867 instead of that alignment to align the object. */
15870 ix86_local_alignment (tree type
, int align
)
15872 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15873 to 16byte boundary. */
15876 if (AGGREGATE_TYPE_P (type
)
15877 && TYPE_SIZE (type
)
15878 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15879 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15880 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15883 if (TREE_CODE (type
) == ARRAY_TYPE
)
15885 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15887 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15890 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15892 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15894 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15897 else if ((TREE_CODE (type
) == RECORD_TYPE
15898 || TREE_CODE (type
) == UNION_TYPE
15899 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15900 && TYPE_FIELDS (type
))
15902 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15904 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15907 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15908 || TREE_CODE (type
) == INTEGER_TYPE
)
15911 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15913 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15919 /* Emit RTL insns to initialize the variable parts of a trampoline.
15920 FNADDR is an RTX for the address of the function's pure code.
15921 CXT is an RTX for the static chain value for the function. */
15923 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15927 /* Compute offset from the end of the jmp to the target function. */
15928 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15929 plus_constant (tramp
, 10),
15930 NULL_RTX
, 1, OPTAB_DIRECT
);
15931 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15932 gen_int_mode (0xb9, QImode
));
15933 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15934 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15935 gen_int_mode (0xe9, QImode
));
15936 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15941 /* Try to load address using shorter movl instead of movabs.
15942 We may want to support movq for kernel mode, but kernel does not use
15943 trampolines at the moment. */
15944 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15946 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15947 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15948 gen_int_mode (0xbb41, HImode
));
15949 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15950 gen_lowpart (SImode
, fnaddr
));
15955 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15956 gen_int_mode (0xbb49, HImode
));
15957 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15961 /* Load static chain using movabs to r10. */
15962 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15963 gen_int_mode (0xba49, HImode
));
15964 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15967 /* Jump to the r11 */
15968 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15969 gen_int_mode (0xff49, HImode
));
15970 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15971 gen_int_mode (0xe3, QImode
));
15973 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15976 #ifdef ENABLE_EXECUTE_STACK
15977 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15978 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
15982 /* Codes for all the SSE/MMX builtins. */
15985 IX86_BUILTIN_ADDPS
,
15986 IX86_BUILTIN_ADDSS
,
15987 IX86_BUILTIN_DIVPS
,
15988 IX86_BUILTIN_DIVSS
,
15989 IX86_BUILTIN_MULPS
,
15990 IX86_BUILTIN_MULSS
,
15991 IX86_BUILTIN_SUBPS
,
15992 IX86_BUILTIN_SUBSS
,
15994 IX86_BUILTIN_CMPEQPS
,
15995 IX86_BUILTIN_CMPLTPS
,
15996 IX86_BUILTIN_CMPLEPS
,
15997 IX86_BUILTIN_CMPGTPS
,
15998 IX86_BUILTIN_CMPGEPS
,
15999 IX86_BUILTIN_CMPNEQPS
,
16000 IX86_BUILTIN_CMPNLTPS
,
16001 IX86_BUILTIN_CMPNLEPS
,
16002 IX86_BUILTIN_CMPNGTPS
,
16003 IX86_BUILTIN_CMPNGEPS
,
16004 IX86_BUILTIN_CMPORDPS
,
16005 IX86_BUILTIN_CMPUNORDPS
,
16006 IX86_BUILTIN_CMPEQSS
,
16007 IX86_BUILTIN_CMPLTSS
,
16008 IX86_BUILTIN_CMPLESS
,
16009 IX86_BUILTIN_CMPNEQSS
,
16010 IX86_BUILTIN_CMPNLTSS
,
16011 IX86_BUILTIN_CMPNLESS
,
16012 IX86_BUILTIN_CMPNGTSS
,
16013 IX86_BUILTIN_CMPNGESS
,
16014 IX86_BUILTIN_CMPORDSS
,
16015 IX86_BUILTIN_CMPUNORDSS
,
16017 IX86_BUILTIN_COMIEQSS
,
16018 IX86_BUILTIN_COMILTSS
,
16019 IX86_BUILTIN_COMILESS
,
16020 IX86_BUILTIN_COMIGTSS
,
16021 IX86_BUILTIN_COMIGESS
,
16022 IX86_BUILTIN_COMINEQSS
,
16023 IX86_BUILTIN_UCOMIEQSS
,
16024 IX86_BUILTIN_UCOMILTSS
,
16025 IX86_BUILTIN_UCOMILESS
,
16026 IX86_BUILTIN_UCOMIGTSS
,
16027 IX86_BUILTIN_UCOMIGESS
,
16028 IX86_BUILTIN_UCOMINEQSS
,
16030 IX86_BUILTIN_CVTPI2PS
,
16031 IX86_BUILTIN_CVTPS2PI
,
16032 IX86_BUILTIN_CVTSI2SS
,
16033 IX86_BUILTIN_CVTSI642SS
,
16034 IX86_BUILTIN_CVTSS2SI
,
16035 IX86_BUILTIN_CVTSS2SI64
,
16036 IX86_BUILTIN_CVTTPS2PI
,
16037 IX86_BUILTIN_CVTTSS2SI
,
16038 IX86_BUILTIN_CVTTSS2SI64
,
16040 IX86_BUILTIN_MAXPS
,
16041 IX86_BUILTIN_MAXSS
,
16042 IX86_BUILTIN_MINPS
,
16043 IX86_BUILTIN_MINSS
,
16045 IX86_BUILTIN_LOADUPS
,
16046 IX86_BUILTIN_STOREUPS
,
16047 IX86_BUILTIN_MOVSS
,
16049 IX86_BUILTIN_MOVHLPS
,
16050 IX86_BUILTIN_MOVLHPS
,
16051 IX86_BUILTIN_LOADHPS
,
16052 IX86_BUILTIN_LOADLPS
,
16053 IX86_BUILTIN_STOREHPS
,
16054 IX86_BUILTIN_STORELPS
,
16056 IX86_BUILTIN_MASKMOVQ
,
16057 IX86_BUILTIN_MOVMSKPS
,
16058 IX86_BUILTIN_PMOVMSKB
,
16060 IX86_BUILTIN_MOVNTPS
,
16061 IX86_BUILTIN_MOVNTQ
,
16063 IX86_BUILTIN_LOADDQU
,
16064 IX86_BUILTIN_STOREDQU
,
16066 IX86_BUILTIN_PACKSSWB
,
16067 IX86_BUILTIN_PACKSSDW
,
16068 IX86_BUILTIN_PACKUSWB
,
16070 IX86_BUILTIN_PADDB
,
16071 IX86_BUILTIN_PADDW
,
16072 IX86_BUILTIN_PADDD
,
16073 IX86_BUILTIN_PADDQ
,
16074 IX86_BUILTIN_PADDSB
,
16075 IX86_BUILTIN_PADDSW
,
16076 IX86_BUILTIN_PADDUSB
,
16077 IX86_BUILTIN_PADDUSW
,
16078 IX86_BUILTIN_PSUBB
,
16079 IX86_BUILTIN_PSUBW
,
16080 IX86_BUILTIN_PSUBD
,
16081 IX86_BUILTIN_PSUBQ
,
16082 IX86_BUILTIN_PSUBSB
,
16083 IX86_BUILTIN_PSUBSW
,
16084 IX86_BUILTIN_PSUBUSB
,
16085 IX86_BUILTIN_PSUBUSW
,
16088 IX86_BUILTIN_PANDN
,
16092 IX86_BUILTIN_PAVGB
,
16093 IX86_BUILTIN_PAVGW
,
16095 IX86_BUILTIN_PCMPEQB
,
16096 IX86_BUILTIN_PCMPEQW
,
16097 IX86_BUILTIN_PCMPEQD
,
16098 IX86_BUILTIN_PCMPGTB
,
16099 IX86_BUILTIN_PCMPGTW
,
16100 IX86_BUILTIN_PCMPGTD
,
16102 IX86_BUILTIN_PMADDWD
,
16104 IX86_BUILTIN_PMAXSW
,
16105 IX86_BUILTIN_PMAXUB
,
16106 IX86_BUILTIN_PMINSW
,
16107 IX86_BUILTIN_PMINUB
,
16109 IX86_BUILTIN_PMULHUW
,
16110 IX86_BUILTIN_PMULHW
,
16111 IX86_BUILTIN_PMULLW
,
16113 IX86_BUILTIN_PSADBW
,
16114 IX86_BUILTIN_PSHUFW
,
16116 IX86_BUILTIN_PSLLW
,
16117 IX86_BUILTIN_PSLLD
,
16118 IX86_BUILTIN_PSLLQ
,
16119 IX86_BUILTIN_PSRAW
,
16120 IX86_BUILTIN_PSRAD
,
16121 IX86_BUILTIN_PSRLW
,
16122 IX86_BUILTIN_PSRLD
,
16123 IX86_BUILTIN_PSRLQ
,
16124 IX86_BUILTIN_PSLLWI
,
16125 IX86_BUILTIN_PSLLDI
,
16126 IX86_BUILTIN_PSLLQI
,
16127 IX86_BUILTIN_PSRAWI
,
16128 IX86_BUILTIN_PSRADI
,
16129 IX86_BUILTIN_PSRLWI
,
16130 IX86_BUILTIN_PSRLDI
,
16131 IX86_BUILTIN_PSRLQI
,
16133 IX86_BUILTIN_PUNPCKHBW
,
16134 IX86_BUILTIN_PUNPCKHWD
,
16135 IX86_BUILTIN_PUNPCKHDQ
,
16136 IX86_BUILTIN_PUNPCKLBW
,
16137 IX86_BUILTIN_PUNPCKLWD
,
16138 IX86_BUILTIN_PUNPCKLDQ
,
16140 IX86_BUILTIN_SHUFPS
,
16142 IX86_BUILTIN_RCPPS
,
16143 IX86_BUILTIN_RCPSS
,
16144 IX86_BUILTIN_RSQRTPS
,
16145 IX86_BUILTIN_RSQRTSS
,
16146 IX86_BUILTIN_SQRTPS
,
16147 IX86_BUILTIN_SQRTSS
,
16149 IX86_BUILTIN_UNPCKHPS
,
16150 IX86_BUILTIN_UNPCKLPS
,
16152 IX86_BUILTIN_ANDPS
,
16153 IX86_BUILTIN_ANDNPS
,
16155 IX86_BUILTIN_XORPS
,
16158 IX86_BUILTIN_LDMXCSR
,
16159 IX86_BUILTIN_STMXCSR
,
16160 IX86_BUILTIN_SFENCE
,
16162 /* 3DNow! Original */
16163 IX86_BUILTIN_FEMMS
,
16164 IX86_BUILTIN_PAVGUSB
,
16165 IX86_BUILTIN_PF2ID
,
16166 IX86_BUILTIN_PFACC
,
16167 IX86_BUILTIN_PFADD
,
16168 IX86_BUILTIN_PFCMPEQ
,
16169 IX86_BUILTIN_PFCMPGE
,
16170 IX86_BUILTIN_PFCMPGT
,
16171 IX86_BUILTIN_PFMAX
,
16172 IX86_BUILTIN_PFMIN
,
16173 IX86_BUILTIN_PFMUL
,
16174 IX86_BUILTIN_PFRCP
,
16175 IX86_BUILTIN_PFRCPIT1
,
16176 IX86_BUILTIN_PFRCPIT2
,
16177 IX86_BUILTIN_PFRSQIT1
,
16178 IX86_BUILTIN_PFRSQRT
,
16179 IX86_BUILTIN_PFSUB
,
16180 IX86_BUILTIN_PFSUBR
,
16181 IX86_BUILTIN_PI2FD
,
16182 IX86_BUILTIN_PMULHRW
,
16184 /* 3DNow! Athlon Extensions */
16185 IX86_BUILTIN_PF2IW
,
16186 IX86_BUILTIN_PFNACC
,
16187 IX86_BUILTIN_PFPNACC
,
16188 IX86_BUILTIN_PI2FW
,
16189 IX86_BUILTIN_PSWAPDSI
,
16190 IX86_BUILTIN_PSWAPDSF
,
16193 IX86_BUILTIN_ADDPD
,
16194 IX86_BUILTIN_ADDSD
,
16195 IX86_BUILTIN_DIVPD
,
16196 IX86_BUILTIN_DIVSD
,
16197 IX86_BUILTIN_MULPD
,
16198 IX86_BUILTIN_MULSD
,
16199 IX86_BUILTIN_SUBPD
,
16200 IX86_BUILTIN_SUBSD
,
16202 IX86_BUILTIN_CMPEQPD
,
16203 IX86_BUILTIN_CMPLTPD
,
16204 IX86_BUILTIN_CMPLEPD
,
16205 IX86_BUILTIN_CMPGTPD
,
16206 IX86_BUILTIN_CMPGEPD
,
16207 IX86_BUILTIN_CMPNEQPD
,
16208 IX86_BUILTIN_CMPNLTPD
,
16209 IX86_BUILTIN_CMPNLEPD
,
16210 IX86_BUILTIN_CMPNGTPD
,
16211 IX86_BUILTIN_CMPNGEPD
,
16212 IX86_BUILTIN_CMPORDPD
,
16213 IX86_BUILTIN_CMPUNORDPD
,
16214 IX86_BUILTIN_CMPEQSD
,
16215 IX86_BUILTIN_CMPLTSD
,
16216 IX86_BUILTIN_CMPLESD
,
16217 IX86_BUILTIN_CMPNEQSD
,
16218 IX86_BUILTIN_CMPNLTSD
,
16219 IX86_BUILTIN_CMPNLESD
,
16220 IX86_BUILTIN_CMPORDSD
,
16221 IX86_BUILTIN_CMPUNORDSD
,
16223 IX86_BUILTIN_COMIEQSD
,
16224 IX86_BUILTIN_COMILTSD
,
16225 IX86_BUILTIN_COMILESD
,
16226 IX86_BUILTIN_COMIGTSD
,
16227 IX86_BUILTIN_COMIGESD
,
16228 IX86_BUILTIN_COMINEQSD
,
16229 IX86_BUILTIN_UCOMIEQSD
,
16230 IX86_BUILTIN_UCOMILTSD
,
16231 IX86_BUILTIN_UCOMILESD
,
16232 IX86_BUILTIN_UCOMIGTSD
,
16233 IX86_BUILTIN_UCOMIGESD
,
16234 IX86_BUILTIN_UCOMINEQSD
,
16236 IX86_BUILTIN_MAXPD
,
16237 IX86_BUILTIN_MAXSD
,
16238 IX86_BUILTIN_MINPD
,
16239 IX86_BUILTIN_MINSD
,
16241 IX86_BUILTIN_ANDPD
,
16242 IX86_BUILTIN_ANDNPD
,
16244 IX86_BUILTIN_XORPD
,
16246 IX86_BUILTIN_SQRTPD
,
16247 IX86_BUILTIN_SQRTSD
,
16249 IX86_BUILTIN_UNPCKHPD
,
16250 IX86_BUILTIN_UNPCKLPD
,
16252 IX86_BUILTIN_SHUFPD
,
16254 IX86_BUILTIN_LOADUPD
,
16255 IX86_BUILTIN_STOREUPD
,
16256 IX86_BUILTIN_MOVSD
,
16258 IX86_BUILTIN_LOADHPD
,
16259 IX86_BUILTIN_LOADLPD
,
16261 IX86_BUILTIN_CVTDQ2PD
,
16262 IX86_BUILTIN_CVTDQ2PS
,
16264 IX86_BUILTIN_CVTPD2DQ
,
16265 IX86_BUILTIN_CVTPD2PI
,
16266 IX86_BUILTIN_CVTPD2PS
,
16267 IX86_BUILTIN_CVTTPD2DQ
,
16268 IX86_BUILTIN_CVTTPD2PI
,
16270 IX86_BUILTIN_CVTPI2PD
,
16271 IX86_BUILTIN_CVTSI2SD
,
16272 IX86_BUILTIN_CVTSI642SD
,
16274 IX86_BUILTIN_CVTSD2SI
,
16275 IX86_BUILTIN_CVTSD2SI64
,
16276 IX86_BUILTIN_CVTSD2SS
,
16277 IX86_BUILTIN_CVTSS2SD
,
16278 IX86_BUILTIN_CVTTSD2SI
,
16279 IX86_BUILTIN_CVTTSD2SI64
,
16281 IX86_BUILTIN_CVTPS2DQ
,
16282 IX86_BUILTIN_CVTPS2PD
,
16283 IX86_BUILTIN_CVTTPS2DQ
,
16285 IX86_BUILTIN_MOVNTI
,
16286 IX86_BUILTIN_MOVNTPD
,
16287 IX86_BUILTIN_MOVNTDQ
,
16290 IX86_BUILTIN_MASKMOVDQU
,
16291 IX86_BUILTIN_MOVMSKPD
,
16292 IX86_BUILTIN_PMOVMSKB128
,
16294 IX86_BUILTIN_PACKSSWB128
,
16295 IX86_BUILTIN_PACKSSDW128
,
16296 IX86_BUILTIN_PACKUSWB128
,
16298 IX86_BUILTIN_PADDB128
,
16299 IX86_BUILTIN_PADDW128
,
16300 IX86_BUILTIN_PADDD128
,
16301 IX86_BUILTIN_PADDQ128
,
16302 IX86_BUILTIN_PADDSB128
,
16303 IX86_BUILTIN_PADDSW128
,
16304 IX86_BUILTIN_PADDUSB128
,
16305 IX86_BUILTIN_PADDUSW128
,
16306 IX86_BUILTIN_PSUBB128
,
16307 IX86_BUILTIN_PSUBW128
,
16308 IX86_BUILTIN_PSUBD128
,
16309 IX86_BUILTIN_PSUBQ128
,
16310 IX86_BUILTIN_PSUBSB128
,
16311 IX86_BUILTIN_PSUBSW128
,
16312 IX86_BUILTIN_PSUBUSB128
,
16313 IX86_BUILTIN_PSUBUSW128
,
16315 IX86_BUILTIN_PAND128
,
16316 IX86_BUILTIN_PANDN128
,
16317 IX86_BUILTIN_POR128
,
16318 IX86_BUILTIN_PXOR128
,
16320 IX86_BUILTIN_PAVGB128
,
16321 IX86_BUILTIN_PAVGW128
,
16323 IX86_BUILTIN_PCMPEQB128
,
16324 IX86_BUILTIN_PCMPEQW128
,
16325 IX86_BUILTIN_PCMPEQD128
,
16326 IX86_BUILTIN_PCMPGTB128
,
16327 IX86_BUILTIN_PCMPGTW128
,
16328 IX86_BUILTIN_PCMPGTD128
,
16330 IX86_BUILTIN_PMADDWD128
,
16332 IX86_BUILTIN_PMAXSW128
,
16333 IX86_BUILTIN_PMAXUB128
,
16334 IX86_BUILTIN_PMINSW128
,
16335 IX86_BUILTIN_PMINUB128
,
16337 IX86_BUILTIN_PMULUDQ
,
16338 IX86_BUILTIN_PMULUDQ128
,
16339 IX86_BUILTIN_PMULHUW128
,
16340 IX86_BUILTIN_PMULHW128
,
16341 IX86_BUILTIN_PMULLW128
,
16343 IX86_BUILTIN_PSADBW128
,
16344 IX86_BUILTIN_PSHUFHW
,
16345 IX86_BUILTIN_PSHUFLW
,
16346 IX86_BUILTIN_PSHUFD
,
16348 IX86_BUILTIN_PSLLDQI128
,
16349 IX86_BUILTIN_PSLLWI128
,
16350 IX86_BUILTIN_PSLLDI128
,
16351 IX86_BUILTIN_PSLLQI128
,
16352 IX86_BUILTIN_PSRAWI128
,
16353 IX86_BUILTIN_PSRADI128
,
16354 IX86_BUILTIN_PSRLDQI128
,
16355 IX86_BUILTIN_PSRLWI128
,
16356 IX86_BUILTIN_PSRLDI128
,
16357 IX86_BUILTIN_PSRLQI128
,
16359 IX86_BUILTIN_PSLLDQ128
,
16360 IX86_BUILTIN_PSLLW128
,
16361 IX86_BUILTIN_PSLLD128
,
16362 IX86_BUILTIN_PSLLQ128
,
16363 IX86_BUILTIN_PSRAW128
,
16364 IX86_BUILTIN_PSRAD128
,
16365 IX86_BUILTIN_PSRLW128
,
16366 IX86_BUILTIN_PSRLD128
,
16367 IX86_BUILTIN_PSRLQ128
,
16369 IX86_BUILTIN_PUNPCKHBW128
,
16370 IX86_BUILTIN_PUNPCKHWD128
,
16371 IX86_BUILTIN_PUNPCKHDQ128
,
16372 IX86_BUILTIN_PUNPCKHQDQ128
,
16373 IX86_BUILTIN_PUNPCKLBW128
,
16374 IX86_BUILTIN_PUNPCKLWD128
,
16375 IX86_BUILTIN_PUNPCKLDQ128
,
16376 IX86_BUILTIN_PUNPCKLQDQ128
,
16378 IX86_BUILTIN_CLFLUSH
,
16379 IX86_BUILTIN_MFENCE
,
16380 IX86_BUILTIN_LFENCE
,
16382 /* Prescott New Instructions. */
16383 IX86_BUILTIN_ADDSUBPS
,
16384 IX86_BUILTIN_HADDPS
,
16385 IX86_BUILTIN_HSUBPS
,
16386 IX86_BUILTIN_MOVSHDUP
,
16387 IX86_BUILTIN_MOVSLDUP
,
16388 IX86_BUILTIN_ADDSUBPD
,
16389 IX86_BUILTIN_HADDPD
,
16390 IX86_BUILTIN_HSUBPD
,
16391 IX86_BUILTIN_LDDQU
,
16393 IX86_BUILTIN_MONITOR
,
16394 IX86_BUILTIN_MWAIT
,
16397 IX86_BUILTIN_PHADDW
,
16398 IX86_BUILTIN_PHADDD
,
16399 IX86_BUILTIN_PHADDSW
,
16400 IX86_BUILTIN_PHSUBW
,
16401 IX86_BUILTIN_PHSUBD
,
16402 IX86_BUILTIN_PHSUBSW
,
16403 IX86_BUILTIN_PMADDUBSW
,
16404 IX86_BUILTIN_PMULHRSW
,
16405 IX86_BUILTIN_PSHUFB
,
16406 IX86_BUILTIN_PSIGNB
,
16407 IX86_BUILTIN_PSIGNW
,
16408 IX86_BUILTIN_PSIGND
,
16409 IX86_BUILTIN_PALIGNR
,
16410 IX86_BUILTIN_PABSB
,
16411 IX86_BUILTIN_PABSW
,
16412 IX86_BUILTIN_PABSD
,
16414 IX86_BUILTIN_PHADDW128
,
16415 IX86_BUILTIN_PHADDD128
,
16416 IX86_BUILTIN_PHADDSW128
,
16417 IX86_BUILTIN_PHSUBW128
,
16418 IX86_BUILTIN_PHSUBD128
,
16419 IX86_BUILTIN_PHSUBSW128
,
16420 IX86_BUILTIN_PMADDUBSW128
,
16421 IX86_BUILTIN_PMULHRSW128
,
16422 IX86_BUILTIN_PSHUFB128
,
16423 IX86_BUILTIN_PSIGNB128
,
16424 IX86_BUILTIN_PSIGNW128
,
16425 IX86_BUILTIN_PSIGND128
,
16426 IX86_BUILTIN_PALIGNR128
,
16427 IX86_BUILTIN_PABSB128
,
16428 IX86_BUILTIN_PABSW128
,
16429 IX86_BUILTIN_PABSD128
,
16431 /* AMDFAM10 - SSE4A New Instructions. */
16432 IX86_BUILTIN_MOVNTSD
,
16433 IX86_BUILTIN_MOVNTSS
,
16434 IX86_BUILTIN_EXTRQI
,
16435 IX86_BUILTIN_EXTRQ
,
16436 IX86_BUILTIN_INSERTQI
,
16437 IX86_BUILTIN_INSERTQ
,
16440 IX86_BUILTIN_BLENDPD
,
16441 IX86_BUILTIN_BLENDPS
,
16442 IX86_BUILTIN_BLENDVPD
,
16443 IX86_BUILTIN_BLENDVPS
,
16444 IX86_BUILTIN_PBLENDVB128
,
16445 IX86_BUILTIN_PBLENDW128
,
16450 IX86_BUILTIN_INSERTPS128
,
16452 IX86_BUILTIN_MOVNTDQA
,
16453 IX86_BUILTIN_MPSADBW128
,
16454 IX86_BUILTIN_PACKUSDW128
,
16455 IX86_BUILTIN_PCMPEQQ
,
16456 IX86_BUILTIN_PHMINPOSUW128
,
16458 IX86_BUILTIN_PMAXSB128
,
16459 IX86_BUILTIN_PMAXSD128
,
16460 IX86_BUILTIN_PMAXUD128
,
16461 IX86_BUILTIN_PMAXUW128
,
16463 IX86_BUILTIN_PMINSB128
,
16464 IX86_BUILTIN_PMINSD128
,
16465 IX86_BUILTIN_PMINUD128
,
16466 IX86_BUILTIN_PMINUW128
,
16468 IX86_BUILTIN_PMOVSXBW128
,
16469 IX86_BUILTIN_PMOVSXBD128
,
16470 IX86_BUILTIN_PMOVSXBQ128
,
16471 IX86_BUILTIN_PMOVSXWD128
,
16472 IX86_BUILTIN_PMOVSXWQ128
,
16473 IX86_BUILTIN_PMOVSXDQ128
,
16475 IX86_BUILTIN_PMOVZXBW128
,
16476 IX86_BUILTIN_PMOVZXBD128
,
16477 IX86_BUILTIN_PMOVZXBQ128
,
16478 IX86_BUILTIN_PMOVZXWD128
,
16479 IX86_BUILTIN_PMOVZXWQ128
,
16480 IX86_BUILTIN_PMOVZXDQ128
,
16482 IX86_BUILTIN_PMULDQ128
,
16483 IX86_BUILTIN_PMULLD128
,
16485 IX86_BUILTIN_ROUNDPD
,
16486 IX86_BUILTIN_ROUNDPS
,
16487 IX86_BUILTIN_ROUNDSD
,
16488 IX86_BUILTIN_ROUNDSS
,
16490 IX86_BUILTIN_PTESTZ
,
16491 IX86_BUILTIN_PTESTC
,
16492 IX86_BUILTIN_PTESTNZC
,
16494 IX86_BUILTIN_VEC_INIT_V2SI
,
16495 IX86_BUILTIN_VEC_INIT_V4HI
,
16496 IX86_BUILTIN_VEC_INIT_V8QI
,
16497 IX86_BUILTIN_VEC_EXT_V2DF
,
16498 IX86_BUILTIN_VEC_EXT_V2DI
,
16499 IX86_BUILTIN_VEC_EXT_V4SF
,
16500 IX86_BUILTIN_VEC_EXT_V4SI
,
16501 IX86_BUILTIN_VEC_EXT_V8HI
,
16502 IX86_BUILTIN_VEC_EXT_V2SI
,
16503 IX86_BUILTIN_VEC_EXT_V4HI
,
16504 IX86_BUILTIN_VEC_EXT_V16QI
,
16505 IX86_BUILTIN_VEC_SET_V2DI
,
16506 IX86_BUILTIN_VEC_SET_V4SF
,
16507 IX86_BUILTIN_VEC_SET_V4SI
,
16508 IX86_BUILTIN_VEC_SET_V8HI
,
16509 IX86_BUILTIN_VEC_SET_V4HI
,
16510 IX86_BUILTIN_VEC_SET_V16QI
,
16515 /* Table for the ix86 builtin decls. */
16516 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
16518 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16519 * if the target_flags include one of MASK. Stores the function decl
16520 * in the ix86_builtins array.
16521 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16524 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16526 tree decl
= NULL_TREE
;
16528 if (mask
& target_flags
16529 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
16531 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16533 ix86_builtins
[(int) code
] = decl
;
16539 /* Like def_builtin, but also marks the function decl "const". */
16542 def_builtin_const (int mask
, const char *name
, tree type
,
16543 enum ix86_builtins code
)
16545 tree decl
= def_builtin (mask
, name
, type
, code
);
16547 TREE_READONLY (decl
) = 1;
16551 /* Bits for builtin_description.flag. */
16553 /* Set when we don't support the comparison natively, and should
16554 swap_comparison in order to support it. */
16555 #define BUILTIN_DESC_SWAP_OPERANDS 1
16557 struct builtin_description
16559 const unsigned int mask
;
16560 const enum insn_code icode
;
16561 const char *const name
;
16562 const enum ix86_builtins code
;
16563 const enum rtx_code comparison
;
16564 const unsigned int flag
;
16567 static const struct builtin_description bdesc_comi
[] =
16569 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
16570 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
16571 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
16572 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
16573 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
16574 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
16575 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
16576 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
16577 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
16578 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
16579 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
16580 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
16581 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
16582 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
16583 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
16584 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
16585 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
16586 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
16587 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
16588 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
16589 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
16590 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
16591 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
16592 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
16595 static const struct builtin_description bdesc_ptest
[] =
16598 { MASK_SSE4_1
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, 0 },
16599 { MASK_SSE4_1
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, 0 },
16600 { MASK_SSE4_1
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, 0 },
16603 /* SSE builtins with 3 arguments and the last argument must be a 8 bit
16604 constant or xmm0. */
16605 static const struct builtin_description bdesc_sse_3arg
[] =
16608 { MASK_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, 0, 0 },
16609 { MASK_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, 0, 0 },
16610 { MASK_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, 0, 0 },
16611 { MASK_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, 0, 0 },
16612 { MASK_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, 0, 0 },
16613 { MASK_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, 0, 0 },
16614 { MASK_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, 0, 0 },
16615 { MASK_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, 0, 0 },
16616 { MASK_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, 0, 0 },
16617 { MASK_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, 0, 0 },
16618 { MASK_SSE4_1
, CODE_FOR_sse4_1_roundsd
, 0, IX86_BUILTIN_ROUNDSD
, 0, 0 },
16619 { MASK_SSE4_1
, CODE_FOR_sse4_1_roundss
, 0, IX86_BUILTIN_ROUNDSS
, 0, 0 },
16622 static const struct builtin_description bdesc_2arg
[] =
16625 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16626 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16627 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16628 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16629 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16630 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16631 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16632 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16634 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16635 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16636 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16637 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16638 BUILTIN_DESC_SWAP_OPERANDS
},
16639 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16640 BUILTIN_DESC_SWAP_OPERANDS
},
16641 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16642 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16643 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16644 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16645 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16646 BUILTIN_DESC_SWAP_OPERANDS
},
16647 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16648 BUILTIN_DESC_SWAP_OPERANDS
},
16649 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16650 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16651 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16652 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16653 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16654 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16655 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16656 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16657 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16658 BUILTIN_DESC_SWAP_OPERANDS
},
16659 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16660 BUILTIN_DESC_SWAP_OPERANDS
},
16661 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, 0 },
16663 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16664 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16665 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16666 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16668 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16669 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16670 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16671 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16673 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16674 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16675 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16676 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16677 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16680 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16681 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16682 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16683 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16684 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16685 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16686 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16687 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16689 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16690 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16691 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16692 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16693 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16694 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16695 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16696 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16698 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16699 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16700 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16702 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16703 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16704 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16705 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16707 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16708 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16710 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16711 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16712 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16713 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16714 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16715 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16717 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16718 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16719 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16720 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16722 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16723 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16724 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16725 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16726 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16727 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16730 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16731 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16732 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16734 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16735 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16736 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16738 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16739 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16740 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16741 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16742 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16743 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16745 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16746 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16747 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16748 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16749 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16750 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16752 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16753 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16754 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16755 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16757 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16758 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16761 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16762 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16763 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16764 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16765 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16766 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16767 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16768 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16770 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16771 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16772 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16773 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16774 BUILTIN_DESC_SWAP_OPERANDS
},
16775 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16776 BUILTIN_DESC_SWAP_OPERANDS
},
16777 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16778 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16779 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16780 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16781 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16782 BUILTIN_DESC_SWAP_OPERANDS
},
16783 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16784 BUILTIN_DESC_SWAP_OPERANDS
},
16785 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16786 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16787 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16788 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16789 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16790 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16791 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16792 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16793 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16795 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16796 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16797 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16798 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16800 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16801 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16802 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16803 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16805 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16806 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16807 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16810 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16811 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16812 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16813 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16814 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16815 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16816 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16817 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16819 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16820 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16821 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16822 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16823 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16824 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16825 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16826 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16828 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16829 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16831 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16832 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16833 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16834 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16836 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16837 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16839 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16840 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16841 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16842 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16843 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16844 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16846 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16847 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16848 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16849 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16851 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16852 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16853 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16854 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16855 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16856 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16857 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16858 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16860 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16861 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16862 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16864 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16865 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16867 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16868 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16870 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16871 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16872 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16874 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16875 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16876 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16878 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16879 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16881 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16883 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16884 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16885 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16886 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16889 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16890 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16891 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16892 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16893 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16894 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16897 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16898 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16899 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16900 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16901 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16902 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16903 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16904 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16905 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16906 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16907 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16908 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16909 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16910 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16911 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16912 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16913 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16914 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16915 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16916 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16917 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16918 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16919 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16920 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 },
16923 { MASK_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, 0, 0 },
16924 { MASK_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, 0, 0 },
16925 { MASK_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, 0, 0 },
16926 { MASK_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, 0, 0 },
16927 { MASK_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, 0, 0 },
16928 { MASK_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, 0, 0 },
16929 { MASK_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, 0, 0 },
16930 { MASK_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, 0, 0 },
16931 { MASK_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, 0, 0 },
16932 { MASK_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, 0, 0 },
16933 { MASK_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, 0, IX86_BUILTIN_PMULDQ128
, 0, 0 },
16934 { MASK_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, 0, 0 },
16937 static const struct builtin_description bdesc_1arg
[] =
16939 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16940 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16942 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16943 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16944 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16946 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16947 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16948 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16949 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16950 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16951 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16953 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16954 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16956 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16958 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16959 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16961 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16962 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16963 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16964 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16965 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16967 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16969 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16970 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16971 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16972 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16974 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16975 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16976 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16979 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16980 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16983 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16984 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16985 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16986 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16987 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16988 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
16991 { MASK_SSE4_1
, CODE_FOR_sse4_1_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVSXBW128
, 0, 0 },
16992 { MASK_SSE4_1
, CODE_FOR_sse4_1_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVSXBD128
, 0, 0 },
16993 { MASK_SSE4_1
, CODE_FOR_sse4_1_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVSXBQ128
, 0, 0 },
16994 { MASK_SSE4_1
, CODE_FOR_sse4_1_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVSXWD128
, 0, 0 },
16995 { MASK_SSE4_1
, CODE_FOR_sse4_1_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVSXWQ128
, 0, 0 },
16996 { MASK_SSE4_1
, CODE_FOR_sse4_1_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVSXDQ128
, 0, 0 },
16997 { MASK_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVZXBW128
, 0, 0 },
16998 { MASK_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVZXBD128
, 0, 0 },
16999 { MASK_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVZXBQ128
, 0, 0 },
17000 { MASK_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVZXWD128
, 0, 0 },
17001 { MASK_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVZXWQ128
, 0, 0 },
17002 { MASK_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVZXDQ128
, 0, 0 },
17003 { MASK_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, 0, 0 },
/* Fake 1 arg builtins with a constant smaller than 8 bits as the
   second arg.  */
17007 { MASK_SSE4_1
, CODE_FOR_sse4_1_roundpd
, 0, IX86_BUILTIN_ROUNDPD
, 0, 0 },
17008 { MASK_SSE4_1
, CODE_FOR_sse4_1_roundps
, 0, IX86_BUILTIN_ROUNDPS
, 0, 0 },
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
17015 ix86_init_mmx_sse_builtins (void)
17017 const struct builtin_description
* d
;
17020 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
17021 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
17022 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
17023 tree V2DI_type_node
17024 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
17025 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
17026 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
17027 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
17028 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
17029 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
17030 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
17032 tree pchar_type_node
= build_pointer_type (char_type_node
);
17033 tree pcchar_type_node
= build_pointer_type (
17034 build_type_variant (char_type_node
, 1, 0));
17035 tree pfloat_type_node
= build_pointer_type (float_type_node
);
17036 tree pcfloat_type_node
= build_pointer_type (
17037 build_type_variant (float_type_node
, 1, 0));
17038 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
17039 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
17040 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
17043 tree int_ftype_v4sf_v4sf
17044 = build_function_type_list (integer_type_node
,
17045 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17046 tree v4si_ftype_v4sf_v4sf
17047 = build_function_type_list (V4SI_type_node
,
17048 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17049 /* MMX/SSE/integer conversions. */
17050 tree int_ftype_v4sf
17051 = build_function_type_list (integer_type_node
,
17052 V4SF_type_node
, NULL_TREE
);
17053 tree int64_ftype_v4sf
17054 = build_function_type_list (long_long_integer_type_node
,
17055 V4SF_type_node
, NULL_TREE
);
17056 tree int_ftype_v8qi
17057 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
17058 tree v4sf_ftype_v4sf_int
17059 = build_function_type_list (V4SF_type_node
,
17060 V4SF_type_node
, integer_type_node
, NULL_TREE
);
17061 tree v4sf_ftype_v4sf_int64
17062 = build_function_type_list (V4SF_type_node
,
17063 V4SF_type_node
, long_long_integer_type_node
,
17065 tree v4sf_ftype_v4sf_v2si
17066 = build_function_type_list (V4SF_type_node
,
17067 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
17069 /* Miscellaneous. */
17070 tree v8qi_ftype_v4hi_v4hi
17071 = build_function_type_list (V8QI_type_node
,
17072 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17073 tree v4hi_ftype_v2si_v2si
17074 = build_function_type_list (V4HI_type_node
,
17075 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17076 tree v4sf_ftype_v4sf_v4sf_int
17077 = build_function_type_list (V4SF_type_node
,
17078 V4SF_type_node
, V4SF_type_node
,
17079 integer_type_node
, NULL_TREE
);
17080 tree v2si_ftype_v4hi_v4hi
17081 = build_function_type_list (V2SI_type_node
,
17082 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17083 tree v4hi_ftype_v4hi_int
17084 = build_function_type_list (V4HI_type_node
,
17085 V4HI_type_node
, integer_type_node
, NULL_TREE
);
17086 tree v4hi_ftype_v4hi_di
17087 = build_function_type_list (V4HI_type_node
,
17088 V4HI_type_node
, long_long_unsigned_type_node
,
17090 tree v2si_ftype_v2si_di
17091 = build_function_type_list (V2SI_type_node
,
17092 V2SI_type_node
, long_long_unsigned_type_node
,
17094 tree void_ftype_void
17095 = build_function_type (void_type_node
, void_list_node
);
17096 tree void_ftype_unsigned
17097 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
17098 tree void_ftype_unsigned_unsigned
17099 = build_function_type_list (void_type_node
, unsigned_type_node
,
17100 unsigned_type_node
, NULL_TREE
);
17101 tree void_ftype_pcvoid_unsigned_unsigned
17102 = build_function_type_list (void_type_node
, const_ptr_type_node
,
17103 unsigned_type_node
, unsigned_type_node
,
17105 tree unsigned_ftype_void
17106 = build_function_type (unsigned_type_node
, void_list_node
);
17107 tree v2si_ftype_v4sf
17108 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
17109 /* Loads/stores. */
17110 tree void_ftype_v8qi_v8qi_pchar
17111 = build_function_type_list (void_type_node
,
17112 V8QI_type_node
, V8QI_type_node
,
17113 pchar_type_node
, NULL_TREE
);
17114 tree v4sf_ftype_pcfloat
17115 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
17116 /* @@@ the type is bogus */
17117 tree v4sf_ftype_v4sf_pv2si
17118 = build_function_type_list (V4SF_type_node
,
17119 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
17120 tree void_ftype_pv2si_v4sf
17121 = build_function_type_list (void_type_node
,
17122 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
17123 tree void_ftype_pfloat_v4sf
17124 = build_function_type_list (void_type_node
,
17125 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
17126 tree void_ftype_pdi_di
17127 = build_function_type_list (void_type_node
,
17128 pdi_type_node
, long_long_unsigned_type_node
,
17130 tree void_ftype_pv2di_v2di
17131 = build_function_type_list (void_type_node
,
17132 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
17133 /* Normal vector unops. */
17134 tree v4sf_ftype_v4sf
17135 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17136 tree v16qi_ftype_v16qi
17137 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17138 tree v8hi_ftype_v8hi
17139 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17140 tree v4si_ftype_v4si
17141 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17142 tree v8qi_ftype_v8qi
17143 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17144 tree v4hi_ftype_v4hi
17145 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17147 /* Normal vector binops. */
17148 tree v4sf_ftype_v4sf_v4sf
17149 = build_function_type_list (V4SF_type_node
,
17150 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17151 tree v8qi_ftype_v8qi_v8qi
17152 = build_function_type_list (V8QI_type_node
,
17153 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17154 tree v4hi_ftype_v4hi_v4hi
17155 = build_function_type_list (V4HI_type_node
,
17156 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17157 tree v2si_ftype_v2si_v2si
17158 = build_function_type_list (V2SI_type_node
,
17159 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17160 tree di_ftype_di_di
17161 = build_function_type_list (long_long_unsigned_type_node
,
17162 long_long_unsigned_type_node
,
17163 long_long_unsigned_type_node
, NULL_TREE
);
17165 tree di_ftype_di_di_int
17166 = build_function_type_list (long_long_unsigned_type_node
,
17167 long_long_unsigned_type_node
,
17168 long_long_unsigned_type_node
,
17169 integer_type_node
, NULL_TREE
);
17171 tree v2si_ftype_v2sf
17172 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
17173 tree v2sf_ftype_v2si
17174 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
17175 tree v2si_ftype_v2si
17176 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17177 tree v2sf_ftype_v2sf
17178 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17179 tree v2sf_ftype_v2sf_v2sf
17180 = build_function_type_list (V2SF_type_node
,
17181 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17182 tree v2si_ftype_v2sf_v2sf
17183 = build_function_type_list (V2SI_type_node
,
17184 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17185 tree pint_type_node
= build_pointer_type (integer_type_node
);
17186 tree pdouble_type_node
= build_pointer_type (double_type_node
);
17187 tree pcdouble_type_node
= build_pointer_type (
17188 build_type_variant (double_type_node
, 1, 0));
17189 tree int_ftype_v2df_v2df
17190 = build_function_type_list (integer_type_node
,
17191 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17193 tree void_ftype_pcvoid
17194 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
17195 tree v4sf_ftype_v4si
17196 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
17197 tree v4si_ftype_v4sf
17198 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
17199 tree v2df_ftype_v4si
17200 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
17201 tree v4si_ftype_v2df
17202 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
17203 tree v2si_ftype_v2df
17204 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
17205 tree v4sf_ftype_v2df
17206 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
17207 tree v2df_ftype_v2si
17208 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
17209 tree v2df_ftype_v4sf
17210 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
17211 tree int_ftype_v2df
17212 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
17213 tree int64_ftype_v2df
17214 = build_function_type_list (long_long_integer_type_node
,
17215 V2DF_type_node
, NULL_TREE
);
17216 tree v2df_ftype_v2df_int
17217 = build_function_type_list (V2DF_type_node
,
17218 V2DF_type_node
, integer_type_node
, NULL_TREE
);
17219 tree v2df_ftype_v2df_int64
17220 = build_function_type_list (V2DF_type_node
,
17221 V2DF_type_node
, long_long_integer_type_node
,
17223 tree v4sf_ftype_v4sf_v2df
17224 = build_function_type_list (V4SF_type_node
,
17225 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
17226 tree v2df_ftype_v2df_v4sf
17227 = build_function_type_list (V2DF_type_node
,
17228 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
17229 tree v2df_ftype_v2df_v2df_int
17230 = build_function_type_list (V2DF_type_node
,
17231 V2DF_type_node
, V2DF_type_node
,
17234 tree v2df_ftype_v2df_pcdouble
17235 = build_function_type_list (V2DF_type_node
,
17236 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
17237 tree void_ftype_pdouble_v2df
17238 = build_function_type_list (void_type_node
,
17239 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
17240 tree void_ftype_pint_int
17241 = build_function_type_list (void_type_node
,
17242 pint_type_node
, integer_type_node
, NULL_TREE
);
17243 tree void_ftype_v16qi_v16qi_pchar
17244 = build_function_type_list (void_type_node
,
17245 V16QI_type_node
, V16QI_type_node
,
17246 pchar_type_node
, NULL_TREE
);
17247 tree v2df_ftype_pcdouble
17248 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
17249 tree v2df_ftype_v2df_v2df
17250 = build_function_type_list (V2DF_type_node
,
17251 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17252 tree v16qi_ftype_v16qi_v16qi
17253 = build_function_type_list (V16QI_type_node
,
17254 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17255 tree v8hi_ftype_v8hi_v8hi
17256 = build_function_type_list (V8HI_type_node
,
17257 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17258 tree v4si_ftype_v4si_v4si
17259 = build_function_type_list (V4SI_type_node
,
17260 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17261 tree v2di_ftype_v2di_v2di
17262 = build_function_type_list (V2DI_type_node
,
17263 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
17264 tree v2di_ftype_v2df_v2df
17265 = build_function_type_list (V2DI_type_node
,
17266 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17267 tree v2df_ftype_v2df
17268 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17269 tree v2di_ftype_v2di_int
17270 = build_function_type_list (V2DI_type_node
,
17271 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17272 tree v2di_ftype_v2di_v2di_int
17273 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17274 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17275 tree v4si_ftype_v4si_int
17276 = build_function_type_list (V4SI_type_node
,
17277 V4SI_type_node
, integer_type_node
, NULL_TREE
);
17278 tree v8hi_ftype_v8hi_int
17279 = build_function_type_list (V8HI_type_node
,
17280 V8HI_type_node
, integer_type_node
, NULL_TREE
);
17281 tree v4si_ftype_v8hi_v8hi
17282 = build_function_type_list (V4SI_type_node
,
17283 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17284 tree di_ftype_v8qi_v8qi
17285 = build_function_type_list (long_long_unsigned_type_node
,
17286 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17287 tree di_ftype_v2si_v2si
17288 = build_function_type_list (long_long_unsigned_type_node
,
17289 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17290 tree v2di_ftype_v16qi_v16qi
17291 = build_function_type_list (V2DI_type_node
,
17292 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17293 tree v2di_ftype_v4si_v4si
17294 = build_function_type_list (V2DI_type_node
,
17295 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17296 tree int_ftype_v16qi
17297 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
17298 tree v16qi_ftype_pcchar
17299 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
17300 tree void_ftype_pchar_v16qi
17301 = build_function_type_list (void_type_node
,
17302 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
17304 tree v2di_ftype_v2di_unsigned_unsigned
17305 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17306 unsigned_type_node
, unsigned_type_node
,
17308 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17309 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
17310 unsigned_type_node
, unsigned_type_node
,
17312 tree v2di_ftype_v2di_v16qi
17313 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
17315 tree v2df_ftype_v2df_v2df_v2df
17316 = build_function_type_list (V2DF_type_node
,
17317 V2DF_type_node
, V2DF_type_node
,
17318 V2DF_type_node
, NULL_TREE
);
17319 tree v4sf_ftype_v4sf_v4sf_v4sf
17320 = build_function_type_list (V4SF_type_node
,
17321 V4SF_type_node
, V4SF_type_node
,
17322 V4SF_type_node
, NULL_TREE
);
17323 tree v8hi_ftype_v16qi
17324 = build_function_type_list (V8HI_type_node
, V16QI_type_node
,
17326 tree v4si_ftype_v16qi
17327 = build_function_type_list (V4SI_type_node
, V16QI_type_node
,
17329 tree v2di_ftype_v16qi
17330 = build_function_type_list (V2DI_type_node
, V16QI_type_node
,
17332 tree v4si_ftype_v8hi
17333 = build_function_type_list (V4SI_type_node
, V8HI_type_node
,
17335 tree v2di_ftype_v8hi
17336 = build_function_type_list (V2DI_type_node
, V8HI_type_node
,
17338 tree v2di_ftype_v4si
17339 = build_function_type_list (V2DI_type_node
, V4SI_type_node
,
17341 tree v2di_ftype_pv2di
17342 = build_function_type_list (V2DI_type_node
, pv2di_type_node
,
17344 tree v16qi_ftype_v16qi_v16qi_int
17345 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
17346 V16QI_type_node
, integer_type_node
,
17348 tree v16qi_ftype_v16qi_v16qi_v16qi
17349 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
17350 V16QI_type_node
, V16QI_type_node
,
17352 tree v8hi_ftype_v8hi_v8hi_int
17353 = build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17354 V8HI_type_node
, integer_type_node
,
17356 tree v4si_ftype_v4si_v4si_int
17357 = build_function_type_list (V4SI_type_node
, V4SI_type_node
,
17358 V4SI_type_node
, integer_type_node
,
17360 tree int_ftype_v2di_v2di
17361 = build_function_type_list (integer_type_node
,
17362 V2DI_type_node
, V2DI_type_node
,
17366 tree float128_type
;
17369 /* The __float80 type. */
17370 if (TYPE_MODE (long_double_type_node
) == XFmode
)
17371 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
17375 /* The __float80 type. */
17376 float80_type
= make_node (REAL_TYPE
);
17377 TYPE_PRECISION (float80_type
) = 80;
17378 layout_type (float80_type
);
17379 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
17384 float128_type
= make_node (REAL_TYPE
);
17385 TYPE_PRECISION (float128_type
) = 128;
17386 layout_type (float128_type
);
17387 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
17390 /* Add all SSE builtins that are more or less simple operations on
17392 for (i
= 0, d
= bdesc_sse_3arg
;
17393 i
< ARRAY_SIZE (bdesc_sse_3arg
);
17396 /* Use one of the operands; the target can have a different mode for
17397 mask-generating compares. */
17398 enum machine_mode mode
;
17403 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17408 type
= v16qi_ftype_v16qi_v16qi_int
;
17411 type
= v8hi_ftype_v8hi_v8hi_int
;
17414 type
= v4si_ftype_v4si_v4si_int
;
17417 type
= v2di_ftype_v2di_v2di_int
;
17420 type
= v2df_ftype_v2df_v2df_int
;
17423 type
= v4sf_ftype_v4sf_v4sf_int
;
17426 gcc_unreachable ();
17429 /* Override for variable blends. */
17432 case CODE_FOR_sse4_1_blendvpd
:
17433 type
= v2df_ftype_v2df_v2df_v2df
;
17435 case CODE_FOR_sse4_1_blendvps
:
17436 type
= v4sf_ftype_v4sf_v4sf_v4sf
;
17438 case CODE_FOR_sse4_1_pblendvb
:
17439 type
= v16qi_ftype_v16qi_v16qi_v16qi
;
17445 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17448 /* Add all builtins that are more or less simple operations on two
17450 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17452 /* Use one of the operands; the target can have a different mode for
17453 mask-generating compares. */
17454 enum machine_mode mode
;
17459 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17464 type
= v16qi_ftype_v16qi_v16qi
;
17467 type
= v8hi_ftype_v8hi_v8hi
;
17470 type
= v4si_ftype_v4si_v4si
;
17473 type
= v2di_ftype_v2di_v2di
;
17476 type
= v2df_ftype_v2df_v2df
;
17479 type
= v4sf_ftype_v4sf_v4sf
;
17482 type
= v8qi_ftype_v8qi_v8qi
;
17485 type
= v4hi_ftype_v4hi_v4hi
;
17488 type
= v2si_ftype_v2si_v2si
;
17491 type
= di_ftype_di_di
;
17495 gcc_unreachable ();
17498 /* Override for comparisons. */
17499 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
17500 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
17501 type
= v4si_ftype_v4sf_v4sf
;
17503 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
17504 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
17505 type
= v2di_ftype_v2df_v2df
;
17507 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17510 /* Add all builtins that are more or less simple operations on 1 operand. */
17511 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
17513 enum machine_mode mode
;
17518 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17523 type
= v16qi_ftype_v16qi
;
17526 type
= v8hi_ftype_v8hi
;
17529 type
= v4si_ftype_v4si
;
17532 type
= v2df_ftype_v2df
;
17535 type
= v4sf_ftype_v4sf
;
17538 type
= v8qi_ftype_v8qi
;
17541 type
= v4hi_ftype_v4hi
;
17544 type
= v2si_ftype_v2si
;
17551 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17554 /* Add the remaining MMX insns with somewhat more complicated types. */
17555 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
17556 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
17557 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
17558 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
17560 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
17561 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
17562 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
17564 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
17565 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
17567 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
17568 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
17570 /* comi/ucomi insns. */
17571 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
17572 if (d
->mask
== MASK_SSE2
)
17573 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
17575 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
17578 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
17579 def_builtin (d
->mask
, d
->name
, int_ftype_v2di_v2di
, d
->code
);
17581 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
17582 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
17583 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
17585 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
17586 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
17587 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
17588 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
17589 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
17590 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
17591 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
17592 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
17593 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
17594 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
17595 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
17597 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
17599 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
17600 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
17602 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
17603 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
17604 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
17605 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
17607 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
17608 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
17609 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
17610 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
17612 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
17614 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
17616 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
17617 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
17618 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
17619 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
17620 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
17621 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
17623 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
17625 /* Original 3DNow! */
17626 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
17627 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
17628 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
17629 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
17630 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
17631 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
17632 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
17633 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
17634 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
17635 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
17636 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
17637 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
17638 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
17639 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
17640 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
17641 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
17642 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
17643 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
17644 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
17645 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
17647 /* 3DNow! extension as used in the Athlon CPU. */
17648 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
17649 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
17650 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
17651 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
17652 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
17653 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
17656 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
17658 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
17659 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
17661 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
17662 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
17664 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
17665 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
17666 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
17667 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
17668 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
17670 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
17671 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
17672 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
17673 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
17675 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
17676 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
17678 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
17680 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
17681 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
17683 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
17684 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
17685 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
17686 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
17687 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
17689 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
17691 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
17692 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
17693 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
17694 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
17696 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
17697 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
17698 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
17700 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
17701 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
17702 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
17703 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
17705 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
17706 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
17707 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
17709 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
17710 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
17712 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
17713 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
17715 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
17716 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
17717 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
17718 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
17719 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSLLW128
);
17720 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSLLD128
);
17721 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
17723 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
17724 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
17725 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
17726 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
17727 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRLW128
);
17728 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRLD128
);
17729 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
17731 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
17732 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
17733 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRAW128
);
17734 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRAD128
);
17736 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
17738 /* Prescott New Instructions. */
17739 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
17740 void_ftype_pcvoid_unsigned_unsigned
,
17741 IX86_BUILTIN_MONITOR
);
17742 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
17743 void_ftype_unsigned_unsigned
,
17744 IX86_BUILTIN_MWAIT
);
17745 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
17746 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
17749 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
17750 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
17751 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
17752 IX86_BUILTIN_PALIGNR
);
17755 def_builtin (MASK_SSE4_1
, "__builtin_ia32_movntdqa",
17756 v2di_ftype_pv2di
, IX86_BUILTIN_MOVNTDQA
);
17757 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovsxbw128",
17758 v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVSXBW128
);
17759 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovsxbd128",
17760 v4si_ftype_v16qi
, IX86_BUILTIN_PMOVSXBD128
);
17761 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovsxbq128",
17762 v2di_ftype_v16qi
, IX86_BUILTIN_PMOVSXBQ128
);
17763 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovsxwd128",
17764 v4si_ftype_v8hi
, IX86_BUILTIN_PMOVSXWD128
);
17765 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovsxwq128",
17766 v2di_ftype_v8hi
, IX86_BUILTIN_PMOVSXWQ128
);
17767 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovsxdq128",
17768 v2di_ftype_v4si
, IX86_BUILTIN_PMOVSXDQ128
);
17769 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovzxbw128",
17770 v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVZXBW128
);
17771 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovzxbd128",
17772 v4si_ftype_v16qi
, IX86_BUILTIN_PMOVZXBD128
);
17773 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovzxbq128",
17774 v2di_ftype_v16qi
, IX86_BUILTIN_PMOVZXBQ128
);
17775 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovzxwd128",
17776 v4si_ftype_v8hi
, IX86_BUILTIN_PMOVZXWD128
);
17777 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovzxwq128",
17778 v2di_ftype_v8hi
, IX86_BUILTIN_PMOVZXWQ128
);
17779 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmovzxdq128",
17780 v2di_ftype_v4si
, IX86_BUILTIN_PMOVZXDQ128
);
17781 def_builtin (MASK_SSE4_1
, "__builtin_ia32_pmuldq128",
17782 v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULDQ128
);
17783 def_builtin_const (MASK_SSE4_1
, "__builtin_ia32_roundpd",
17784 v2df_ftype_v2df_int
, IX86_BUILTIN_ROUNDPD
);
17785 def_builtin_const (MASK_SSE4_1
, "__builtin_ia32_roundps",
17786 v4sf_ftype_v4sf_int
, IX86_BUILTIN_ROUNDPS
);
17787 def_builtin_const (MASK_SSE4_1
, "__builtin_ia32_roundsd",
17788 v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_ROUNDSD
);
17789 def_builtin_const (MASK_SSE4_1
, "__builtin_ia32_roundss",
17790 v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_ROUNDSS
);
17792 /* AMDFAM10 SSE4A New built-ins */
17793 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
17794 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
17795 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
17796 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
17797 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
17798 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
17799 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
17800 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
17801 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
17802 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
17803 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
17804 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
17806 /* Access to the vec_init patterns. */
17807 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
17808 integer_type_node
, NULL_TREE
);
17809 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
17810 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
17812 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
17813 short_integer_type_node
,
17814 short_integer_type_node
,
17815 short_integer_type_node
, NULL_TREE
);
17816 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
17817 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
17819 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
17820 char_type_node
, char_type_node
,
17821 char_type_node
, char_type_node
,
17822 char_type_node
, char_type_node
,
17823 char_type_node
, NULL_TREE
);
17824 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
17825 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
17827 /* Access to the vec_extract patterns. */
17828 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17829 integer_type_node
, NULL_TREE
);
17830 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
17831 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
17833 ftype
= build_function_type_list (long_long_integer_type_node
,
17834 V2DI_type_node
, integer_type_node
,
17836 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
17837 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
17839 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17840 integer_type_node
, NULL_TREE
);
17841 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
17842 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
17844 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17845 integer_type_node
, NULL_TREE
);
17846 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
17847 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
17849 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17850 integer_type_node
, NULL_TREE
);
17851 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
17852 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
17854 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
17855 integer_type_node
, NULL_TREE
);
17856 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17857 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17859 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17860 integer_type_node
, NULL_TREE
);
17861 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17862 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17864 ftype
= build_function_type_list (intQI_type_node
, V16QI_type_node
,
17865 integer_type_node
, NULL_TREE
);
17866 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v16qi",
17867 ftype
, IX86_BUILTIN_VEC_EXT_V16QI
);
17869 /* Access to the vec_set patterns. */
17870 ftype
= build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17872 integer_type_node
, NULL_TREE
);
17873 def_builtin (MASK_SSE4_1
| MASK_64BIT
, "__builtin_ia32_vec_set_v2di",
17874 ftype
, IX86_BUILTIN_VEC_SET_V2DI
);
17876 ftype
= build_function_type_list (V4SF_type_node
, V4SF_type_node
,
17878 integer_type_node
, NULL_TREE
);
17879 def_builtin (MASK_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
17880 ftype
, IX86_BUILTIN_VEC_SET_V4SF
);
17882 ftype
= build_function_type_list (V4SI_type_node
, V4SI_type_node
,
17884 integer_type_node
, NULL_TREE
);
17885 def_builtin (MASK_SSE4_1
, "__builtin_ia32_vec_set_v4si",
17886 ftype
, IX86_BUILTIN_VEC_SET_V4SI
);
17888 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17890 integer_type_node
, NULL_TREE
);
17891 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17892 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17894 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17896 integer_type_node
, NULL_TREE
);
17897 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17898 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
17900 ftype
= build_function_type_list (V16QI_type_node
, V16QI_type_node
,
17902 integer_type_node
, NULL_TREE
);
17903 def_builtin (MASK_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
17904 ftype
, IX86_BUILTIN_VEC_SET_V16QI
);
/* NOTE(review): this chunk is a lossy, line-fragmented extraction of GCC's
   i386.c; the numeric prefixes are fused original line numbers.  Between the
   two fragments below, original lines (the storage class, braces, and
   presumably a TARGET_MMX-style guard — TODO confirm against i386.c) are
   missing.  Target hook body: registers all machine-specific builtins by
   delegating to the MMX/SSE initializer visible above this block.  */
17908 ix86_init_builtins (void)
17911 ix86_init_mmx_sse_builtins ();
17914 /* Errors in the source file can cause expand_expr to return const0_rtx
17915 where we expect a vector. To avoid crashing, use one of the vector
17916 clear instructions. */
/* NOTE(review): lossy extraction — the storage class, braces, and the
   trailing `return x;` fragment of this helper are missing from this view.
   Visible behavior: if the expanded operand X is the scalar const0_rtx,
   replace it with the all-zero vector constant of MODE so later vector
   predicates/emitters do not crash on a scalar zero.  */
17918 safe_vector_operand (rtx x
, enum machine_mode mode
)
17920 if (x
== const0_rtx
)
17921 x
= CONST0_RTX (mode
);
17925 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
17926 4 operands. The third argument must be a constant smaller than 8
17930 ix86_expand_sse_4_operands_builtin (enum insn_code icode
, tree exp
,
17934 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17935 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17936 tree arg2
= CALL_EXPR_ARG (exp
, 2);
17937 rtx op0
= expand_normal (arg0
);
17938 rtx op1
= expand_normal (arg1
);
17939 rtx op2
= expand_normal (arg2
);
17940 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17941 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17942 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
17943 enum machine_mode mode2
;
17946 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17947 op0
= copy_to_mode_reg (mode0
, op0
);
17948 if ((optimize
&& !register_operand (op1
, mode1
))
17949 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17950 op1
= copy_to_mode_reg (mode1
, op1
);
17954 case CODE_FOR_sse4_1_blendvpd
:
17955 case CODE_FOR_sse4_1_blendvps
:
17956 case CODE_FOR_sse4_1_pblendvb
:
17957 /* The third argument of variable blends must be xmm0. */
17958 xmm0
= gen_rtx_REG (tmode
, FIRST_SSE_REG
);
17959 emit_move_insn (xmm0
, op2
);
17963 mode2
= insn_data
[icode
].operand
[2].mode
;
17964 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
17968 case CODE_FOR_sse4_1_roundsd
:
17969 case CODE_FOR_sse4_1_roundss
:
17970 error ("the third argument must be a 4-bit immediate");
17973 error ("the third argument must be a 8-bit immediate");
17983 || GET_MODE (target
) != tmode
17984 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17985 target
= gen_reg_rtx (tmode
);
17986 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
/* NOTE(review): lossy extraction — missing from this view are the third
   parameter and closing of the signature (presumably `rtx target`), the
   declarations of `pat` and `xmm0`, both `switch (icode)` headers and their
   `break`s/default arms, the `if (optimize || !target` head of the target
   reuse test at 17983, and the tail that emits `pat` and returns `target`.
   Visible behavior: expands a 4-operand SSE builtin; forces op0/op1 into
   registers when the insn predicates reject them; for the SSE4.1 variable
   blends the third operand is moved into hard register xmm0 (FIRST_SSE_REG);
   otherwise op2 must satisfy the immediate predicate, with a 4-bit limit for
   roundsd/roundss and an 8-bit limit for the rest.  TODO confirm the missing
   control flow against i386.c before relying on this summary.  */
17993 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* NOTE(review): lossy extraction — missing from this view are the storage
   class/return type, the `rtx pat;` declaration, the `xops[]` operand array
   filled before 18041, braces, and the tail that emits `pat` and returns
   `target`.  Visible behavior: expands a two-operand builtin; scalar zeros
   are promoted to vector zeros via safe_vector_operand; a fresh target is
   allocated when optimizing or when the existing one has the wrong mode or
   fails the operand-0 predicate.  */
17996 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
17999 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18000 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18001 rtx op0
= expand_normal (arg0
);
18002 rtx op1
= expand_normal (arg1
);
18003 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18004 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18005 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18007 if (VECTOR_MODE_P (mode0
))
18008 op0
= safe_vector_operand (op0
, mode0
);
18009 if (VECTOR_MODE_P (mode1
))
18010 op1
= safe_vector_operand (op1
, mode1
);
18012 if (optimize
|| !target
18013 || GET_MODE (target
) != tmode
18014 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18015 target
= gen_reg_rtx (tmode
);
/* An SImode second operand feeding a TImode insn operand (the SSE2 shift
   counts) is widened by loading it into a V4SImode register and taking the
   TImode lowpart.  */
18017 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
18019 rtx x
= gen_reg_rtx (V4SImode
);
18020 emit_insn (gen_sse2_loadd (x
, op1
));
18021 op1
= gen_lowpart (TImode
, x
);
18024 /* The insn must want input operands in the same modes as the
18026 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
18027 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
18029 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18030 op0
= copy_to_mode_reg (mode0
, op0
);
18031 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18032 op1
= copy_to_mode_reg (mode1
, op1
);
18034 /* ??? Using ix86_fixup_binary_operands is problematic when
18035 we've got mismatched modes. Fake it. */
18041 if (tmode
== mode0
&& tmode
== mode1
)
18043 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
)
;
18047 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
18049 op0
= force_reg (mode0
, op0
);
18050 op1
= force_reg (mode1
, op1
);
18051 target
= gen_reg_rtx (tmode
);
18054 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18061 /* Subroutine of ix86_expand_builtin to take care of stores. */
18064 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
18067 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18068 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18069 rtx op0
= expand_normal (arg0
);
18070 rtx op1
= expand_normal (arg1
);
18071 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
18072 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
18074 if (VECTOR_MODE_P (mode1
))
18075 op1
= safe_vector_operand (op1
, mode1
);
18077 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
18078 op1
= copy_to_mode_reg (mode1
, op1
);
18080 pat
= GEN_FCN (icode
) (op0
, op1
);
18086 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18089 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
18090 rtx target
, int do_load
)
18093 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18094 rtx op0
= expand_normal (arg0
);
18095 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18096 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18098 if (optimize
|| !target
18099 || GET_MODE (target
) != tmode
18100 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18101 target
= gen_reg_rtx (tmode
);
18103 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
18106 if (VECTOR_MODE_P (mode0
))
18107 op0
= safe_vector_operand (op0
, mode0
);
18109 if ((optimize
&& !register_operand (op0
, mode0
))
18110 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18111 op0
= copy_to_mode_reg (mode0
, op0
);
18116 case CODE_FOR_sse4_1_roundpd
:
18117 case CODE_FOR_sse4_1_roundps
:
18119 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18120 rtx op1
= expand_normal (arg1
);
18121 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18123 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18125 error ("the second argument must be a 4-bit immediate");
18128 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18132 pat
= GEN_FCN (icode
) (target
, op0
);
18142 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18143 sqrtss, rsqrtss, rcpss. */
18146 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
18149 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18150 rtx op1
, op0
= expand_normal (arg0
);
18151 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18152 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18154 if (optimize
|| !target
18155 || GET_MODE (target
) != tmode
18156 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18157 target
= gen_reg_rtx (tmode
);
18159 if (VECTOR_MODE_P (mode0
))
18160 op0
= safe_vector_operand (op0
, mode0
);
18162 if ((optimize
&& !register_operand (op0
, mode0
))
18163 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18164 op0
= copy_to_mode_reg (mode0
, op0
);
18167 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
18168 op1
= copy_to_mode_reg (mode0
, op1
);
18170 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18177 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18180 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
18184 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18185 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18186 rtx op0
= expand_normal (arg0
);
18187 rtx op1
= expand_normal (arg1
);
18189 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
18190 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
18191 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
18192 enum rtx_code comparison
= d
->comparison
;
18194 if (VECTOR_MODE_P (mode0
))
18195 op0
= safe_vector_operand (op0
, mode0
);
18196 if (VECTOR_MODE_P (mode1
))
18197 op1
= safe_vector_operand (op1
, mode1
);
18199 /* Swap operands if we have a comparison that isn't available in
18201 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
18203 rtx tmp
= gen_reg_rtx (mode1
);
18204 emit_move_insn (tmp
, op1
);
18209 if (optimize
|| !target
18210 || GET_MODE (target
) != tmode
18211 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
18212 target
= gen_reg_rtx (tmode
);
18214 if ((optimize
&& !register_operand (op0
, mode0
))
18215 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
18216 op0
= copy_to_mode_reg (mode0
, op0
);
18217 if ((optimize
&& !register_operand (op1
, mode1
))
18218 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
18219 op1
= copy_to_mode_reg (mode1
, op1
);
18221 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
18222 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
18229 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18232 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
18236 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18237 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18238 rtx op0
= expand_normal (arg0
);
18239 rtx op1
= expand_normal (arg1
);
18240 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
18241 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
18242 enum rtx_code comparison
= d
->comparison
;
18244 if (VECTOR_MODE_P (mode0
))
18245 op0
= safe_vector_operand (op0
, mode0
);
18246 if (VECTOR_MODE_P (mode1
))
18247 op1
= safe_vector_operand (op1
, mode1
);
18249 /* Swap operands if we have a comparison that isn't available in
18251 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
18258 target
= gen_reg_rtx (SImode
);
18259 emit_move_insn (target
, const0_rtx
);
18260 target
= gen_rtx_SUBREG (QImode
, target
, 0);
18262 if ((optimize
&& !register_operand (op0
, mode0
))
18263 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
18264 op0
= copy_to_mode_reg (mode0
, op0
);
18265 if ((optimize
&& !register_operand (op1
, mode1
))
18266 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
18267 op1
= copy_to_mode_reg (mode1
, op1
);
18269 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
18273 emit_insn (gen_rtx_SET (VOIDmode
,
18274 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
18275 gen_rtx_fmt_ee (comparison
, QImode
,
18279 return SUBREG_REG (target
);
18282 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18285 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
18289 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18290 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18291 rtx op0
= expand_normal (arg0
);
18292 rtx op1
= expand_normal (arg1
);
18293 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
18294 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
18295 enum rtx_code comparison
= d
->comparison
;
18297 if (VECTOR_MODE_P (mode0
))
18298 op0
= safe_vector_operand (op0
, mode0
);
18299 if (VECTOR_MODE_P (mode1
))
18300 op1
= safe_vector_operand (op1
, mode1
);
18302 target
= gen_reg_rtx (SImode
);
18303 emit_move_insn (target
, const0_rtx
);
18304 target
= gen_rtx_SUBREG (QImode
, target
, 0);
18306 if ((optimize
&& !register_operand (op0
, mode0
))
18307 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
18308 op0
= copy_to_mode_reg (mode0
, op0
);
18309 if ((optimize
&& !register_operand (op1
, mode1
))
18310 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
18311 op1
= copy_to_mode_reg (mode1
, op1
);
18313 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
18317 emit_insn (gen_rtx_SET (VOIDmode
,
18318 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
18319 gen_rtx_fmt_ee (comparison
, QImode
,
18323 return SUBREG_REG (target
);
18326 /* Return the integer constant in ARG. Constrain it to be in the range
18327 of the subparts of VEC_TYPE; issue an error if not. */
18330 get_element_number (tree vec_type
, tree arg
)
18332 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
18334 if (!host_integerp (arg
, 1)
18335 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
18337 error ("selector must be an integer constant in the range 0..%wi", max
);
18344 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18345 ix86_expand_vector_init. We DO have language-level syntax for this, in
18346 the form of (type){ init-list }. Except that since we can't place emms
18347 instructions from inside the compiler, we can't allow the use of MMX
18348 registers unless the user explicitly asks for it. So we do *not* define
18349 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18350 we have builtins invoked by mmintrin.h that gives us license to emit
18351 these sorts of instructions. */
18354 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
18356 enum machine_mode tmode
= TYPE_MODE (type
);
18357 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
18358 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
18359 rtvec v
= rtvec_alloc (n_elt
);
18361 gcc_assert (VECTOR_MODE_P (tmode
));
18362 gcc_assert (call_expr_nargs (exp
) == n_elt
);
18364 for (i
= 0; i
< n_elt
; ++i
)
18366 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
18367 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
18370 if (!target
|| !register_operand (target
, tmode
))
18371 target
= gen_reg_rtx (tmode
);
18373 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
18377 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18378 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
18379 had a language-level syntax for referencing vector elements. */
18382 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
18384 enum machine_mode tmode
, mode0
;
18389 arg0
= CALL_EXPR_ARG (exp
, 0);
18390 arg1
= CALL_EXPR_ARG (exp
, 1);
18392 op0
= expand_normal (arg0
);
18393 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
18395 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
18396 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
18397 gcc_assert (VECTOR_MODE_P (mode0
));
18399 op0
= force_reg (mode0
, op0
);
18401 if (optimize
|| !target
|| !register_operand (target
, tmode
))
18402 target
= gen_reg_rtx (tmode
);
18404 ix86_expand_vector_extract (true, target
, op0
, elt
);
18409 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18410 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
18411 a language-level syntax for referencing vector elements. */
18414 ix86_expand_vec_set_builtin (tree exp
)
18416 enum machine_mode tmode
, mode1
;
18417 tree arg0
, arg1
, arg2
;
18419 rtx op0
, op1
, target
;
18421 arg0
= CALL_EXPR_ARG (exp
, 0);
18422 arg1
= CALL_EXPR_ARG (exp
, 1);
18423 arg2
= CALL_EXPR_ARG (exp
, 2);
18425 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
18426 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
18427 gcc_assert (VECTOR_MODE_P (tmode
));
18429 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
18430 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
18431 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
18433 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
18434 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
18436 op0
= force_reg (tmode
, op0
);
18437 op1
= force_reg (mode1
, op1
);
18439 /* OP0 is the source of these builtin functions and shouldn't be
18440 modified. Create a copy, use it and return it as target. */
18441 target
= gen_reg_rtx (tmode
);
18442 emit_move_insn (target
, op0
);
18443 ix86_expand_vector_set (true, target
, op1
, elt
);
18448 /* Expand an expression EXP that calls a built-in function,
18449 with result going to TARGET if that's convenient
18450 (and in mode MODE if that's convenient).
18451 SUBTARGET may be used as the target for computing one of EXP's operands.
18452 IGNORE is nonzero if the value is to be ignored. */
18455 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
18456 enum machine_mode mode ATTRIBUTE_UNUSED
,
18457 int ignore ATTRIBUTE_UNUSED
)
18459 const struct builtin_description
*d
;
18461 enum insn_code icode
;
18462 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
18463 tree arg0
, arg1
, arg2
, arg3
;
18464 rtx op0
, op1
, op2
, op3
, pat
;
18465 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
18466 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
18470 case IX86_BUILTIN_EMMS
:
18471 emit_insn (gen_mmx_emms ());
18474 case IX86_BUILTIN_SFENCE
:
18475 emit_insn (gen_sse_sfence ());
18478 case IX86_BUILTIN_MASKMOVQ
:
18479 case IX86_BUILTIN_MASKMOVDQU
:
18480 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
18481 ? CODE_FOR_mmx_maskmovq
18482 : CODE_FOR_sse2_maskmovdqu
);
18483 /* Note the arg order is different from the operand order. */
18484 arg1
= CALL_EXPR_ARG (exp
, 0);
18485 arg2
= CALL_EXPR_ARG (exp
, 1);
18486 arg0
= CALL_EXPR_ARG (exp
, 2);
18487 op0
= expand_normal (arg0
);
18488 op1
= expand_normal (arg1
);
18489 op2
= expand_normal (arg2
);
18490 mode0
= insn_data
[icode
].operand
[0].mode
;
18491 mode1
= insn_data
[icode
].operand
[1].mode
;
18492 mode2
= insn_data
[icode
].operand
[2].mode
;
18494 op0
= force_reg (Pmode
, op0
);
18495 op0
= gen_rtx_MEM (mode1
, op0
);
18497 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
18498 op0
= copy_to_mode_reg (mode0
, op0
);
18499 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
18500 op1
= copy_to_mode_reg (mode1
, op1
);
18501 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
18502 op2
= copy_to_mode_reg (mode2
, op2
);
18503 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
18509 case IX86_BUILTIN_SQRTSS
:
18510 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
18511 case IX86_BUILTIN_RSQRTSS
:
18512 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
18513 case IX86_BUILTIN_RCPSS
:
18514 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
18516 case IX86_BUILTIN_LOADUPS
:
18517 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
18519 case IX86_BUILTIN_STOREUPS
:
18520 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
18522 case IX86_BUILTIN_LOADHPS
:
18523 case IX86_BUILTIN_LOADLPS
:
18524 case IX86_BUILTIN_LOADHPD
:
18525 case IX86_BUILTIN_LOADLPD
:
18526 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
18527 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
18528 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
18529 : CODE_FOR_sse2_loadlpd
);
18530 arg0
= CALL_EXPR_ARG (exp
, 0);
18531 arg1
= CALL_EXPR_ARG (exp
, 1);
18532 op0
= expand_normal (arg0
);
18533 op1
= expand_normal (arg1
);
18534 tmode
= insn_data
[icode
].operand
[0].mode
;
18535 mode0
= insn_data
[icode
].operand
[1].mode
;
18536 mode1
= insn_data
[icode
].operand
[2].mode
;
18538 op0
= force_reg (mode0
, op0
);
18539 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
18540 if (optimize
|| target
== 0
18541 || GET_MODE (target
) != tmode
18542 || !register_operand (target
, tmode
))
18543 target
= gen_reg_rtx (tmode
);
18544 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18550 case IX86_BUILTIN_STOREHPS
:
18551 case IX86_BUILTIN_STORELPS
:
18552 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
18553 : CODE_FOR_sse_storelps
);
18554 arg0
= CALL_EXPR_ARG (exp
, 0);
18555 arg1
= CALL_EXPR_ARG (exp
, 1);
18556 op0
= expand_normal (arg0
);
18557 op1
= expand_normal (arg1
);
18558 mode0
= insn_data
[icode
].operand
[0].mode
;
18559 mode1
= insn_data
[icode
].operand
[1].mode
;
18561 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
18562 op1
= force_reg (mode1
, op1
);
18564 pat
= GEN_FCN (icode
) (op0
, op1
);
18570 case IX86_BUILTIN_MOVNTPS
:
18571 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
18572 case IX86_BUILTIN_MOVNTQ
:
18573 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
18575 case IX86_BUILTIN_LDMXCSR
:
18576 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
18577 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
18578 emit_move_insn (target
, op0
);
18579 emit_insn (gen_sse_ldmxcsr (target
));
18582 case IX86_BUILTIN_STMXCSR
:
18583 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
18584 emit_insn (gen_sse_stmxcsr (target
));
18585 return copy_to_mode_reg (SImode
, target
);
18587 case IX86_BUILTIN_SHUFPS
:
18588 case IX86_BUILTIN_SHUFPD
:
18589 icode
= (fcode
== IX86_BUILTIN_SHUFPS
18590 ? CODE_FOR_sse_shufps
18591 : CODE_FOR_sse2_shufpd
);
18592 arg0
= CALL_EXPR_ARG (exp
, 0);
18593 arg1
= CALL_EXPR_ARG (exp
, 1);
18594 arg2
= CALL_EXPR_ARG (exp
, 2);
18595 op0
= expand_normal (arg0
);
18596 op1
= expand_normal (arg1
);
18597 op2
= expand_normal (arg2
);
18598 tmode
= insn_data
[icode
].operand
[0].mode
;
18599 mode0
= insn_data
[icode
].operand
[1].mode
;
18600 mode1
= insn_data
[icode
].operand
[2].mode
;
18601 mode2
= insn_data
[icode
].operand
[3].mode
;
18603 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18604 op0
= copy_to_mode_reg (mode0
, op0
);
18605 if ((optimize
&& !register_operand (op1
, mode1
))
18606 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18607 op1
= copy_to_mode_reg (mode1
, op1
);
18608 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
18610 /* @@@ better error message */
18611 error ("mask must be an immediate");
18612 return gen_reg_rtx (tmode
);
18614 if (optimize
|| target
== 0
18615 || GET_MODE (target
) != tmode
18616 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18617 target
= gen_reg_rtx (tmode
);
18618 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18624 case IX86_BUILTIN_PSHUFW
:
18625 case IX86_BUILTIN_PSHUFD
:
18626 case IX86_BUILTIN_PSHUFHW
:
18627 case IX86_BUILTIN_PSHUFLW
:
18628 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
18629 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
18630 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
18631 : CODE_FOR_mmx_pshufw
);
18632 arg0
= CALL_EXPR_ARG (exp
, 0);
18633 arg1
= CALL_EXPR_ARG (exp
, 1);
18634 op0
= expand_normal (arg0
);
18635 op1
= expand_normal (arg1
);
18636 tmode
= insn_data
[icode
].operand
[0].mode
;
18637 mode1
= insn_data
[icode
].operand
[1].mode
;
18638 mode2
= insn_data
[icode
].operand
[2].mode
;
18640 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18641 op0
= copy_to_mode_reg (mode1
, op0
);
18642 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18644 /* @@@ better error message */
18645 error ("mask must be an immediate");
18649 || GET_MODE (target
) != tmode
18650 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18651 target
= gen_reg_rtx (tmode
);
18652 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18658 case IX86_BUILTIN_PSLLWI128
:
18659 icode
= CODE_FOR_ashlv8hi3
;
18661 case IX86_BUILTIN_PSLLDI128
:
18662 icode
= CODE_FOR_ashlv4si3
;
18664 case IX86_BUILTIN_PSLLQI128
:
18665 icode
= CODE_FOR_ashlv2di3
;
18667 case IX86_BUILTIN_PSRAWI128
:
18668 icode
= CODE_FOR_ashrv8hi3
;
18670 case IX86_BUILTIN_PSRADI128
:
18671 icode
= CODE_FOR_ashrv4si3
;
18673 case IX86_BUILTIN_PSRLWI128
:
18674 icode
= CODE_FOR_lshrv8hi3
;
18676 case IX86_BUILTIN_PSRLDI128
:
18677 icode
= CODE_FOR_lshrv4si3
;
18679 case IX86_BUILTIN_PSRLQI128
:
18680 icode
= CODE_FOR_lshrv2di3
;
18683 arg0
= CALL_EXPR_ARG (exp
, 0);
18684 arg1
= CALL_EXPR_ARG (exp
, 1);
18685 op0
= expand_normal (arg0
);
18686 op1
= expand_normal (arg1
);
18688 if (!CONST_INT_P (op1
))
18690 error ("shift must be an immediate");
18693 if (INTVAL (op1
) < 0 || INTVAL (op1
) > 255)
18694 op1
= GEN_INT (255);
18696 tmode
= insn_data
[icode
].operand
[0].mode
;
18697 mode1
= insn_data
[icode
].operand
[1].mode
;
18698 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18699 op0
= copy_to_reg (op0
);
18701 target
= gen_reg_rtx (tmode
);
18702 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18708 case IX86_BUILTIN_PSLLW128
:
18709 icode
= CODE_FOR_ashlv8hi3
;
18711 case IX86_BUILTIN_PSLLD128
:
18712 icode
= CODE_FOR_ashlv4si3
;
18714 case IX86_BUILTIN_PSLLQ128
:
18715 icode
= CODE_FOR_ashlv2di3
;
18717 case IX86_BUILTIN_PSRAW128
:
18718 icode
= CODE_FOR_ashrv8hi3
;
18720 case IX86_BUILTIN_PSRAD128
:
18721 icode
= CODE_FOR_ashrv4si3
;
18723 case IX86_BUILTIN_PSRLW128
:
18724 icode
= CODE_FOR_lshrv8hi3
;
18726 case IX86_BUILTIN_PSRLD128
:
18727 icode
= CODE_FOR_lshrv4si3
;
18729 case IX86_BUILTIN_PSRLQ128
:
18730 icode
= CODE_FOR_lshrv2di3
;
18733 arg0
= CALL_EXPR_ARG (exp
, 0);
18734 arg1
= CALL_EXPR_ARG (exp
, 1);
18735 op0
= expand_normal (arg0
);
18736 op1
= expand_normal (arg1
);
18738 tmode
= insn_data
[icode
].operand
[0].mode
;
18739 mode1
= insn_data
[icode
].operand
[1].mode
;
18741 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18742 op0
= copy_to_reg (op0
);
18744 op1
= simplify_gen_subreg (TImode
, op1
, GET_MODE (op1
), 0);
18745 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, TImode
))
18746 op1
= copy_to_reg (op1
);
18748 target
= gen_reg_rtx (tmode
);
18749 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18755 case IX86_BUILTIN_PSLLDQI128
:
18756 case IX86_BUILTIN_PSRLDQI128
:
18757 icode
= (fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
18758 : CODE_FOR_sse2_lshrti3
);
18759 arg0
= CALL_EXPR_ARG (exp
, 0);
18760 arg1
= CALL_EXPR_ARG (exp
, 1);
18761 op0
= expand_normal (arg0
);
18762 op1
= expand_normal (arg1
);
18763 tmode
= insn_data
[icode
].operand
[0].mode
;
18764 mode1
= insn_data
[icode
].operand
[1].mode
;
18765 mode2
= insn_data
[icode
].operand
[2].mode
;
18767 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18769 op0
= copy_to_reg (op0
);
18770 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18772 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18774 error ("shift must be an immediate");
18777 target
= gen_reg_rtx (V2DImode
);
18778 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0),
18785 case IX86_BUILTIN_FEMMS
:
18786 emit_insn (gen_mmx_femms ());
18789 case IX86_BUILTIN_PAVGUSB
:
18790 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
18792 case IX86_BUILTIN_PF2ID
:
18793 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
18795 case IX86_BUILTIN_PFACC
:
18796 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
18798 case IX86_BUILTIN_PFADD
:
18799 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
18801 case IX86_BUILTIN_PFCMPEQ
:
18802 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
18804 case IX86_BUILTIN_PFCMPGE
:
18805 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
18807 case IX86_BUILTIN_PFCMPGT
:
18808 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
18810 case IX86_BUILTIN_PFMAX
:
18811 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
18813 case IX86_BUILTIN_PFMIN
:
18814 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
18816 case IX86_BUILTIN_PFMUL
:
18817 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
18819 case IX86_BUILTIN_PFRCP
:
18820 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
18822 case IX86_BUILTIN_PFRCPIT1
:
18823 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
18825 case IX86_BUILTIN_PFRCPIT2
:
18826 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
18828 case IX86_BUILTIN_PFRSQIT1
:
18829 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
18831 case IX86_BUILTIN_PFRSQRT
:
18832 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
18834 case IX86_BUILTIN_PFSUB
:
18835 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
18837 case IX86_BUILTIN_PFSUBR
:
18838 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
18840 case IX86_BUILTIN_PI2FD
:
18841 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
18843 case IX86_BUILTIN_PMULHRW
:
18844 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
18846 case IX86_BUILTIN_PF2IW
:
18847 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
18849 case IX86_BUILTIN_PFNACC
:
18850 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
18852 case IX86_BUILTIN_PFPNACC
:
18853 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
18855 case IX86_BUILTIN_PI2FW
:
18856 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
18858 case IX86_BUILTIN_PSWAPDSI
:
18859 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
18861 case IX86_BUILTIN_PSWAPDSF
:
18862 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
18864 case IX86_BUILTIN_SQRTSD
:
18865 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
18866 case IX86_BUILTIN_LOADUPD
:
18867 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
18868 case IX86_BUILTIN_STOREUPD
:
18869 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
18871 case IX86_BUILTIN_MFENCE
:
18872 emit_insn (gen_sse2_mfence ());
18874 case IX86_BUILTIN_LFENCE
:
18875 emit_insn (gen_sse2_lfence ());
18878 case IX86_BUILTIN_CLFLUSH
:
18879 arg0
= CALL_EXPR_ARG (exp
, 0);
18880 op0
= expand_normal (arg0
);
18881 icode
= CODE_FOR_sse2_clflush
;
18882 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
18883 op0
= copy_to_mode_reg (Pmode
, op0
);
18885 emit_insn (gen_sse2_clflush (op0
));
18888 case IX86_BUILTIN_MOVNTPD
:
18889 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
18890 case IX86_BUILTIN_MOVNTDQ
:
18891 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
18892 case IX86_BUILTIN_MOVNTI
:
18893 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
18895 case IX86_BUILTIN_LOADDQU
:
18896 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
18897 case IX86_BUILTIN_STOREDQU
:
18898 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
18900 case IX86_BUILTIN_MONITOR
:
18901 arg0
= CALL_EXPR_ARG (exp
, 0);
18902 arg1
= CALL_EXPR_ARG (exp
, 1);
18903 arg2
= CALL_EXPR_ARG (exp
, 2);
18904 op0
= expand_normal (arg0
);
18905 op1
= expand_normal (arg1
);
18906 op2
= expand_normal (arg2
);
18908 op0
= copy_to_mode_reg (Pmode
, op0
);
18910 op1
= copy_to_mode_reg (SImode
, op1
);
18912 op2
= copy_to_mode_reg (SImode
, op2
);
18914 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
18916 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
18919 case IX86_BUILTIN_MWAIT
:
18920 arg0
= CALL_EXPR_ARG (exp
, 0);
18921 arg1
= CALL_EXPR_ARG (exp
, 1);
18922 op0
= expand_normal (arg0
);
18923 op1
= expand_normal (arg1
);
18925 op0
= copy_to_mode_reg (SImode
, op0
);
18927 op1
= copy_to_mode_reg (SImode
, op1
);
18928 emit_insn (gen_sse3_mwait (op0
, op1
));
18931 case IX86_BUILTIN_LDDQU
:
18932 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
18935 case IX86_BUILTIN_PALIGNR
:
18936 case IX86_BUILTIN_PALIGNR128
:
18937 if (fcode
== IX86_BUILTIN_PALIGNR
)
18939 icode
= CODE_FOR_ssse3_palignrdi
;
18944 icode
= CODE_FOR_ssse3_palignrti
;
18947 arg0
= CALL_EXPR_ARG (exp
, 0);
18948 arg1
= CALL_EXPR_ARG (exp
, 1);
18949 arg2
= CALL_EXPR_ARG (exp
, 2);
18950 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
18951 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
18952 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
18953 tmode
= insn_data
[icode
].operand
[0].mode
;
18954 mode1
= insn_data
[icode
].operand
[1].mode
;
18955 mode2
= insn_data
[icode
].operand
[2].mode
;
18956 mode3
= insn_data
[icode
].operand
[3].mode
;
18958 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18960 op0
= copy_to_reg (op0
);
18961 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18963 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18965 op1
= copy_to_reg (op1
);
18966 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
18968 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18970 error ("shift must be an immediate");
18973 target
= gen_reg_rtx (mode
);
18974 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
18981 case IX86_BUILTIN_MOVNTDQA
:
18982 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa
, exp
,
18985 case IX86_BUILTIN_MOVNTSD
:
18986 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
18988 case IX86_BUILTIN_MOVNTSS
:
18989 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
18991 case IX86_BUILTIN_INSERTQ
:
18992 case IX86_BUILTIN_EXTRQ
:
18993 icode
= (fcode
== IX86_BUILTIN_EXTRQ
18994 ? CODE_FOR_sse4a_extrq
18995 : CODE_FOR_sse4a_insertq
);
18996 arg0
= CALL_EXPR_ARG (exp
, 0);
18997 arg1
= CALL_EXPR_ARG (exp
, 1);
18998 op0
= expand_normal (arg0
);
18999 op1
= expand_normal (arg1
);
19000 tmode
= insn_data
[icode
].operand
[0].mode
;
19001 mode1
= insn_data
[icode
].operand
[1].mode
;
19002 mode2
= insn_data
[icode
].operand
[2].mode
;
19003 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19004 op0
= copy_to_mode_reg (mode1
, op0
);
19005 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19006 op1
= copy_to_mode_reg (mode2
, op1
);
19007 if (optimize
|| target
== 0
19008 || GET_MODE (target
) != tmode
19009 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19010 target
= gen_reg_rtx (tmode
);
19011 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19017 case IX86_BUILTIN_EXTRQI
:
19018 icode
= CODE_FOR_sse4a_extrqi
;
19019 arg0
= CALL_EXPR_ARG (exp
, 0);
19020 arg1
= CALL_EXPR_ARG (exp
, 1);
19021 arg2
= CALL_EXPR_ARG (exp
, 2);
19022 op0
= expand_normal (arg0
);
19023 op1
= expand_normal (arg1
);
19024 op2
= expand_normal (arg2
);
19025 tmode
= insn_data
[icode
].operand
[0].mode
;
19026 mode1
= insn_data
[icode
].operand
[1].mode
;
19027 mode2
= insn_data
[icode
].operand
[2].mode
;
19028 mode3
= insn_data
[icode
].operand
[3].mode
;
19029 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19030 op0
= copy_to_mode_reg (mode1
, op0
);
19031 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19033 error ("index mask must be an immediate");
19034 return gen_reg_rtx (tmode
);
19036 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19038 error ("length mask must be an immediate");
19039 return gen_reg_rtx (tmode
);
19041 if (optimize
|| target
== 0
19042 || GET_MODE (target
) != tmode
19043 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19044 target
= gen_reg_rtx (tmode
);
19045 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19051 case IX86_BUILTIN_INSERTQI
:
19052 icode
= CODE_FOR_sse4a_insertqi
;
19053 arg0
= CALL_EXPR_ARG (exp
, 0);
19054 arg1
= CALL_EXPR_ARG (exp
, 1);
19055 arg2
= CALL_EXPR_ARG (exp
, 2);
19056 arg3
= CALL_EXPR_ARG (exp
, 3);
19057 op0
= expand_normal (arg0
);
19058 op1
= expand_normal (arg1
);
19059 op2
= expand_normal (arg2
);
19060 op3
= expand_normal (arg3
);
19061 tmode
= insn_data
[icode
].operand
[0].mode
;
19062 mode1
= insn_data
[icode
].operand
[1].mode
;
19063 mode2
= insn_data
[icode
].operand
[2].mode
;
19064 mode3
= insn_data
[icode
].operand
[3].mode
;
19065 mode4
= insn_data
[icode
].operand
[4].mode
;
19067 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19068 op0
= copy_to_mode_reg (mode1
, op0
);
19070 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19071 op1
= copy_to_mode_reg (mode2
, op1
);
19073 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19075 error ("index mask must be an immediate");
19076 return gen_reg_rtx (tmode
);
19078 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
19080 error ("length mask must be an immediate");
19081 return gen_reg_rtx (tmode
);
19083 if (optimize
|| target
== 0
19084 || GET_MODE (target
) != tmode
19085 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19086 target
= gen_reg_rtx (tmode
);
19087 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
19093 case IX86_BUILTIN_VEC_INIT_V2SI
:
19094 case IX86_BUILTIN_VEC_INIT_V4HI
:
19095 case IX86_BUILTIN_VEC_INIT_V8QI
:
19096 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
19098 case IX86_BUILTIN_VEC_EXT_V2DF
:
19099 case IX86_BUILTIN_VEC_EXT_V2DI
:
19100 case IX86_BUILTIN_VEC_EXT_V4SF
:
19101 case IX86_BUILTIN_VEC_EXT_V4SI
:
19102 case IX86_BUILTIN_VEC_EXT_V8HI
:
19103 case IX86_BUILTIN_VEC_EXT_V2SI
:
19104 case IX86_BUILTIN_VEC_EXT_V4HI
:
19105 case IX86_BUILTIN_VEC_EXT_V16QI
:
19106 return ix86_expand_vec_ext_builtin (exp
, target
);
19108 case IX86_BUILTIN_VEC_SET_V2DI
:
19109 case IX86_BUILTIN_VEC_SET_V4SF
:
19110 case IX86_BUILTIN_VEC_SET_V4SI
:
19111 case IX86_BUILTIN_VEC_SET_V8HI
:
19112 case IX86_BUILTIN_VEC_SET_V4HI
:
19113 case IX86_BUILTIN_VEC_SET_V16QI
:
19114 return ix86_expand_vec_set_builtin (exp
);
19120 for (i
= 0, d
= bdesc_sse_3arg
;
19121 i
< ARRAY_SIZE (bdesc_sse_3arg
);
19123 if (d
->code
== fcode
)
19124 return ix86_expand_sse_4_operands_builtin (d
->icode
, exp
,
19127 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19128 if (d
->code
== fcode
)
19130 /* Compares are treated specially. */
19131 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
19132 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
19133 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
19134 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
19135 return ix86_expand_sse_compare (d
, exp
, target
);
19137 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
19140 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
19141 if (d
->code
== fcode
)
19142 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
19144 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
19145 if (d
->code
== fcode
)
19146 return ix86_expand_sse_comi (d
, exp
, target
);
19148 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
19149 if (d
->code
== fcode
)
19150 return ix86_expand_sse_ptest (d
, exp
, target
);
19152 gcc_unreachable ();
19155 /* Returns a function decl for a vectorized version of the builtin function
19156 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19157 if it is not available. */
19160 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
19163 enum machine_mode in_mode
, out_mode
;
19166 if (TREE_CODE (type_out
) != VECTOR_TYPE
19167 || TREE_CODE (type_in
) != VECTOR_TYPE
)
19170 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
19171 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
19172 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
19173 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
19177 case BUILT_IN_SQRT
:
19178 if (out_mode
== DFmode
&& out_n
== 2
19179 && in_mode
== DFmode
&& in_n
== 2)
19180 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
19183 case BUILT_IN_SQRTF
:
19184 if (out_mode
== SFmode
&& out_n
== 4
19185 && in_mode
== SFmode
&& in_n
== 4)
19186 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
19189 case BUILT_IN_LRINTF
:
19190 if (out_mode
== SImode
&& out_n
== 4
19191 && in_mode
== SFmode
&& in_n
== 4)
19192 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
19202 /* Returns a decl of a function that implements conversion of the
19203 input vector of type TYPE, or NULL_TREE if it is not available. */
19206 ix86_builtin_conversion (enum tree_code code
, tree type
)
19208 if (TREE_CODE (type
) != VECTOR_TYPE
)
19214 switch (TYPE_MODE (type
))
19217 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
19222 case FIX_TRUNC_EXPR
:
19223 switch (TYPE_MODE (type
))
19226 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
19236 /* Store OPERAND to the memory after reload is completed. This means
19237 that we can't easily use assign_stack_local. */
19239 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
19243 gcc_assert (reload_completed
);
19244 if (TARGET_RED_ZONE
)
19246 result
= gen_rtx_MEM (mode
,
19247 gen_rtx_PLUS (Pmode
,
19249 GEN_INT (-RED_ZONE_SIZE
)));
19250 emit_move_insn (result
, operand
);
19252 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
19258 operand
= gen_lowpart (DImode
, operand
);
19262 gen_rtx_SET (VOIDmode
,
19263 gen_rtx_MEM (DImode
,
19264 gen_rtx_PRE_DEC (DImode
,
19265 stack_pointer_rtx
)),
19269 gcc_unreachable ();
19271 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
19280 split_di (&operand
, 1, operands
, operands
+ 1);
19282 gen_rtx_SET (VOIDmode
,
19283 gen_rtx_MEM (SImode
,
19284 gen_rtx_PRE_DEC (Pmode
,
19285 stack_pointer_rtx
)),
19288 gen_rtx_SET (VOIDmode
,
19289 gen_rtx_MEM (SImode
,
19290 gen_rtx_PRE_DEC (Pmode
,
19291 stack_pointer_rtx
)),
19296 /* Store HImodes as SImodes. */
19297 operand
= gen_lowpart (SImode
, operand
);
19301 gen_rtx_SET (VOIDmode
,
19302 gen_rtx_MEM (GET_MODE (operand
),
19303 gen_rtx_PRE_DEC (SImode
,
19304 stack_pointer_rtx
)),
19308 gcc_unreachable ();
19310 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
19315 /* Free operand from the memory. */
19317 ix86_free_from_memory (enum machine_mode mode
)
19319 if (!TARGET_RED_ZONE
)
19323 if (mode
== DImode
|| TARGET_64BIT
)
19327 /* Use LEA to deallocate stack space. In peephole2 it will be converted
19328 to pop or add instruction if registers are available. */
19329 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19330 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
19335 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
19336 QImode must go into class Q_REGS.
19337 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19338 movdf to do mem-to-mem moves through integer regs. */
19340 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
19342 enum machine_mode mode
= GET_MODE (x
);
19344 /* We're only allowed to return a subclass of CLASS. Many of the
19345 following checks fail for NO_REGS, so eliminate that early. */
19346 if (class == NO_REGS
)
19349 /* All classes can load zeros. */
19350 if (x
== CONST0_RTX (mode
))
19353 /* Force constants into memory if we are loading a (nonzero) constant into
19354 an MMX or SSE register. This is because there are no MMX/SSE instructions
19355 to load from a constant. */
19357 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
19360 /* Prefer SSE regs only, if we can use them for math. */
19361 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
19362 return SSE_CLASS_P (class) ? class : NO_REGS
;
19364 /* Floating-point constants need more complex checks. */
19365 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
19367 /* General regs can load everything. */
19368 if (reg_class_subset_p (class, GENERAL_REGS
))
19371 /* Floats can load 0 and 1 plus some others. Note that we eliminated
19372 zero above. We only want to wind up preferring 80387 registers if
19373 we plan on doing computation with them. */
19375 && standard_80387_constant_p (x
))
19377 /* Limit class to non-sse. */
19378 if (class == FLOAT_SSE_REGS
)
19380 if (class == FP_TOP_SSE_REGS
)
19382 if (class == FP_SECOND_SSE_REGS
)
19383 return FP_SECOND_REG
;
19384 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
19391 /* Generally when we see PLUS here, it's the function invariant
19392 (plus soft-fp const_int). Which can only be computed into general
19394 if (GET_CODE (x
) == PLUS
)
19395 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
19397 /* QImode constants are easy to load, but non-constant QImode data
19398 must go into Q_REGS. */
19399 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
19401 if (reg_class_subset_p (class, Q_REGS
))
19403 if (reg_class_subset_p (Q_REGS
, class))
19411 /* Discourage putting floating-point values in SSE registers unless
19412 SSE math is being used, and likewise for the 387 registers. */
19414 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
19416 enum machine_mode mode
= GET_MODE (x
);
19418 /* Restrict the output reload class to the register bank that we are doing
19419 math on. If we would like not to return a subset of CLASS, reject this
19420 alternative: if reload cannot do this, it will still use its choice. */
19421 mode
= GET_MODE (x
);
19422 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
19423 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
19425 if (X87_FLOAT_MODE_P (mode
))
19427 if (class == FP_TOP_SSE_REGS
)
19429 else if (class == FP_SECOND_SSE_REGS
)
19430 return FP_SECOND_REG
;
19432 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
19438 /* If we are copying between general and FP registers, we need a memory
19439 location. The same is true for SSE and MMX registers.
19441 The macro can't work reliably when one of the CLASSES is class containing
19442 registers from multiple units (SSE, MMX, integer). We avoid this by never
19443 combining those units in single alternative in the machine description.
19444 Ensure that this constraint holds to avoid unexpected surprises.
19446 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
19447 enforce these sanity checks. */
19450 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
19451 enum machine_mode mode
, int strict
)
19453 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
19454 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
19455 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
19456 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
19457 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
19458 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
19460 gcc_assert (!strict
);
19464 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
19467 /* ??? This is a lie. We do have moves between mmx/general, and for
19468 mmx/sse2. But by saying we need secondary memory we discourage the
19469 register allocator from using the mmx registers unless needed. */
19470 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
19473 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
19475 /* SSE1 doesn't have any direct moves from other classes. */
19479 /* If the target says that inter-unit moves are more expensive
19480 than moving through memory, then don't generate them. */
19481 if (!TARGET_INTER_UNIT_MOVES
)
19484 /* Between SSE and general, we have moves no larger than word size. */
19485 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
19492 /* Return true if the registers in CLASS cannot represent the change from
19493 modes FROM to TO. */
19496 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
19497 enum reg_class
class)
19502 /* x87 registers can't do subreg at all, as all values are reformatted
19503 to extended precision. */
19504 if (MAYBE_FLOAT_CLASS_P (class))
19507 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
19509 /* Vector registers do not support QI or HImode loads. If we don't
19510 disallow a change to these modes, reload will assume it's ok to
19511 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19512 the vec_dupv4hi pattern. */
19513 if (GET_MODE_SIZE (from
) < 4)
19516 /* Vector registers do not support subreg with nonzero offsets, which
19517 are otherwise valid for integer registers. Since we can't see
19518 whether we have a nonzero offset from here, prohibit all
19519 nonparadoxical subregs changing size. */
19520 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
19527 /* Return the cost of moving data from a register in class CLASS1 to
19528 one in class CLASS2.
19530 It is not required that the cost always equal 2 when FROM is the same as TO;
19531 on some machines it is expensive to move between registers if they are not
19532 general registers. */
19535 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
19536 enum reg_class class2
)
19538 /* In case we require secondary memory, compute cost of the store followed
19539 by load. In order to avoid bad register allocation choices, we need
19540 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19542 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
19546 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
19547 MEMORY_MOVE_COST (mode
, class1
, 1));
19548 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
19549 MEMORY_MOVE_COST (mode
, class2
, 1));
19551 /* In case of copying from general_purpose_register we may emit multiple
19552 stores followed by single load causing memory size mismatch stall.
19553 Count this as arbitrarily high cost of 20. */
19554 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
19557 /* In the case of FP/MMX moves, the registers actually overlap, and we
19558 have to switch modes in order to treat them differently. */
19559 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
19560 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
19566 /* Moves between SSE/MMX and integer unit are expensive. */
19567 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
19568 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
19569 return ix86_cost
->mmxsse_to_integer
;
19570 if (MAYBE_FLOAT_CLASS_P (class1
))
19571 return ix86_cost
->fp_move
;
19572 if (MAYBE_SSE_CLASS_P (class1
))
19573 return ix86_cost
->sse_move
;
19574 if (MAYBE_MMX_CLASS_P (class1
))
19575 return ix86_cost
->mmx_move
;
19579 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
19582 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
19584 /* Flags and only flags can only hold CCmode values. */
19585 if (CC_REGNO_P (regno
))
19586 return GET_MODE_CLASS (mode
) == MODE_CC
;
19587 if (GET_MODE_CLASS (mode
) == MODE_CC
19588 || GET_MODE_CLASS (mode
) == MODE_RANDOM
19589 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
19591 if (FP_REGNO_P (regno
))
19592 return VALID_FP_MODE_P (mode
);
19593 if (SSE_REGNO_P (regno
))
19595 /* We implement the move patterns for all vector modes into and
19596 out of SSE registers, even when no operation instructions
19598 return (VALID_SSE_REG_MODE (mode
)
19599 || VALID_SSE2_REG_MODE (mode
)
19600 || VALID_MMX_REG_MODE (mode
)
19601 || VALID_MMX_REG_MODE_3DNOW (mode
));
19603 if (MMX_REGNO_P (regno
))
19605 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19606 so if the register is available at all, then we can move data of
19607 the given mode into or out of it. */
19608 return (VALID_MMX_REG_MODE (mode
)
19609 || VALID_MMX_REG_MODE_3DNOW (mode
));
19612 if (mode
== QImode
)
19614 /* Take care for QImode values - they can be in non-QI regs,
19615 but then they do cause partial register stalls. */
19616 if (regno
< 4 || TARGET_64BIT
)
19618 if (!TARGET_PARTIAL_REG_STALL
)
19620 return reload_in_progress
|| reload_completed
;
19622 /* We handle both integer and floats in the general purpose registers. */
19623 else if (VALID_INT_MODE_P (mode
))
19625 else if (VALID_FP_MODE_P (mode
))
19627 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19628 on to use that value in smaller contexts, this can easily force a
19629 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19630 supporting DImode, allow it. */
19631 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
19637 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19638 tieable integer mode. */
19641 ix86_tieable_integer_mode_p (enum machine_mode mode
)
19650 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
19653 return TARGET_64BIT
;
19660 /* Return true if MODE1 is accessible in a register that can hold MODE2
19661 without copying. That is, all register classes that can hold MODE2
19662 can also hold MODE1. */
19665 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
19667 if (mode1
== mode2
)
19670 if (ix86_tieable_integer_mode_p (mode1
)
19671 && ix86_tieable_integer_mode_p (mode2
))
19674 /* MODE2 being XFmode implies fp stack or general regs, which means we
19675 can tie any smaller floating point modes to it. Note that we do not
19676 tie this with TFmode. */
19677 if (mode2
== XFmode
)
19678 return mode1
== SFmode
|| mode1
== DFmode
;
19680 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19681 that we can tie it with SFmode. */
19682 if (mode2
== DFmode
)
19683 return mode1
== SFmode
;
19685 /* If MODE2 is only appropriate for an SSE register, then tie with
19686 any other mode acceptable to SSE registers. */
19687 if (GET_MODE_SIZE (mode2
) == 16
19688 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
19689 return (GET_MODE_SIZE (mode1
) == 16
19690 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
19692 /* If MODE2 is appropriate for an MMX register, then tie
19693 with any other mode acceptable to MMX registers. */
19694 if (GET_MODE_SIZE (mode2
) == 8
19695 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
19696 return (GET_MODE_SIZE (mode1
) == 8
19697 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
19702 /* Return the cost of moving data of mode M between a
19703 register and memory. A value of 2 is the default; this cost is
19704 relative to those in `REGISTER_MOVE_COST'.
19706 If moving between registers and memory is more expensive than
19707 between two registers, you should define this macro to express the
19710 Model also increased moving costs of QImode registers in non
19714 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
19716 if (FLOAT_CLASS_P (class))
19733 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
19735 if (SSE_CLASS_P (class))
19738 switch (GET_MODE_SIZE (mode
))
19752 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
19754 if (MMX_CLASS_P (class))
19757 switch (GET_MODE_SIZE (mode
))
19768 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
19770 switch (GET_MODE_SIZE (mode
))
19774 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
19775 : ix86_cost
->movzbl_load
);
19777 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
19778 : ix86_cost
->int_store
[0] + 4);
19781 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
19783 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19784 if (mode
== TFmode
)
19786 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
19787 * (((int) GET_MODE_SIZE (mode
)
19788 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
19792 /* Compute a (partial) cost for rtx X. Return true if the complete
19793 cost has been computed, and false if subexpressions should be
19794 scanned. In either case, *TOTAL contains the cost result. */
19797 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
19799 enum machine_mode mode
= GET_MODE (x
);
19807 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
19809 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
19811 else if (flag_pic
&& SYMBOLIC_CONST (x
)
19813 || (!GET_CODE (x
) != LABEL_REF
19814 && (GET_CODE (x
) != SYMBOL_REF
19815 || !SYMBOL_REF_LOCAL_P (x
)))))
19822 if (mode
== VOIDmode
)
19825 switch (standard_80387_constant_p (x
))
19830 default: /* Other constants */
19835 /* Start with (MEM (SYMBOL_REF)), since that's where
19836 it'll probably end up. Add a penalty for size. */
19837 *total
= (COSTS_N_INSNS (1)
19838 + (flag_pic
!= 0 && !TARGET_64BIT
)
19839 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
19845 /* The zero extensions is often completely free on x86_64, so make
19846 it as cheap as possible. */
19847 if (TARGET_64BIT
&& mode
== DImode
19848 && GET_MODE (XEXP (x
, 0)) == SImode
)
19850 else if (TARGET_ZERO_EXTEND_WITH_AND
)
19851 *total
= ix86_cost
->add
;
19853 *total
= ix86_cost
->movzx
;
19857 *total
= ix86_cost
->movsx
;
19861 if (CONST_INT_P (XEXP (x
, 1))
19862 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
19864 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19867 *total
= ix86_cost
->add
;
19870 if ((value
== 2 || value
== 3)
19871 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
19873 *total
= ix86_cost
->lea
;
19883 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
19885 if (CONST_INT_P (XEXP (x
, 1)))
19887 if (INTVAL (XEXP (x
, 1)) > 32)
19888 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
19890 *total
= ix86_cost
->shift_const
* 2;
19894 if (GET_CODE (XEXP (x
, 1)) == AND
)
19895 *total
= ix86_cost
->shift_var
* 2;
19897 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19902 if (CONST_INT_P (XEXP (x
, 1)))
19903 *total
= ix86_cost
->shift_const
;
19905 *total
= ix86_cost
->shift_var
;
19910 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19912 /* ??? SSE scalar cost should be used here. */
19913 *total
= ix86_cost
->fmul
;
19916 else if (X87_FLOAT_MODE_P (mode
))
19918 *total
= ix86_cost
->fmul
;
19921 else if (FLOAT_MODE_P (mode
))
19923 /* ??? SSE vector cost should be used here. */
19924 *total
= ix86_cost
->fmul
;
19929 rtx op0
= XEXP (x
, 0);
19930 rtx op1
= XEXP (x
, 1);
19932 if (CONST_INT_P (XEXP (x
, 1)))
19934 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19935 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19939 /* This is arbitrary. */
19942 /* Compute costs correctly for widening multiplication. */
19943 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
19944 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19945 == GET_MODE_SIZE (mode
))
19947 int is_mulwiden
= 0;
19948 enum machine_mode inner_mode
= GET_MODE (op0
);
19950 if (GET_CODE (op0
) == GET_CODE (op1
))
19951 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19952 else if (CONST_INT_P (op1
))
19954 if (GET_CODE (op0
) == SIGN_EXTEND
)
19955 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19958 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19962 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19965 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
19966 + nbits
* ix86_cost
->mult_bit
19967 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
19976 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19977 /* ??? SSE cost should be used here. */
19978 *total
= ix86_cost
->fdiv
;
19979 else if (X87_FLOAT_MODE_P (mode
))
19980 *total
= ix86_cost
->fdiv
;
19981 else if (FLOAT_MODE_P (mode
))
19982 /* ??? SSE vector cost should be used here. */
19983 *total
= ix86_cost
->fdiv
;
19985 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
19989 if (GET_MODE_CLASS (mode
) == MODE_INT
19990 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
19992 if (GET_CODE (XEXP (x
, 0)) == PLUS
19993 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19994 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19995 && CONSTANT_P (XEXP (x
, 1)))
19997 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19998 if (val
== 2 || val
== 4 || val
== 8)
20000 *total
= ix86_cost
->lea
;
20001 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
20002 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
20004 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
20008 else if (GET_CODE (XEXP (x
, 0)) == MULT
20009 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
20011 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
20012 if (val
== 2 || val
== 4 || val
== 8)
20014 *total
= ix86_cost
->lea
;
20015 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
20016 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
20020 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
20022 *total
= ix86_cost
->lea
;
20023 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
20024 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
20025 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
20032 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20034 /* ??? SSE cost should be used here. */
20035 *total
= ix86_cost
->fadd
;
20038 else if (X87_FLOAT_MODE_P (mode
))
20040 *total
= ix86_cost
->fadd
;
20043 else if (FLOAT_MODE_P (mode
))
20045 /* ??? SSE vector cost should be used here. */
20046 *total
= ix86_cost
->fadd
;
20054 if (!TARGET_64BIT
&& mode
== DImode
)
20056 *total
= (ix86_cost
->add
* 2
20057 + (rtx_cost (XEXP (x
, 0), outer_code
)
20058 << (GET_MODE (XEXP (x
, 0)) != DImode
))
20059 + (rtx_cost (XEXP (x
, 1), outer_code
)
20060 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
20066 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20068 /* ??? SSE cost should be used here. */
20069 *total
= ix86_cost
->fchs
;
20072 else if (X87_FLOAT_MODE_P (mode
))
20074 *total
= ix86_cost
->fchs
;
20077 else if (FLOAT_MODE_P (mode
))
20079 /* ??? SSE vector cost should be used here. */
20080 *total
= ix86_cost
->fchs
;
20086 if (!TARGET_64BIT
&& mode
== DImode
)
20087 *total
= ix86_cost
->add
* 2;
20089 *total
= ix86_cost
->add
;
20093 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
20094 && XEXP (XEXP (x
, 0), 1) == const1_rtx
20095 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
20096 && XEXP (x
, 1) == const0_rtx
)
20098 /* This kind of construct is implemented using test[bwl].
20099 Treat it as if we had an AND. */
20100 *total
= (ix86_cost
->add
20101 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
20102 + rtx_cost (const1_rtx
, outer_code
));
20108 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
20113 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20114 /* ??? SSE cost should be used here. */
20115 *total
= ix86_cost
->fabs
;
20116 else if (X87_FLOAT_MODE_P (mode
))
20117 *total
= ix86_cost
->fabs
;
20118 else if (FLOAT_MODE_P (mode
))
20119 /* ??? SSE vector cost should be used here. */
20120 *total
= ix86_cost
->fabs
;
20124 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20125 /* ??? SSE cost should be used here. */
20126 *total
= ix86_cost
->fsqrt
;
20127 else if (X87_FLOAT_MODE_P (mode
))
20128 *total
= ix86_cost
->fsqrt
;
20129 else if (FLOAT_MODE_P (mode
))
20130 /* ??? SSE vector cost should be used here. */
20131 *total
= ix86_cost
->fsqrt
;
20135 if (XINT (x
, 1) == UNSPEC_TP
)
20146 static int current_machopic_label_num
;
20148 /* Given a symbol name and its associated stub, write out the
20149 definition of the stub. */
20152 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
20154 unsigned int length
;
20155 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
20156 int label
= ++current_machopic_label_num
;
20158 /* For 64-bit we shouldn't get here. */
20159 gcc_assert (!TARGET_64BIT
);
20161 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20162 symb
= (*targetm
.strip_name_encoding
) (symb
);
20164 length
= strlen (stub
);
20165 binder_name
= alloca (length
+ 32);
20166 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
20168 length
= strlen (symb
);
20169 symbol_name
= alloca (length
+ 32);
20170 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
20172 sprintf (lazy_ptr_name
, "L%d$lz", label
);
20175 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
20177 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
20179 fprintf (file
, "%s:\n", stub
);
20180 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20184 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
20185 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
20186 fprintf (file
, "\tjmp\t*%%edx\n");
20189 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
20191 fprintf (file
, "%s:\n", binder_name
);
20195 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
20196 fprintf (file
, "\tpushl\t%%eax\n");
20199 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
20201 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
20203 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
20204 fprintf (file
, "%s:\n", lazy_ptr_name
);
20205 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20206 fprintf (file
, "\t.long %s\n", binder_name
);
20210 darwin_x86_file_end (void)
20212 darwin_file_end ();
20215 #endif /* TARGET_MACHO */
20217 /* Order the registers for register allocator. */
20220 x86_order_regs_for_local_alloc (void)
20225 /* First allocate the local general purpose registers. */
20226 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
20227 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
20228 reg_alloc_order
[pos
++] = i
;
20230 /* Global general purpose registers. */
20231 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
20232 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
20233 reg_alloc_order
[pos
++] = i
;
20235 /* x87 registers come first in case we are doing FP math
20237 if (!TARGET_SSE_MATH
)
20238 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
20239 reg_alloc_order
[pos
++] = i
;
20241 /* SSE registers. */
20242 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
20243 reg_alloc_order
[pos
++] = i
;
20244 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
20245 reg_alloc_order
[pos
++] = i
;
20247 /* x87 registers. */
20248 if (TARGET_SSE_MATH
)
20249 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
20250 reg_alloc_order
[pos
++] = i
;
20252 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
20253 reg_alloc_order
[pos
++] = i
;
20255 /* Initialize the rest of array as we do not allocate some registers
20257 while (pos
< FIRST_PSEUDO_REGISTER
)
20258 reg_alloc_order
[pos
++] = 0;
20261 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20262 struct attribute_spec.handler. */
20264 ix86_handle_struct_attribute (tree
*node
, tree name
,
20265 tree args ATTRIBUTE_UNUSED
,
20266 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
20269 if (DECL_P (*node
))
20271 if (TREE_CODE (*node
) == TYPE_DECL
)
20272 type
= &TREE_TYPE (*node
);
20277 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
20278 || TREE_CODE (*type
) == UNION_TYPE
)))
20280 warning (OPT_Wattributes
, "%qs attribute ignored",
20281 IDENTIFIER_POINTER (name
));
20282 *no_add_attrs
= true;
20285 else if ((is_attribute_p ("ms_struct", name
)
20286 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
20287 || ((is_attribute_p ("gcc_struct", name
)
20288 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
20290 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
20291 IDENTIFIER_POINTER (name
));
20292 *no_add_attrs
= true;
20299 ix86_ms_bitfield_layout_p (tree record_type
)
20301 return (TARGET_MS_BITFIELD_LAYOUT
&&
20302 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
20303 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
20306 /* Returns an expression indicating where the this parameter is
20307 located on entry to the FUNCTION. */
20310 x86_this_parameter (tree function
)
20312 tree type
= TREE_TYPE (function
);
20313 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
20317 const int *parm_regs
;
20319 if (TARGET_64BIT_MS_ABI
)
20320 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
20322 parm_regs
= x86_64_int_parameter_registers
;
20323 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
20326 if (ix86_function_regparm (type
, function
) > 0
20327 && !type_has_variadic_args_p (type
))
20330 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
20332 return gen_rtx_REG (SImode
, regno
);
20335 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
20338 /* Determine whether x86_output_mi_thunk can succeed. */
20341 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
20342 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
20343 HOST_WIDE_INT vcall_offset
, tree function
)
20345 /* 64-bit can handle anything. */
20349 /* For 32-bit, everything's fine if we have one free register. */
20350 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
20353 /* Need a free register for vcall_offset. */
20357 /* Need a free register for GOT references. */
20358 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
20361 /* Otherwise ok. */
20365 /* Output the assembler code for a thunk function. THUNK_DECL is the
20366 declaration for the thunk function itself, FUNCTION is the decl for
20367 the target function. DELTA is an immediate constant offset to be
20368 added to THIS. If VCALL_OFFSET is nonzero, the word at
20369 *(*this + vcall_offset) should be added to THIS. */
20372 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
20373 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
20374 HOST_WIDE_INT vcall_offset
, tree function
)
20377 rtx
this = x86_this_parameter (function
);
20380 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20381 pull it in now and let DELTA benefit. */
20384 else if (vcall_offset
)
20386 /* Put the this parameter into %eax. */
20388 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
20389 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
20392 this_reg
= NULL_RTX
;
20394 /* Adjust the this parameter by a fixed constant. */
20397 xops
[0] = GEN_INT (delta
);
20398 xops
[1] = this_reg
? this_reg
: this;
20401 if (!x86_64_general_operand (xops
[0], DImode
))
20403 tmp
= gen_rtx_REG (DImode
, R10_REG
);
20405 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
20409 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
20412 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
20415 /* Adjust the this parameter by a value stored in the vtable. */
20419 tmp
= gen_rtx_REG (DImode
, R10_REG
);
20422 int tmp_regno
= 2 /* ECX */;
20423 if (lookup_attribute ("fastcall",
20424 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
20425 tmp_regno
= 0 /* EAX */;
20426 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
20429 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
20432 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
20434 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
20436 /* Adjust the this parameter. */
20437 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
20438 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
20440 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
20441 xops
[0] = GEN_INT (vcall_offset
);
20443 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
20444 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
20446 xops
[1] = this_reg
;
20448 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
20450 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
20453 /* If necessary, drop THIS back to its stack slot. */
20454 if (this_reg
&& this_reg
!= this)
20456 xops
[0] = this_reg
;
20458 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
20461 xops
[0] = XEXP (DECL_RTL (function
), 0);
20464 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
20465 output_asm_insn ("jmp\t%P0", xops
);
20466 /* All thunks should be in the same object as their target,
20467 and thus binds_local_p should be true. */
20468 else if (TARGET_64BIT_MS_ABI
)
20469 gcc_unreachable ();
20472 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
20473 tmp
= gen_rtx_CONST (Pmode
, tmp
);
20474 tmp
= gen_rtx_MEM (QImode
, tmp
);
20476 output_asm_insn ("jmp\t%A0", xops
);
20481 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
20482 output_asm_insn ("jmp\t%P0", xops
);
20487 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
20488 tmp
= (gen_rtx_SYMBOL_REF
20490 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
20491 tmp
= gen_rtx_MEM (QImode
, tmp
);
20493 output_asm_insn ("jmp\t%0", xops
);
20496 #endif /* TARGET_MACHO */
20498 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
20499 output_set_got (tmp
, NULL_RTX
);
20502 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
20503 output_asm_insn ("jmp\t{*}%1", xops
);
20509 x86_file_start (void)
20511 default_file_start ();
20513 darwin_file_start ();
20515 if (X86_FILE_START_VERSION_DIRECTIVE
)
20516 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
20517 if (X86_FILE_START_FLTUSED
)
20518 fputs ("\t.global\t__fltused\n", asm_out_file
);
20519 if (ix86_asm_dialect
== ASM_INTEL
)
20520 fputs ("\t.intel_syntax\n", asm_out_file
);
20524 x86_field_alignment (tree field
, int computed
)
20526 enum machine_mode mode
;
20527 tree type
= TREE_TYPE (field
);
20529 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
20531 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
20532 ? get_inner_array_type (type
) : type
);
20533 if (mode
== DFmode
|| mode
== DCmode
20534 || GET_MODE_CLASS (mode
) == MODE_INT
20535 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
20536 return MIN (32, computed
);
20540 /* Output assembler code to FILE to increment profiler label # LABELNO
20541 for profiling a function entry. */
20543 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
20547 #ifndef NO_PROFILE_COUNTERS
20548 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
20551 if (!TARGET_64BIT_MS_ABI
&& flag_pic
)
20552 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
20554 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
20558 #ifndef NO_PROFILE_COUNTERS
20559 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
20560 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
20562 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
20566 #ifndef NO_PROFILE_COUNTERS
20567 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
20568 PROFILE_COUNT_REGISTER
);
20570 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
20574 /* We don't have exact information about the insn sizes, but we may assume
20575 quite safely that we are informed about all 1 byte insns and memory
20576 address sizes. This is enough to eliminate unnecessary padding in
20580 min_insn_size (rtx insn
)
20584 if (!INSN_P (insn
) || !active_insn_p (insn
))
20587 /* Discard alignments we've emit and jump instructions. */
20588 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
20589 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
20592 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
20593 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
20596 /* Important case - calls are always 5 bytes.
20597 It is common to have many calls in the row. */
20599 && symbolic_reference_mentioned_p (PATTERN (insn
))
20600 && !SIBLING_CALL_P (insn
))
20602 if (get_attr_length (insn
) <= 1)
20605 /* For normal instructions we may rely on the sizes of addresses
20606 and the presence of symbol to require 4 bytes of encoding.
20607 This is not the case for jumps where references are PC relative. */
20608 if (!JUMP_P (insn
))
20610 l
= get_attr_length_address (insn
);
20611 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
20620 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20624 ix86_avoid_jump_misspredicts (void)
20626 rtx insn
, start
= get_insns ();
20627 int nbytes
= 0, njumps
= 0;
20630 /* Look for all minimal intervals of instructions containing 4 jumps.
20631 The intervals are bounded by START and INSN. NBYTES is the total
20632 size of instructions in the interval including INSN and not including
20633 START. When the NBYTES is smaller than 16 bytes, it is possible
20634 that the end of START and INSN ends up in the same 16byte page.
20636 The smallest offset in the page INSN can start is the case where START
20637 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20638 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
20640 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
20643 nbytes
+= min_insn_size (insn
);
20645 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
20646 INSN_UID (insn
), min_insn_size (insn
));
20648 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
20649 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
20657 start
= NEXT_INSN (start
);
20658 if ((JUMP_P (start
)
20659 && GET_CODE (PATTERN (start
)) != ADDR_VEC
20660 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
20662 njumps
--, isjump
= 1;
20665 nbytes
-= min_insn_size (start
);
20667 gcc_assert (njumps
>= 0);
20669 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
20670 INSN_UID (start
), INSN_UID (insn
), nbytes
);
20672 if (njumps
== 3 && isjump
&& nbytes
< 16)
20674 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
20677 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
20678 INSN_UID (insn
), padsize
);
20679 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
20684 /* AMD Athlon works faster
20685 when RET is not destination of conditional jump or directly preceded
20686 by other jump instruction. We avoid the penalty by inserting NOP just
20687 before the RET instructions in such cases. */
20689 ix86_pad_returns (void)
20694 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
20696 basic_block bb
= e
->src
;
20697 rtx ret
= BB_END (bb
);
20699 bool replace
= false;
20701 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
20702 || !maybe_hot_bb_p (bb
))
20704 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
20705 if (active_insn_p (prev
) || LABEL_P (prev
))
20707 if (prev
&& LABEL_P (prev
))
20712 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
20713 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
20714 && !(e
->flags
& EDGE_FALLTHRU
))
20719 prev
= prev_active_insn (ret
);
20721 && ((JUMP_P (prev
) && any_condjump_p (prev
))
20724 /* Empty functions get branch mispredict even when the jump destination
20725 is not visible to us. */
20726 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
20731 emit_insn_before (gen_return_internal_long (), ret
);
20737 /* Implement machine specific optimizations. We implement padding of returns
20738 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
20742 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
20743 ix86_pad_returns ();
20744 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
20745 ix86_avoid_jump_misspredicts ();
20748 /* Return nonzero when QImode register that must be represented via REX prefix
20751 x86_extended_QIreg_mentioned_p (rtx insn
)
20754 extract_insn_cached (insn
);
20755 for (i
= 0; i
< recog_data
.n_operands
; i
++)
20756 if (REG_P (recog_data
.operand
[i
])
20757 && REGNO (recog_data
.operand
[i
]) >= 4)
20762 /* Return nonzero when P points to register encoded via REX prefix.
20763 Called via for_each_rtx. */
20765 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
20767 unsigned int regno
;
20770 regno
= REGNO (*p
);
20771 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
20774 /* Return true when INSN mentions register that must be encoded using REX
20777 x86_extended_reg_mentioned_p (rtx insn
)
20779 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
20782 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20783 optabs would emit if we didn't have TFmode patterns. */
20786 x86_emit_floatuns (rtx operands
[2])
20788 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
20789 enum machine_mode mode
, inmode
;
20791 inmode
= GET_MODE (operands
[1]);
20792 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
20795 in
= force_reg (inmode
, operands
[1]);
20796 mode
= GET_MODE (out
);
20797 neglab
= gen_label_rtx ();
20798 donelab
= gen_label_rtx ();
20799 f0
= gen_reg_rtx (mode
);
20801 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
20803 expand_float (out
, in
, 0);
20805 emit_jump_insn (gen_jump (donelab
));
20808 emit_label (neglab
);
20810 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
20812 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
20814 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
20816 expand_float (f0
, i0
, 0);
20818 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
20820 emit_label (donelab
);
20823 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20824 with all elements equal to VAR. Return true if successful. */
20827 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
20828 rtx target
, rtx val
)
20830 enum machine_mode smode
, wsmode
, wvmode
;
20845 val
= force_reg (GET_MODE_INNER (mode
), val
);
20846 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20847 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20853 if (TARGET_SSE
|| TARGET_3DNOW_A
)
20855 val
= gen_lowpart (SImode
, val
);
20856 x
= gen_rtx_TRUNCATE (HImode
, val
);
20857 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
20858 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20880 /* Extend HImode to SImode using a paradoxical SUBREG. */
20881 tmp1
= gen_reg_rtx (SImode
);
20882 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20883 /* Insert the SImode value as low element of V4SImode vector. */
20884 tmp2
= gen_reg_rtx (V4SImode
);
20885 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20886 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20887 CONST0_RTX (V4SImode
),
20889 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20890 /* Cast the V4SImode vector back to a V8HImode vector. */
20891 tmp1
= gen_reg_rtx (V8HImode
);
20892 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
20893 /* Duplicate the low short through the whole low SImode word. */
20894 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
20895 /* Cast the V8HImode vector back to a V4SImode vector. */
20896 tmp2
= gen_reg_rtx (V4SImode
);
20897 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20898 /* Replicate the low element of the V4SImode vector. */
20899 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20900 /* Cast the V2SImode back to V8HImode, and store in target. */
20901 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
20912 /* Extend QImode to SImode using a paradoxical SUBREG. */
20913 tmp1
= gen_reg_rtx (SImode
);
20914 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20915 /* Insert the SImode value as low element of V4SImode vector. */
20916 tmp2
= gen_reg_rtx (V4SImode
);
20917 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20918 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20919 CONST0_RTX (V4SImode
),
20921 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20922 /* Cast the V4SImode vector back to a V16QImode vector. */
20923 tmp1
= gen_reg_rtx (V16QImode
);
20924 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
20925 /* Duplicate the low byte through the whole low SImode word. */
20926 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20927 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20928 /* Cast the V16QImode vector back to a V4SImode vector. */
20929 tmp2
= gen_reg_rtx (V4SImode
);
20930 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20931 /* Replicate the low element of the V4SImode vector. */
20932 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20933 /* Cast the V2SImode back to V16QImode, and store in target. */
20934 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
20942 /* Replicate the value once into the next wider mode and recurse. */
20943 val
= convert_modes (wsmode
, smode
, val
, true);
20944 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
20945 GEN_INT (GET_MODE_BITSIZE (smode
)),
20946 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20947 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
20949 x
= gen_reg_rtx (wvmode
);
20950 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
20951 gcc_unreachable ();
20952 emit_move_insn (target
, gen_lowpart (mode
, x
));
20960 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20961 whose ONE_VAR element is VAR, and other elements are zero. Return true
20965 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
20966 rtx target
, rtx var
, int one_var
)
20968 enum machine_mode vsimode
;
20984 var
= force_reg (GET_MODE_INNER (mode
), var
);
20985 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
20986 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20991 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
20992 new_target
= gen_reg_rtx (mode
);
20994 new_target
= target
;
20995 var
= force_reg (GET_MODE_INNER (mode
), var
);
20996 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
20997 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
20998 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
21001 /* We need to shuffle the value to the correct position, so
21002 create a new pseudo to store the intermediate result. */
21004 /* With SSE2, we can use the integer shuffle insns. */
21005 if (mode
!= V4SFmode
&& TARGET_SSE2
)
21007 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
21009 GEN_INT (one_var
== 1 ? 0 : 1),
21010 GEN_INT (one_var
== 2 ? 0 : 1),
21011 GEN_INT (one_var
== 3 ? 0 : 1)));
21012 if (target
!= new_target
)
21013 emit_move_insn (target
, new_target
);
21017 /* Otherwise convert the intermediate result to V4SFmode and
21018 use the SSE1 shuffle instructions. */
21019 if (mode
!= V4SFmode
)
21021 tmp
= gen_reg_rtx (V4SFmode
);
21022 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
21027 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
21029 GEN_INT (one_var
== 1 ? 0 : 1),
21030 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
21031 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
21033 if (mode
!= V4SFmode
)
21034 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
21035 else if (tmp
!= target
)
21036 emit_move_insn (target
, tmp
);
21038 else if (target
!= new_target
)
21039 emit_move_insn (target
, new_target
);
21044 vsimode
= V4SImode
;
21050 vsimode
= V2SImode
;
21056 /* Zero extend the variable element to SImode and recurse. */
21057 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
21059 x
= gen_reg_rtx (vsimode
);
21060 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
21062 gcc_unreachable ();
21064 emit_move_insn (target
, gen_lowpart (mode
, x
));
21072 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21073 consisting of the values in VALS. It is known that all elements
21074 except ONE_VAR are constants. Return true if successful. */
21077 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
21078 rtx target
, rtx vals
, int one_var
)
21080 rtx var
= XVECEXP (vals
, 0, one_var
);
21081 enum machine_mode wmode
;
21084 const_vec
= copy_rtx (vals
);
21085 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
21086 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
21094 /* For the two element vectors, it's just as easy to use
21095 the general case. */
21111 /* There's no way to set one QImode entry easily. Combine
21112 the variable value with its adjacent constant value, and
21113 promote to an HImode set. */
21114 x
= XVECEXP (vals
, 0, one_var
^ 1);
21117 var
= convert_modes (HImode
, QImode
, var
, true);
21118 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
21119 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
21120 x
= GEN_INT (INTVAL (x
) & 0xff);
21124 var
= convert_modes (HImode
, QImode
, var
, true);
21125 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
21127 if (x
!= const0_rtx
)
21128 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
21129 1, OPTAB_LIB_WIDEN
);
21131 x
= gen_reg_rtx (wmode
);
21132 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
21133 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
21135 emit_move_insn (target
, gen_lowpart (mode
, x
));
21142 emit_move_insn (target
, const_vec
);
21143 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
21147 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21148 all values variable, and none identical. */
21151 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
21152 rtx target
, rtx vals
)
21154 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
21155 rtx op0
= NULL
, op1
= NULL
;
21156 bool use_vec_concat
= false;
21162 if (!mmx_ok
&& !TARGET_SSE
)
21168 /* For the two element vectors, we always implement VEC_CONCAT. */
21169 op0
= XVECEXP (vals
, 0, 0);
21170 op1
= XVECEXP (vals
, 0, 1);
21171 use_vec_concat
= true;
21175 half_mode
= V2SFmode
;
21178 half_mode
= V2SImode
;
21184 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21185 Recurse to load the two halves. */
21187 op0
= gen_reg_rtx (half_mode
);
21188 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
21189 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
21191 op1
= gen_reg_rtx (half_mode
);
21192 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
21193 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
21195 use_vec_concat
= true;
21206 gcc_unreachable ();
21209 if (use_vec_concat
)
21211 if (!register_operand (op0
, half_mode
))
21212 op0
= force_reg (half_mode
, op0
);
21213 if (!register_operand (op1
, half_mode
))
21214 op1
= force_reg (half_mode
, op1
);
21216 emit_insn (gen_rtx_SET (VOIDmode
, target
,
21217 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
21221 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
21222 enum machine_mode inner_mode
;
21223 rtx words
[4], shift
;
21225 inner_mode
= GET_MODE_INNER (mode
);
21226 n_elts
= GET_MODE_NUNITS (mode
);
21227 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
21228 n_elt_per_word
= n_elts
/ n_words
;
21229 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
21231 for (i
= 0; i
< n_words
; ++i
)
21233 rtx word
= NULL_RTX
;
21235 for (j
= 0; j
< n_elt_per_word
; ++j
)
21237 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
21238 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
21244 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
21245 word
, 1, OPTAB_LIB_WIDEN
);
21246 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
21247 word
, 1, OPTAB_LIB_WIDEN
);
21255 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
21256 else if (n_words
== 2)
21258 rtx tmp
= gen_reg_rtx (mode
);
21259 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
21260 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
21261 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
21262 emit_move_insn (target
, tmp
);
21264 else if (n_words
== 4)
21266 rtx tmp
= gen_reg_rtx (V4SImode
);
21267 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
21268 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
21269 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
21272 gcc_unreachable ();
21276 /* Initialize vector TARGET via VALS. Suppress the use of MMX
21277 instructions unless MMX_OK is true. */
21280 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
21282 enum machine_mode mode
= GET_MODE (target
);
21283 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
21284 int n_elts
= GET_MODE_NUNITS (mode
);
21285 int n_var
= 0, one_var
= -1;
21286 bool all_same
= true, all_const_zero
= true;
21290 for (i
= 0; i
< n_elts
; ++i
)
21292 x
= XVECEXP (vals
, 0, i
);
21293 if (!CONSTANT_P (x
))
21294 n_var
++, one_var
= i
;
21295 else if (x
!= CONST0_RTX (inner_mode
))
21296 all_const_zero
= false;
21297 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
21301 /* Constants are best loaded from the constant pool. */
21304 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
21308 /* If all values are identical, broadcast the value. */
21310 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
21311 XVECEXP (vals
, 0, 0)))
21314 /* Values where only one field is non-constant are best loaded from
21315 the pool and overwritten via move later. */
21319 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
21320 XVECEXP (vals
, 0, one_var
),
21324 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
21328 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
21332 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
21334 enum machine_mode mode
= GET_MODE (target
);
21335 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
21336 bool use_vec_merge
= false;
21345 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
21346 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
21348 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
21350 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
21351 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
21357 use_vec_merge
= TARGET_SSE4_1
;
21365 /* For the two element vectors, we implement a VEC_CONCAT with
21366 the extraction of the other element. */
21368 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
21369 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
21372 op0
= val
, op1
= tmp
;
21374 op0
= tmp
, op1
= val
;
21376 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
21377 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
21382 use_vec_merge
= TARGET_SSE4_1
;
21389 use_vec_merge
= true;
21393 /* tmp = target = A B C D */
21394 tmp
= copy_to_reg (target
);
21395 /* target = A A B B */
21396 emit_insn (gen_sse_unpcklps (target
, target
, target
));
21397 /* target = X A B B */
21398 ix86_expand_vector_set (false, target
, val
, 0);
21399 /* target = A X C D */
21400 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
21401 GEN_INT (1), GEN_INT (0),
21402 GEN_INT (2+4), GEN_INT (3+4)));
21406 /* tmp = target = A B C D */
21407 tmp
= copy_to_reg (target
);
21408 /* tmp = X B C D */
21409 ix86_expand_vector_set (false, tmp
, val
, 0);
21410 /* target = A B X D */
21411 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
21412 GEN_INT (0), GEN_INT (1),
21413 GEN_INT (0+4), GEN_INT (3+4)));
21417 /* tmp = target = A B C D */
21418 tmp
= copy_to_reg (target
);
21419 /* tmp = X B C D */
21420 ix86_expand_vector_set (false, tmp
, val
, 0);
21421 /* target = A B X D */
21422 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
21423 GEN_INT (0), GEN_INT (1),
21424 GEN_INT (2+4), GEN_INT (0+4)));
21428 gcc_unreachable ();
21433 use_vec_merge
= TARGET_SSE4_1
;
21437 /* Element 0 handled by vec_merge below. */
21440 use_vec_merge
= true;
21446 /* With SSE2, use integer shuffles to swap element 0 and ELT,
21447 store into element 0, then shuffle them back. */
21451 order
[0] = GEN_INT (elt
);
21452 order
[1] = const1_rtx
;
21453 order
[2] = const2_rtx
;
21454 order
[3] = GEN_INT (3);
21455 order
[elt
] = const0_rtx
;
21457 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
21458 order
[1], order
[2], order
[3]));
21460 ix86_expand_vector_set (false, target
, val
, 0);
21462 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
21463 order
[1], order
[2], order
[3]));
21467 /* For SSE1, we have to reuse the V4SF code. */
21468 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
21469 gen_lowpart (SFmode
, val
), elt
);
21474 use_vec_merge
= TARGET_SSE2
;
21477 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
21481 use_vec_merge
= TARGET_SSE4_1
;
21491 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
21492 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
21493 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
21497 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
21499 emit_move_insn (mem
, target
);
21501 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
21502 emit_move_insn (tmp
, val
);
21504 emit_move_insn (target
, mem
);
21509 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
21511 enum machine_mode mode
= GET_MODE (vec
);
21512 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
21513 bool use_vec_extr
= false;
21526 use_vec_extr
= true;
21530 use_vec_extr
= TARGET_SSE4_1
;
21542 tmp
= gen_reg_rtx (mode
);
21543 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
21544 GEN_INT (elt
), GEN_INT (elt
),
21545 GEN_INT (elt
+4), GEN_INT (elt
+4)));
21549 tmp
= gen_reg_rtx (mode
);
21550 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
21554 gcc_unreachable ();
21557 use_vec_extr
= true;
21562 use_vec_extr
= TARGET_SSE4_1
;
21576 tmp
= gen_reg_rtx (mode
);
21577 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
21578 GEN_INT (elt
), GEN_INT (elt
),
21579 GEN_INT (elt
), GEN_INT (elt
)));
21583 tmp
= gen_reg_rtx (mode
);
21584 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
21588 gcc_unreachable ();
21591 use_vec_extr
= true;
21596 /* For SSE1, we have to reuse the V4SF code. */
21597 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
21598 gen_lowpart (V4SFmode
, vec
), elt
);
21604 use_vec_extr
= TARGET_SSE2
;
21607 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
21611 use_vec_extr
= TARGET_SSE4_1
;
21615 /* ??? Could extract the appropriate HImode element and shift. */
21622 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
21623 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
21625 /* Let the rtl optimizers know about the zero extension performed. */
21626 if (inner_mode
== QImode
|| inner_mode
== HImode
)
21628 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
21629 target
= gen_lowpart (SImode
, target
);
21632 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
21636 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
21638 emit_move_insn (mem
, vec
);
21640 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
21641 emit_move_insn (target
, tmp
);
21645 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
21646 pattern to reduce; DEST is the destination; IN is the input vector. */
21649 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
21651 rtx tmp1
, tmp2
, tmp3
;
21653 tmp1
= gen_reg_rtx (V4SFmode
);
21654 tmp2
= gen_reg_rtx (V4SFmode
);
21655 tmp3
= gen_reg_rtx (V4SFmode
);
21657 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
21658 emit_insn (fn (tmp2
, tmp1
, in
));
21660 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
21661 GEN_INT (1), GEN_INT (1),
21662 GEN_INT (1+4), GEN_INT (1+4)));
21663 emit_insn (fn (dest
, tmp2
, tmp3
));
21666 /* Target hook for scalar_mode_supported_p. */
21668 ix86_scalar_mode_supported_p (enum machine_mode mode
)
21670 if (DECIMAL_FLOAT_MODE_P (mode
))
21672 else if (mode
== TFmode
)
21673 return TARGET_64BIT
;
21675 return default_scalar_mode_supported_p (mode
);
21678 /* Implements target hook vector_mode_supported_p. */
21680 ix86_vector_mode_supported_p (enum machine_mode mode
)
21682 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
21684 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
21686 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
21688 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
21693 /* Worker function for TARGET_MD_ASM_CLOBBERS.
21695 We do this in the new i386 backend to maintain source compatibility
21696 with the old cc0-based compiler. */
21699 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
21700 tree inputs ATTRIBUTE_UNUSED
,
21703 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
21705 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
21710 /* Implements target vector targetm.asm.encode_section_info. This
21711 is not used by netware. */
21713 static void ATTRIBUTE_UNUSED
21714 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
21716 default_encode_section_info (decl
, rtl
, first
);
21718 if (TREE_CODE (decl
) == VAR_DECL
21719 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
21720 && ix86_in_large_data_p (decl
))
21721 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
21724 /* Worker function for REVERSE_CONDITION. */
21727 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
21729 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
21730 ? reverse_condition (code
)
21731 : reverse_condition_maybe_unordered (code
));
21734 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21738 output_387_reg_move (rtx insn
, rtx
*operands
)
21740 if (REG_P (operands
[0]))
21742 if (REG_P (operands
[1])
21743 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
21745 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
21746 return output_387_ffreep (operands
, 0);
21747 return "fstp\t%y0";
21749 if (STACK_TOP_P (operands
[0]))
21750 return "fld%z1\t%y1";
21753 else if (MEM_P (operands
[0]))
21755 gcc_assert (REG_P (operands
[1]));
21756 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
21757 return "fstp%z0\t%y0";
21760 /* There is no non-popping store to memory for XFmode.
21761 So if we need one, follow the store with a load. */
21762 if (GET_MODE (operands
[0]) == XFmode
)
21763 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
21765 return "fst%z0\t%y0";
21772 /* Output code to perform a conditional jump to LABEL, if C2 flag in
21773 FP status register is set. */
21776 ix86_emit_fp_unordered_jump (rtx label
)
21778 rtx reg
= gen_reg_rtx (HImode
);
21781 emit_insn (gen_x86_fnstsw_1 (reg
));
21783 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
21785 emit_insn (gen_x86_sahf_1 (reg
));
21787 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
21788 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
21792 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
21794 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
21795 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
21798 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
21799 gen_rtx_LABEL_REF (VOIDmode
, label
),
21801 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
21803 emit_jump_insn (temp
);
21804 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21807 /* Output code to perform a log1p XFmode calculation. */
21809 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
21811 rtx label1
= gen_label_rtx ();
21812 rtx label2
= gen_label_rtx ();
21814 rtx tmp
= gen_reg_rtx (XFmode
);
21815 rtx tmp2
= gen_reg_rtx (XFmode
);
21817 emit_insn (gen_absxf2 (tmp
, op1
));
21818 emit_insn (gen_cmpxf (tmp
,
21819 CONST_DOUBLE_FROM_REAL_VALUE (
21820 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
21822 emit_jump_insn (gen_bge (label1
));
21824 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21825 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
21826 emit_jump (label2
);
21828 emit_label (label1
);
21829 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
21830 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
21831 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21832 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
21834 emit_label (label2
);
21837 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21839 static void ATTRIBUTE_UNUSED
21840 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
21843 /* With Binutils 2.15, the "@unwind" marker must be specified on
21844 every occurrence of the ".eh_frame" section, not just the first
21847 && strcmp (name
, ".eh_frame") == 0)
21849 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
21850 flags
& SECTION_WRITE
? "aw" : "a");
21853 default_elf_asm_named_section (name
, flags
, decl
);
21856 /* Return the mangling of TYPE if it is an extended fundamental type. */
21858 static const char *
21859 ix86_mangle_fundamental_type (tree type
)
21861 switch (TYPE_MODE (type
))
21864 /* __float128 is "g". */
21867 /* "long double" or __float80 is "e". */
21874 /* For 32-bit code we can save PIC register setup by using
21875 __stack_chk_fail_local hidden function instead of calling
21876 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21877 register, so it is better to call __stack_chk_fail directly. */
21880 ix86_stack_protect_fail (void)
21882 return TARGET_64BIT
21883 ? default_external_stack_protect_fail ()
21884 : default_hidden_stack_protect_fail ();
21887 /* Select a format to encode pointers in exception handling data. CODE
21888 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21889 true if the symbol may be affected by dynamic relocations.
21891 ??? All x86 object file formats are capable of representing this.
21892 After all, the relocation needed is the same as for the call insn.
21893 Whether or not a particular assembler allows us to enter such, I
21894 guess we'll have to see. */
21896 asm_preferred_eh_data_format (int code
, int global
)
21900 int type
= DW_EH_PE_sdata8
;
21902 || ix86_cmodel
== CM_SMALL_PIC
21903 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
21904 type
= DW_EH_PE_sdata4
;
21905 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
21907 if (ix86_cmodel
== CM_SMALL
21908 || (ix86_cmodel
== CM_MEDIUM
&& code
))
21909 return DW_EH_PE_udata4
;
21910 return DW_EH_PE_absptr
;
21913 /* Expand copysign from SIGN to the positive value ABS_VALUE
21914 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21917 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
21919 enum machine_mode mode
= GET_MODE (sign
);
21920 rtx sgn
= gen_reg_rtx (mode
);
21921 if (mask
== NULL_RTX
)
21923 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
21924 if (!VECTOR_MODE_P (mode
))
21926 /* We need to generate a scalar mode mask in this case. */
21927 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21928 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21929 mask
= gen_reg_rtx (mode
);
21930 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21934 mask
= gen_rtx_NOT (mode
, mask
);
21935 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
21936 gen_rtx_AND (mode
, mask
, sign
)));
21937 emit_insn (gen_rtx_SET (VOIDmode
, result
,
21938 gen_rtx_IOR (mode
, abs_value
, sgn
)));
21941 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21942 mask for masking out the sign-bit is stored in *SMASK, if that is
21945 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
21947 enum machine_mode mode
= GET_MODE (op0
);
21950 xa
= gen_reg_rtx (mode
);
21951 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
21952 if (!VECTOR_MODE_P (mode
))
21954 /* We need to generate a scalar mode mask in this case. */
21955 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21956 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21957 mask
= gen_reg_rtx (mode
);
21958 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21960 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
21961 gen_rtx_AND (mode
, op0
, mask
)));
21969 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21970 swapping the operands if SWAP_OPERANDS is true. The expanded
21971 code is a forward jump to a newly created label in case the
21972 comparison is true. The generated label rtx is returned. */
21974 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
21975 bool swap_operands
)
21986 label
= gen_label_rtx ();
21987 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
21988 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21989 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
21990 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
21991 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21992 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
21993 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21994 JUMP_LABEL (tmp
) = label
;
21999 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
22000 using comparison code CODE. Operands are swapped for the comparison if
22001 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
22003 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
22004 bool swap_operands
)
22006 enum machine_mode mode
= GET_MODE (op0
);
22007 rtx mask
= gen_reg_rtx (mode
);
22016 if (mode
== DFmode
)
22017 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
22018 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
22020 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
22021 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
22026 /* Generate and return a rtx of mode MODE for 2**n where n is the number
22027 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
22029 ix86_gen_TWO52 (enum machine_mode mode
)
22031 REAL_VALUE_TYPE TWO52r
;
22034 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
22035 TWO52
= const_double_from_real_value (TWO52r
, mode
);
22036 TWO52
= force_reg (mode
, TWO52
);
22041 /* Expand SSE sequence for computing lround from OP1 storing
22044 ix86_expand_lround (rtx op0
, rtx op1
)
22046 /* C code for the stuff we're doing below:
22047 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
22050 enum machine_mode mode
= GET_MODE (op1
);
22051 const struct real_format
*fmt
;
22052 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
22055 /* load nextafter (0.5, 0.0) */
22056 fmt
= REAL_MODE_FORMAT (mode
);
22057 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
22058 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
22060 /* adj = copysign (0.5, op1) */
22061 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
22062 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
22064 /* adj = op1 + adj */
22065 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
22067 /* op0 = (imode)adj */
22068 expand_fix (op0
, adj
, 0);
22071 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
22074 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
22076 /* C code for the stuff we're doing below (for do_floor):
22078 xi -= (double)xi > op1 ? 1 : 0;
22081 enum machine_mode fmode
= GET_MODE (op1
);
22082 enum machine_mode imode
= GET_MODE (op0
);
22083 rtx ireg
, freg
, label
, tmp
;
22085 /* reg = (long)op1 */
22086 ireg
= gen_reg_rtx (imode
);
22087 expand_fix (ireg
, op1
, 0);
22089 /* freg = (double)reg */
22090 freg
= gen_reg_rtx (fmode
);
22091 expand_float (freg
, ireg
, 0);
22093 /* ireg = (freg > op1) ? ireg - 1 : ireg */
22094 label
= ix86_expand_sse_compare_and_jump (UNLE
,
22095 freg
, op1
, !do_floor
);
22096 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
22097 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
22098 emit_move_insn (ireg
, tmp
);
22100 emit_label (label
);
22101 LABEL_NUSES (label
) = 1;
22103 emit_move_insn (op0
, ireg
);
22106 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
22107 result in OPERAND0. */
22109 ix86_expand_rint (rtx operand0
, rtx operand1
)
22111 /* C code for the stuff we're doing below:
22112 xa = fabs (operand1);
22113 if (!isless (xa, 2**52))
22115 xa = xa + 2**52 - 2**52;
22116 return copysign (xa, operand1);
22118 enum machine_mode mode
= GET_MODE (operand0
);
22119 rtx res
, xa
, label
, TWO52
, mask
;
22121 res
= gen_reg_rtx (mode
);
22122 emit_move_insn (res
, operand1
);
22124 /* xa = abs (operand1) */
22125 xa
= ix86_expand_sse_fabs (res
, &mask
);
22127 /* if (!isless (xa, TWO52)) goto label; */
22128 TWO52
= ix86_gen_TWO52 (mode
);
22129 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
22131 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
22132 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
22134 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
22136 emit_label (label
);
22137 LABEL_NUSES (label
) = 1;
22139 emit_move_insn (operand0
, res
);
22142 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22145 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
22147 /* C code for the stuff we expand below.
22148 double xa = fabs (x), x2;
22149 if (!isless (xa, TWO52))
22151 xa = xa + TWO52 - TWO52;
22152 x2 = copysign (xa, x);
22161 enum machine_mode mode
= GET_MODE (operand0
);
22162 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
22164 TWO52
= ix86_gen_TWO52 (mode
);
22166 /* Temporary for holding the result, initialized to the input
22167 operand to ease control flow. */
22168 res
= gen_reg_rtx (mode
);
22169 emit_move_insn (res
, operand1
);
22171 /* xa = abs (operand1) */
22172 xa
= ix86_expand_sse_fabs (res
, &mask
);
22174 /* if (!isless (xa, TWO52)) goto label; */
22175 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
22177 /* xa = xa + TWO52 - TWO52; */
22178 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
22179 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
22181 /* xa = copysign (xa, operand1) */
22182 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
22184 /* generate 1.0 or -1.0 */
22185 one
= force_reg (mode
,
22186 const_double_from_real_value (do_floor
22187 ? dconst1
: dconstm1
, mode
));
22189 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22190 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
22191 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
22192 gen_rtx_AND (mode
, one
, tmp
)));
22193 /* We always need to subtract here to preserve signed zero. */
22194 tmp
= expand_simple_binop (mode
, MINUS
,
22195 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
22196 emit_move_insn (res
, tmp
);
22198 emit_label (label
);
22199 LABEL_NUSES (label
) = 1;
22201 emit_move_insn (operand0
, res
);
22204 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22207 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
22209 /* C code for the stuff we expand below.
22210 double xa = fabs (x), x2;
22211 if (!isless (xa, TWO52))
22213 x2 = (double)(long)x;
22220 if (HONOR_SIGNED_ZEROS (mode))
22221 return copysign (x2, x);
22224 enum machine_mode mode
= GET_MODE (operand0
);
22225 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
22227 TWO52
= ix86_gen_TWO52 (mode
);
22229 /* Temporary for holding the result, initialized to the input
22230 operand to ease control flow. */
22231 res
= gen_reg_rtx (mode
);
22232 emit_move_insn (res
, operand1
);
22234 /* xa = abs (operand1) */
22235 xa
= ix86_expand_sse_fabs (res
, &mask
);
22237 /* if (!isless (xa, TWO52)) goto label; */
22238 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
22240 /* xa = (double)(long)x */
22241 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
22242 expand_fix (xi
, res
, 0);
22243 expand_float (xa
, xi
, 0);
22246 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
22248 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22249 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
22250 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
22251 gen_rtx_AND (mode
, one
, tmp
)));
22252 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
22253 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
22254 emit_move_insn (res
, tmp
);
22256 if (HONOR_SIGNED_ZEROS (mode
))
22257 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
22259 emit_label (label
);
22260 LABEL_NUSES (label
) = 1;
22262 emit_move_insn (operand0
, res
);
22265 /* Expand SSE sequence for computing round from OPERAND1 storing
22266 into OPERAND0. Sequence that works without relying on DImode truncation
22267 via cvttsd2siq that is only available on 64bit targets. */
22269 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
22271 /* C code for the stuff we expand below.
22272 double xa = fabs (x), xa2, x2;
22273 if (!isless (xa, TWO52))
22275 Using the absolute value and copying back sign makes
22276 -0.0 -> -0.0 correct.
22277 xa2 = xa + TWO52 - TWO52;
22282 else if (dxa > 0.5)
22284 x2 = copysign (xa2, x);
22287 enum machine_mode mode
= GET_MODE (operand0
);
22288 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
22290 TWO52
= ix86_gen_TWO52 (mode
);
22292 /* Temporary for holding the result, initialized to the input
22293 operand to ease control flow. */
22294 res
= gen_reg_rtx (mode
);
22295 emit_move_insn (res
, operand1
);
22297 /* xa = abs (operand1) */
22298 xa
= ix86_expand_sse_fabs (res
, &mask
);
22300 /* if (!isless (xa, TWO52)) goto label; */
22301 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
22303 /* xa2 = xa + TWO52 - TWO52; */
22304 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
22305 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
22307 /* dxa = xa2 - xa; */
22308 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
22310 /* generate 0.5, 1.0 and -0.5 */
22311 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
22312 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
22313 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
22317 tmp
= gen_reg_rtx (mode
);
22318 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
22319 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
22320 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
22321 gen_rtx_AND (mode
, one
, tmp
)));
22322 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
22323 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
22324 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
22325 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
22326 gen_rtx_AND (mode
, one
, tmp
)));
22327 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
22329 /* res = copysign (xa2, operand1) */
22330 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
22332 emit_label (label
);
22333 LABEL_NUSES (label
) = 1;
22335 emit_move_insn (operand0
, res
);
22338 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22341 ix86_expand_trunc (rtx operand0
, rtx operand1
)
22343 /* C code for SSE variant we expand below.
22344 double xa = fabs (x), x2;
22345 if (!isless (xa, TWO52))
22347 x2 = (double)(long)x;
22348 if (HONOR_SIGNED_ZEROS (mode))
22349 return copysign (x2, x);
22352 enum machine_mode mode
= GET_MODE (operand0
);
22353 rtx xa
, xi
, TWO52
, label
, res
, mask
;
22355 TWO52
= ix86_gen_TWO52 (mode
);
22357 /* Temporary for holding the result, initialized to the input
22358 operand to ease control flow. */
22359 res
= gen_reg_rtx (mode
);
22360 emit_move_insn (res
, operand1
);
22362 /* xa = abs (operand1) */
22363 xa
= ix86_expand_sse_fabs (res
, &mask
);
22365 /* if (!isless (xa, TWO52)) goto label; */
22366 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
22368 /* x = (double)(long)x */
22369 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
22370 expand_fix (xi
, res
, 0);
22371 expand_float (res
, xi
, 0);
22373 if (HONOR_SIGNED_ZEROS (mode
))
22374 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
22376 emit_label (label
);
22377 LABEL_NUSES (label
) = 1;
22379 emit_move_insn (operand0
, res
);
22382 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22385 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
22387 enum machine_mode mode
= GET_MODE (operand0
);
22388 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
22390 /* C code for SSE variant we expand below.
22391 double xa = fabs (x), x2;
22392 if (!isless (xa, TWO52))
22394 xa2 = xa + TWO52 - TWO52;
22398 x2 = copysign (xa2, x);
22402 TWO52
= ix86_gen_TWO52 (mode
);
22404 /* Temporary for holding the result, initialized to the input
22405 operand to ease control flow. */
22406 res
= gen_reg_rtx (mode
);
22407 emit_move_insn (res
, operand1
);
22409 /* xa = abs (operand1) */
22410 xa
= ix86_expand_sse_fabs (res
, &smask
);
22412 /* if (!isless (xa, TWO52)) goto label; */
22413 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
22415 /* res = xa + TWO52 - TWO52; */
22416 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
22417 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
22418 emit_move_insn (res
, tmp
);
22421 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
22423 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
22424 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
22425 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
22426 gen_rtx_AND (mode
, mask
, one
)));
22427 tmp
= expand_simple_binop (mode
, MINUS
,
22428 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
22429 emit_move_insn (res
, tmp
);
22431 /* res = copysign (res, operand1) */
22432 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
22434 emit_label (label
);
22435 LABEL_NUSES (label
) = 1;
22437 emit_move_insn (operand0
, res
);
22440 /* Expand SSE sequence for computing round from OPERAND1 storing
22443 ix86_expand_round (rtx operand0
, rtx operand1
)
22445 /* C code for the stuff we're doing below:
22446 double xa = fabs (x);
22447 if (!isless (xa, TWO52))
22449 xa = (double)(long)(xa + nextafter (0.5, 0.0));
22450 return copysign (xa, x);
22452 enum machine_mode mode
= GET_MODE (operand0
);
22453 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
22454 const struct real_format
*fmt
;
22455 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
22457 /* Temporary for holding the result, initialized to the input
22458 operand to ease control flow. */
22459 res
= gen_reg_rtx (mode
);
22460 emit_move_insn (res
, operand1
);
22462 TWO52
= ix86_gen_TWO52 (mode
);
22463 xa
= ix86_expand_sse_fabs (res
, &mask
);
22464 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
22466 /* load nextafter (0.5, 0.0) */
22467 fmt
= REAL_MODE_FORMAT (mode
);
22468 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
22469 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
22471 /* xa = xa + 0.5 */
22472 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
22473 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
22475 /* xa = (double)(int64_t)xa */
22476 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
22477 expand_fix (xi
, xa
, 0);
22478 expand_float (xa
, xi
, 0);
22480 /* res = copysign (xa, operand1) */
22481 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
22483 emit_label (label
);
22484 LABEL_NUSES (label
) = 1;
22486 emit_move_insn (operand0
, res
);
22490 /* Table of valid machine attributes. */
22491 static const struct attribute_spec ix86_attribute_table
[] =
22493 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
22494 /* Stdcall attribute says callee is responsible for popping arguments
22495 if they are not variable. */
22496 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
22497 /* Fastcall attribute says callee is responsible for popping arguments
22498 if they are not variable. */
22499 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
22500 /* Cdecl attribute says the callee is a normal C declaration */
22501 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
22502 /* Regparm attribute specifies how many integer arguments are to be
22503 passed in registers. */
22504 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
22505 /* Sseregparm attribute says we are using x86_64 calling conventions
22506 for FP arguments. */
22507 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
22508 /* force_align_arg_pointer says this function realigns the stack at entry. */
22509 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
22510 false, true, true, ix86_handle_cconv_attribute
},
22511 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22512 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
22513 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
22514 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
22516 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
22517 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
22518 #ifdef SUBTARGET_ATTRIBUTE_TABLE
22519 SUBTARGET_ATTRIBUTE_TABLE
,
22521 { NULL
, 0, 0, false, false, false, NULL
}
22524 /* Initialize the GCC target structure. */
22525 #undef TARGET_ATTRIBUTE_TABLE
22526 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22527 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22528 # undef TARGET_MERGE_DECL_ATTRIBUTES
22529 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22532 #undef TARGET_COMP_TYPE_ATTRIBUTES
22533 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22535 #undef TARGET_INIT_BUILTINS
22536 #define TARGET_INIT_BUILTINS ix86_init_builtins
22537 #undef TARGET_EXPAND_BUILTIN
22538 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22540 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22541 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
22542 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
22543 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
22545 #undef TARGET_ASM_FUNCTION_EPILOGUE
22546 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22548 #undef TARGET_ENCODE_SECTION_INFO
22549 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22550 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22552 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22555 #undef TARGET_ASM_OPEN_PAREN
22556 #define TARGET_ASM_OPEN_PAREN ""
22557 #undef TARGET_ASM_CLOSE_PAREN
22558 #define TARGET_ASM_CLOSE_PAREN ""
22560 #undef TARGET_ASM_ALIGNED_HI_OP
22561 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22562 #undef TARGET_ASM_ALIGNED_SI_OP
22563 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22565 #undef TARGET_ASM_ALIGNED_DI_OP
22566 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22569 #undef TARGET_ASM_UNALIGNED_HI_OP
22570 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22571 #undef TARGET_ASM_UNALIGNED_SI_OP
22572 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22573 #undef TARGET_ASM_UNALIGNED_DI_OP
22574 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22576 #undef TARGET_SCHED_ADJUST_COST
22577 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22578 #undef TARGET_SCHED_ISSUE_RATE
22579 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22580 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22581 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22582 ia32_multipass_dfa_lookahead
22584 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22585 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22588 #undef TARGET_HAVE_TLS
22589 #define TARGET_HAVE_TLS true
22591 #undef TARGET_CANNOT_FORCE_CONST_MEM
22592 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22593 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22594 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
22596 #undef TARGET_DELEGITIMIZE_ADDRESS
22597 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22599 #undef TARGET_MS_BITFIELD_LAYOUT_P
22600 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22603 #undef TARGET_BINDS_LOCAL_P
22604 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22606 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22607 #undef TARGET_BINDS_LOCAL_P
22608 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
22611 #undef TARGET_ASM_OUTPUT_MI_THUNK
22612 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
22613 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
22614 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
22616 #undef TARGET_ASM_FILE_START
22617 #define TARGET_ASM_FILE_START x86_file_start
22619 #undef TARGET_DEFAULT_TARGET_FLAGS
22620 #define TARGET_DEFAULT_TARGET_FLAGS \
22622 | TARGET_64BIT_DEFAULT \
22623 | TARGET_SUBTARGET_DEFAULT \
22624 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
22626 #undef TARGET_HANDLE_OPTION
22627 #define TARGET_HANDLE_OPTION ix86_handle_option
22629 #undef TARGET_RTX_COSTS
22630 #define TARGET_RTX_COSTS ix86_rtx_costs
22631 #undef TARGET_ADDRESS_COST
22632 #define TARGET_ADDRESS_COST ix86_address_cost
22634 #undef TARGET_FIXED_CONDITION_CODE_REGS
22635 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
22636 #undef TARGET_CC_MODES_COMPATIBLE
22637 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
22639 #undef TARGET_MACHINE_DEPENDENT_REORG
22640 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
22642 #undef TARGET_BUILD_BUILTIN_VA_LIST
22643 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
22645 #undef TARGET_MD_ASM_CLOBBERS
22646 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
22648 #undef TARGET_PROMOTE_PROTOTYPES
22649 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
22650 #undef TARGET_STRUCT_VALUE_RTX
22651 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
22652 #undef TARGET_SETUP_INCOMING_VARARGS
22653 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
22654 #undef TARGET_MUST_PASS_IN_STACK
22655 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
22656 #undef TARGET_PASS_BY_REFERENCE
22657 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
22658 #undef TARGET_INTERNAL_ARG_POINTER
22659 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
22660 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
22661 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
22662 #undef TARGET_STRICT_ARGUMENT_NAMING
22663 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22665 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
22666 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
22668 #undef TARGET_SCALAR_MODE_SUPPORTED_P
22669 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
22671 #undef TARGET_VECTOR_MODE_SUPPORTED_P
22672 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
22675 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
22676 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
22679 #ifdef SUBTARGET_INSERT_ATTRIBUTES
22680 #undef TARGET_INSERT_ATTRIBUTES
22681 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
22684 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
22685 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
22687 #undef TARGET_STACK_PROTECT_FAIL
22688 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
22690 #undef TARGET_FUNCTION_VALUE
22691 #define TARGET_FUNCTION_VALUE ix86_function_value
22693 struct gcc_target targetm
= TARGET_INITIALIZER
;
22695 #include "gt-i386.h"